In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from boruta import BorutaPy
from sklearn.pipeline import Pipeline
import joblib

In [None]:
# Location of the input dataset, resolved relative to the working directory.
path = "augmented_data.csv"

# Parse the CSV into the frame that the following cells work from.
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Display the column labels as read from the CSV, before they are replaced below.
df.columns


In [None]:
# New labels for the frame. Assigning to df.columns is positional: the i-th
# name below replaces the i-th existing column.
# NOTE(review): this assumes the CSV holds exactly these eight columns in this
# order -- confirm against the file.
column_names = [
    'Decision',                          # target label
    'Soil.Texture',
    'Soil.Colour',
    'Geological.Features',
    'Elevation',
    'Natural.vegitation..tree..vigour',
    'Natural.vegitation..tree..height',
    'Drainage.Density',
]
df.columns = column_names

In [None]:
# Strip surrounding whitespace from the raw labels so they match the mapping keys.
df["Decision"] = df["Decision"].str.strip()

# Binary target: 1 = "High Potential", 0 = "Low Potential".
label_map = {"High Potential": 1, "Low Potential": 0}
y = df["Decision"].map(label_map)

# Series.map yields NaN for any value that is not a key of the mapping.
# Stop here with an explicit message instead of letting NaN targets reach
# the feature-selection and training cells.
unmapped = y.isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, "Decision"].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'Decision' column: {unexpected}; "
        f"expected one of {list(label_map)}"
    )

In [None]:
# Feature matrix: every column of df except the target label.
X = df.drop(columns="Decision")

In [None]:
# Encoding
# Fit an OrdinalEncoder on every feature column, then wrap the encoded array
# in a DataFrame that carries the original column labels.
# NOTE(review): the encoder is fitted on the full dataset (before the
# train/test split) and is saved later for reuse; with default settings it
# presumably rejects categories it has not seen -- confirm that is intended.
encoder = OrdinalEncoder()
encoder.fit(X)
encoded_values = encoder.transform(X)
X_encoded = pd.DataFrame(encoded_values, columns=X.columns)

In [None]:
#Boruta
# Run feature selection on the training rows only, so the rows that are later
# held out for evaluation never influence which columns are kept.
# The arguments below deliberately mirror the train_test_split call in the
# splitting cell (test_size, stratify, random_state): the stratified shuffle
# depends only on the number of rows, the stratify labels and the seed, so
# this reproduces the same train/test partition. Keep the two calls in sync.
train_rows, _ = train_test_split(
    np.arange(len(X_encoded)),
    test_size=0.2,
    stratify=y,
    random_state=42
)

# Base estimator whose feature importances Boruta evaluates.
rf = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    n_jobs=-1,
    class_weight="balanced"
)

# n_estimators="auto" is passed to BorutaPy in place of a fixed tree count.
boruta = BorutaPy(
    rf,
    n_estimators="auto",
    random_state=42
)

# BorutaPy is given plain NumPy arrays here, restricted to the training rows.
boruta.fit(X_encoded.values[train_rows], y.values[train_rows])

# boruta.support_ is used as a boolean mask over the encoded columns.
important_features = X_encoded.columns[boruta.support_].tolist()
print("Selected Features:", important_features)

# An empty selection would only surface later as an obscure error in the
# scaling/training cells; stop here with an explicit message instead.
if not important_features:
    raise ValueError(
        "Boruta did not confirm any feature; cannot build the model input."
    )

# Keep every row (train and test) but only the selected columns.
X_selected = X_encoded[important_features]

In [None]:
# Display the encoded feature frame restricted to the Boruta-selected columns.
X_selected


In [None]:
# Splitting
# Hold out 20% of the rows for evaluation, stratified on the target so both
# parts keep the same class proportions; the seed makes the split repeatable.
split_parts = train_test_split(
    X_selected, y, test_size=0.2, stratify=y, random_state=42
)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Scaling
# Fit the scaler on the training part only, then apply that same fitted
# transform to both parts.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train the SVM
# Hyper-parameters are collected in one place; probability=True is requested
# at construction time, and random_state fixes the seed.
svm_params = dict(
    kernel="rbf",
    C=1,
    gamma="scale",
    probability=True,
    random_state=42,
)
svm_model = SVC(**svm_params)

# Fit on the scaled training features; as the last expression of the cell,
# the fitted estimator is also shown as the cell output.
svm_model.fit(X_train_scaled, y_train)

In [None]:
# Evaluation
# Predict on the scaled held-out features and compute the three metrics first.
y_pred = svm_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

# Print the metrics in a fixed order: accuracy, per-class report, confusion matrix.
print("\nAccuracy:", accuracy)
print("\nClassification Report:\n", report)
print("\nConfusion Matrix:\n", matrix)

In [None]:
# Saving the artifacts
# Persist the fitted objects (model, scaler, encoder) with joblib.
fitted_artifacts = (
    (svm_model, "svm_model.pkl"),
    (scaler, "scaler.pkl"),
    (encoder, "encoder.pkl"),
)
for artifact, filename in fitted_artifacts:
    joblib.dump(artifact, filename)

# The list of selected column names is saved last; it stays a bare expression
# so the cell still shows this call's return value.
joblib.dump(important_features, "selected_features.pkl")



In [None]:
# Final confirmation message; this cell only prints and does not itself
# check that the files were written.
print("\nAll files saved successfully.")