In [6]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [7]:
# 1️⃣ Load the preprocessed dataset and hold out a test split
# ---------------------------
# NOTE(review): assumes every column other than 'Response' is a model-ready
# feature written by the preprocessing step — confirm none of them encodes the target.
df = pd.read_csv('../Data/preprocessed.csv')

y = df['Response']                 # target column
X = df.drop(columns='Response')    # everything else is used as features

# 80/20 split with a fixed seed; stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

print('Train:', X_train.shape, 'Test:', X_test.shape)

Train: (2400, 6) Test: (600, 6)


In [8]:
# 2️⃣ Hyperparameter search spaces, keyed by model display name
# ---------------------------
# Each entry maps an estimator keyword argument to the list of candidate
# values the grid search will try. The outer keys are looked up by model name
# when the grid search runs, so they must stay in sync with the model dict.
param_grids = {}

param_grids["Logistic Regression"] = dict(
    C=[0.01, 0.1, 1, 10],
    solver=['liblinear', 'lbfgs'],
)

param_grids["SVM"] = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
)

param_grids["Random Forest"] = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 5, 10],   # None = no depth limit passed to the estimator
)

param_grids["AdaBoost"] = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.5, 1.0, 1.5],
)

param_grids["Gradient Boosting"] = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 5, 7],
)

In [9]:
# 3️⃣ Run GridSearchCV & Evaluate Tuned Models
# ---------------------------
# For each candidate model: run a 5-fold grid search over its entry in
# `param_grids` on the training split, then score the refit best estimator
# once on the held-out test split.
#
# NOTE(review): the recorded run reports 1.0000 on every metric for every
# model. Identical perfect scores across five different learners usually mean
# a feature encodes the target (leakage) rather than genuinely perfect
# models — verify the preprocessing output before trusting these numbers.
#
# NOTE(review): candidates are selected by cross-validated accuracy, while the
# comparison table is ranked by F1-Score. Confirm that mismatch is intended.

# Fixed seed for the stochastic estimators so a fresh "Restart & Run All"
# reproduces the same fitted models. Same value as the train/test split seed.
SEED = 42

base_models = {
    # random_state matters for the 'liblinear' solver, which shuffles the data.
    "Logistic Regression": LogisticRegression(random_state=SEED),
    # SVC only uses random_state when probability=True, so it is left unset.
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "AdaBoost": AdaBoostClassifier(random_state=SEED),
    "Gradient Boosting": GradientBoostingClassifier(random_state=SEED),
}

results = []       # one metrics row (dict) per model, in iteration order
best_params = {}   # model name -> best hyperparameter dict found by the search

for name, model in base_models.items():
    print(f"\n🔍 Tuning {name} ...")
    grid = GridSearchCV(model, param_grids[name], cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)

    # best_estimator_ is already refit on the full training split (refit=True default).
    best_model = grid.best_estimator_
    best_params[name] = grid.best_params_

    # Hold-out evaluation. precision/recall/f1 use their default binary
    # averaging — assumes 'Response' is a binary label with positive class 1.
    y_pred = best_model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    results.append({
        "Model": name,
        "Accuracy": acc,
        "Precision": prec,
        "Recall": rec,
        "F1-Score": f1
    })

    print(f"✅ {name} Tuned → F1-Score: {f1:.4f}")


🔍 Tuning Logistic Regression ...
✅ Logistic Regression Tuned → F1-Score: 1.0000

🔍 Tuning SVM ...
✅ SVM Tuned → F1-Score: 1.0000

🔍 Tuning Random Forest ...
✅ Random Forest Tuned → F1-Score: 1.0000

🔍 Tuning AdaBoost ...
✅ AdaBoost Tuned → F1-Score: 1.0000

🔍 Tuning Gradient Boosting ...
✅ Gradient Boosting Tuned → F1-Score: 1.0000


In [10]:
# 4️⃣ Final comparison table, best F1-Score first
# ---------------------------
# `results_df` keeps the original (unsorted) row order because it is written
# to disk later; only the printed view is sorted and re-indexed.
results_df = pd.DataFrame(results)

ranked_view = results_df.sort_values(by="F1-Score", ascending=False)
ranked_view = ranked_view.reset_index(drop=True)

print("\n📊 Tuned Models Comparison:")
print(ranked_view)


📊 Tuned Models Comparison:
                 Model  Accuracy  Precision  Recall  F1-Score
0  Logistic Regression       1.0        1.0     1.0       1.0
1                  SVM       1.0        1.0     1.0       1.0
2        Random Forest       1.0        1.0     1.0       1.0
3             AdaBoost       1.0        1.0     1.0       1.0
4    Gradient Boosting       1.0        1.0     1.0       1.0


In [11]:
# 5️⃣ Persist the metrics table and the winning hyperparameters
# ---------------------------
# NOTE(review): both writes assume the '../Evaluation' directory already
# exists; to_csv does not create missing parent folders.
comparison_path = '../Evaluation/tuned_models_comparison.csv'
results_df.to_csv(comparison_path, index=False)
print("\n✅ Saved: tuned_models_comparison.csv")

# One row per model. The BestParams cell holds the parameter dict, which
# to_csv serialises via its string representation.
param_rows = [(model_name, chosen) for model_name, chosen in best_params.items()]
params_df = pd.DataFrame(param_rows, columns=['Model', 'BestParams'])

hyperparams_path = '../Evaluation/best_hyperparameters.csv'
params_df.to_csv(hyperparams_path, index=False)
print("✅ Saved: best_hyperparameters.csv")


✅ Saved: tuned_models_comparison.csv
✅ Saved: best_hyperparameters.csv
