In [6]:
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [7]:
# 1. Load preprocessed data
# ---------------------------
# Read the preprocessed table, separate the 'Response' target column from the
# remaining feature columns, and hold out 20% of the rows as a test set.
df = pd.read_csv('../Data/preprocessed.csv')

y = df['Response']
X = df.drop(columns='Response')

# Stratify on the target so the class balance is kept in both partitions;
# the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

print('Train:', X_train.shape, 'Test:', X_test.shape)

Train: (2400, 6) Test: (600, 6)


In [8]:
# 2. Define parameter grids
# ---------------------------
# Maps each model's display name to the hyperparameter grid that GridSearchCV
# will search for it. The keys must match the names used in `base_models`.
param_grids = {
    "Logistic Regression": dict(
        C=[0.01, 0.1, 1, 10],
        solver=['liblinear', 'lbfgs'],
    ),
    "SVM": dict(
        C=[0.1, 1, 10],
        kernel=['linear', 'rbf'],
    ),
    "Random Forest": dict(
        n_estimators=[50, 100, 200],
        max_depth=[None, 5, 10],
    ),
    "AdaBoost": dict(
        n_estimators=[50, 100, 200],
        learning_rate=[0.5, 1.0, 1.5],
    ),
    "Gradient Boosting": dict(
        n_estimators=[50, 100, 200],
        learning_rate=[0.05, 0.1, 0.2],
        max_depth=[3, 5, 7],
    ),
}

In [9]:
# 3. Run GridSearchCV and evaluate the tuned models
# ---------------------------
# For every candidate model: search its parameter grid with 5-fold
# cross-validation on the training split (selecting by accuracy), then score
# the refitted best estimator once on the held-out test split.
#
# NOTE(review): the recorded run reports 1.0000 for every model on the test
# split. Identical perfect scores across five different model families are
# unusual -- check whether a feature in preprocessed.csv is derived from
# 'Response' (target leakage) before trusting these numbers.

# Seed shared by every estimator that draws random numbers, so the selected
# hyperparameters and the reported metrics are the same on each Run All.
# Same value as the random_state used for the train/test split.
RANDOM_STATE = 42

base_models = {
    "Logistic Regression": LogisticRegression(random_state=RANDOM_STATE),
    # SVC only uses random_state when probability=True, so it is left unseeded.
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE)
}

results = []       # one metrics record (dict) per model, in tuning order
best_params = {}   # model name -> winning parameter combination

for name, model in base_models.items():
    print(f"\nüîç Tuning {name} ...")
    grid = GridSearchCV(model, param_grids[name], cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)

    # best_estimator_ is the winning configuration refitted on all of X_train.
    best_model = grid.best_estimator_
    best_params[name] = grid.best_params_

    # Precision/recall/F1 are called with scikit-learn's defaults, which
    # assume a binary target -- TODO confirm 'Response' is binary.
    y_pred = best_model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    results.append({
        "Model": name,
        "Accuracy": acc,
        "Precision": prec,
        "Recall": rec,
        "F1-Score": f1
    })

    print(f"‚úÖ {name} Tuned ‚Üí F1-Score: {f1:.4f}")


üîç Tuning Logistic Regression ...
‚úÖ Logistic Regression Tuned ‚Üí F1-Score: 1.0000

üîç Tuning SVM ...
‚úÖ SVM Tuned ‚Üí F1-Score: 1.0000

üîç Tuning Random Forest ...
‚úÖ Random Forest Tuned ‚Üí F1-Score: 1.0000

üîç Tuning AdaBoost ...
‚úÖ AdaBoost Tuned ‚Üí F1-Score: 1.0000

üîç Tuning Gradient Boosting ...
‚úÖ Gradient Boosting Tuned ‚Üí F1-Score: 1.0000


In [10]:
# 4. Show final comparison table
# ---------------------------
# Collect the per-model metric records into one frame and print it ranked by
# F1-score, best first, with a fresh 0..n-1 index.
results_df = pd.DataFrame(results)

print("\nüìä Tuned Models Comparison:")
ranked_results = results_df.sort_values(by="F1-Score", ascending=False)
ranked_results = ranked_results.reset_index(drop=True)
print(ranked_results)


üìä Tuned Models Comparison:
                 Model  Accuracy  Precision  Recall  F1-Score
0  Logistic Regression       1.0        1.0     1.0       1.0
1                  SVM       1.0        1.0     1.0       1.0
2        Random Forest       1.0        1.0     1.0       1.0
3             AdaBoost       1.0        1.0     1.0       1.0
4    Gradient Boosting       1.0        1.0     1.0       1.0


In [11]:
# 5. Save comparison table and best hyperparameters
# ---------------------------
# Write both result tables as CSV files under ../Evaluation.
evaluation_dir = Path('../Evaluation')
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
evaluation_dir.mkdir(parents=True, exist_ok=True)

results_df.to_csv(evaluation_dir / 'tuned_models_comparison.csv', index=False)
print("\n‚úÖ Saved: tuned_models_comparison.csv")

# One row per model; BestParams holds the dict of winning grid values, which
# to_csv writes out as its string representation.
params_df = pd.DataFrame(list(best_params.items()), columns=['Model', 'BestParams'])
params_df.to_csv(evaluation_dir / 'best_hyperparameters.csv', index=False)
print("‚úÖ Saved: best_hyperparameters.csv")


‚úÖ Saved: tuned_models_comparison.csv
‚úÖ Saved: best_hyperparameters.csv
