In [4]:
from pathlib import Path

import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [5]:
# 1. Load Preprocessed Data
# ---------------------------
# Read the preprocessed table, separate the 'Response' target column from the
# remaining feature columns, and hold out 20% of the rows for testing.
# The split is stratified on the target and uses a fixed seed.
df = pd.read_csv('../Data/preprocessed.csv')

target_column = 'Response'
y = df[target_column]
X = df.drop(columns=[target_column])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# 2. Define Models
# ---------------------------
# Candidate classifiers keyed by the display name used in the results table.
# Every estimator receives the same seed so that a fresh "Restart & Run All"
# reproduces the same fitted models and metrics; without it the SVM
# probability calibration and the three ensemble models vary between runs.
RANDOM_STATE = 42  # same seed value the train/test split uses

models = {
    "Logistic Regression": LogisticRegression(random_state=RANDOM_STATE),
    "SVM": SVC(probability=True, random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE)
}

In [7]:
# 3. Train & Evaluate All Models
# ---------------------------
# Fit each candidate on the training split and score its predictions on the
# held-out test split. The fitted estimator is stored alongside its metrics
# so the winner can be persisted later without retraining.
metric_fns = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
}

results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Row layout: model name, one column per metric, then the fitted object.
    row = {"Model": name}
    for label, metric in metric_fns.items():
        row[label] = metric(y_test, y_pred)
    row["TrainedModel"] = model

    results.append(row)

In [8]:
# 4. Pick Best Model by F1-Score
# ---------------------------
# Build the comparison table (metrics only, the fitted estimator column is
# dropped) and select the row with the highest F1-Score. idxmax returns the
# first row holding the maximum, so ties go to the model defined earliest.
# NOTE(review): the recorded run shows every model at exactly 1.0 on every
# metric; that often signals target leakage in the preprocessed features.
# Confirm before trusting this ranking.
results_df = pd.DataFrame(results).drop('TrainedModel', axis=1)

best_idx = results_df['F1-Score'].idxmax()
best_row = results_df.loc[best_idx]
best_model_name = best_row['Model']

ranking = results_df.sort_values(by="F1-Score", ascending=False).reset_index(drop=True)

print(f"‚úÖ Best Model: {best_model_name}")
print('\nüìä Model Comparison:')
print(ranking)

‚úÖ Best Model: Logistic Regression

üìä Model Comparison:
                 Model  Accuracy  Precision  Recall  F1-Score
0  Logistic Regression       1.0        1.0     1.0       1.0
1                  SVM       1.0        1.0     1.0       1.0
2        Random Forest       1.0        1.0     1.0       1.0
3             AdaBoost       1.0        1.0     1.0       1.0
4    Gradient Boosting       1.0        1.0     1.0       1.0


In [9]:
# 5. Save Best Model
# ---------------------------
# results_df was built from `results` in list order with a default integer
# index, so the idxmax label doubles as the position of the winning entry
# in the `results` list.
best_model_obj = results[results_df['F1-Score'].idxmax()]['TrainedModel']

# Create the target directory first: joblib.dump does not create missing
# parent folders and would fail on a fresh checkout.
model_path = Path('../App/model/model.pkl')
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(best_model_obj, model_path)

print(f"\n‚úÖ Best model '{best_model_name}' saved to ../App/model/model.pkl")


‚úÖ Best model 'Logistic Regression' saved to ../App/model/model.pkl


In [10]:
# 6. Save Results Table (optional)
# ---------------------------
# Write the metrics-only comparison table to CSV without the index column.
# The output directory is created if it is missing, because to_csv does not
# create parent folders itself.
comparison_path = Path('../Evaluation/model_comparison.csv')
comparison_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(comparison_path, index=False)
print("‚úÖ Model comparison table saved to ../Evaluation/model_comparison.csv")

‚úÖ Model comparison table saved to ../Evaluation/model_comparison.csv
