In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [4]:
# 1️⃣ Load Preprocessed Data
# ---------------------------
# Location of the input table, relative to the notebook's working directory.
# presumably written by an earlier preprocessing step — verify against that step.
data_path = '../Data/preprocessed.csv'
df = pd.read_csv(data_path)

# Report the table dimensions, then preview the first rows as the cell output.
print('Shape:', df.shape)
df.head()

Shape: (3000, 7)


Unnamed: 0,Income,Kidhome,Recency,MntWines,NumWebPurchases,TotalChildren,Response
0,1.883733,1.230379,-0.807246,1.358131,1.4214,0.874273,1
1,-0.82082,-1.224651,-0.021877,-1.393109,-0.845948,-0.003805,0
2,0.582383,0.002864,-0.738953,0.623531,1.648135,-0.881883,1
3,-0.448523,1.230379,1.992767,-0.850347,-0.845948,1.752351,0
4,1.197684,0.002864,-1.182858,0.960418,1.87487,0.874273,1


In [5]:
# 2️⃣ Features and Target
# ---------------------------
# 'Response' is the label to predict; every other column of df is a feature.
target_col = 'Response'
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation. The split is seeded (random_state=42)
# so it is repeatable, and stratified on y so both partitions keep the same
# class proportions.
# NOTE(review): the original comment said the split is done "again, to be sure" —
# presumably an earlier split exists in the preprocessing step; confirm that any
# scaling/resampling there was fitted on training rows only.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print('Train:', X_train.shape, 'Test:', X_test.shape)

Train: (2400, 6) Test: (600, 6)


In [6]:
# 3️⃣ Initialize Models
# ---------------------------
# Candidate classifiers, keyed by the display name used when reporting results.
#
# The three ensemble models use a random number generator while fitting, so
# they are given a fixed seed; without it, metrics can change between runs of
# the notebook even though the train/test split itself is seeded.
# Logistic Regression and SVM are left at their defaults: per the scikit-learn
# docs, their random_state has no effect with the default solver / with
# probability=False.
SEED = 42  # same value as the random_state passed to train_test_split

models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "AdaBoost": AdaBoostClassifier(random_state=SEED),
    "Gradient Boosting": GradientBoostingClassifier(random_state=SEED),
}

In [7]:
# 4️⃣ Train & Evaluate
# ---------------------------
# Summary metrics to record per model: column label -> scoring function.
# The insertion order here fixes the column order of the comparison table.
metric_fns = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
}

# One summary row (dict) per model; reset here so re-running the cell does not
# accumulate duplicate rows.
results = []

for name, model in models.items():
    # Fit on the training partition, then predict the held-out partition
    # (scikit-learn's fit() returns the estimator itself, so the calls chain).
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Build the summary row: model name first, then each metric in table order.
    summary_row = {"Model": name}
    for label, score_fn in metric_fns.items():
        summary_row[label] = score_fn(y_test, y_pred)
    results.append(summary_row)

    # Detailed per-class report and confusion matrix for this model.
    print(f"\n✅ {name} Report:")
    print(classification_report(y_test, y_pred))
    print('Confusion Matrix:')
    print(confusion_matrix(y_test, y_pred))


✅ Logistic Regression Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       300
           1       1.00      1.00      1.00       300

    accuracy                           1.00       600
   macro avg       1.00      1.00      1.00       600
weighted avg       1.00      1.00      1.00       600

Confusion Matrix:
[[300   0]
 [  0 300]]

✅ SVM Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       300
           1       1.00      1.00      1.00       300

    accuracy                           1.00       600
   macro avg       1.00      1.00      1.00       600
weighted avg       1.00      1.00      1.00       600

Confusion Matrix:
[[300   0]
 [  0 300]]

✅ Random Forest Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       300
           1       1.00      1.00      1.00       300

    accuracy                 

In [8]:
# 5️⃣ Compare All Models
# ---------------------------
# Collect the per-model summary rows into one table.
results_df = pd.DataFrame(results)

# Rank models from best to worst F1-Score and renumber the rows 0..n-1.
ranked = results_df.sort_values(by="F1-Score", ascending=False).reset_index(drop=True)

# NOTE(review): in the recorded output every model scores exactly 1.0 on all
# four metrics. Identical perfect scores across five different model families
# usually indicate target leakage or train/test overlap upstream rather than
# genuinely perfect models — verify the preprocessing before trusting this table.
print('\n📊 Model Comparison:')
print(ranked)


📊 Model Comparison:
                 Model  Accuracy  Precision  Recall  F1-Score
0  Logistic Regression       1.0        1.0     1.0       1.0
1                  SVM       1.0        1.0     1.0       1.0
2        Random Forest       1.0        1.0     1.0       1.0
3             AdaBoost       1.0        1.0     1.0       1.0
4    Gradient Boosting       1.0        1.0     1.0       1.0
