In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve, confusion_matrix, classification_report, precision_recall_curve, roc_auc_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

In [2]:
marketing = pd.read_csv('data/marketing_features.csv')
marketing.columns

Index(['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome',
       'Teenhome', 'Dt_Customer', 'Recency', 'MntWines', 'MntFruits',
       'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases',
       'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth',
       'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1',
       'AcceptedCmp2', 'Response', 'Complain', 'Country', 'Age',
       'days_since_joining', 'Age_bin', 'Income_bin', 'People_at_home',
       'Income_per_person', 'Response1', '2n Cycle', 'Basic', 'Graduation',
       'Master', 'PhD', 'Absurd', 'Alone', 'Divorced', 'Married', 'Single',
       'Together', 'Widow', 'YOLO', 'AUS', 'CA', 'GER', 'IND', 'ME', 'SA',
       'SP', 'US', 'Age_35_44', 'Age_45_59', 'Age_above59', 'Age_under35',
       'Income_35k_65k', 'Income_above65k', 'Income_under35k'],
      dtype='object')

## Decision Trees

In [3]:
X1 = marketing[['Age','Income','Kidhome','Teenhome','Recency','NumWebVisitsMonth','days_since_joining','Income_per_person','People_at_home']]
X2 = marketing.iloc[:,35:]
X = pd.concat([X1,X2], axis=1)
y = marketing['Response']

In [4]:
## Train Val Test Split (80-10-10)
X_train_int, X_test, y_train_int, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_int, y_train_int, test_size=0.1111, random_state=42)

In [5]:
dt = DecisionTreeClassifier(max_depth=5, min_samples_split=20, random_state=42)
dt.fit(X_train, y_train)

DecisionTreeClassifier(max_depth=5, min_samples_split=20, random_state=42)

In [6]:
y_val_pred = dt.predict(X_val)

In [7]:
print('Confusion Matrix:')
print(confusion_matrix(y_val, y_val_pred))
print('Classification Report:')
print(classification_report(y_val, y_val_pred))
print('AUC ROC:')
print(roc_auc_score(y_val,y_val_pred))

Confusion Matrix:
[[144  18]
 [ 36  26]]
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.89      0.84       162
           1       0.59      0.42      0.49        62

    accuracy                           0.76       224
   macro avg       0.70      0.65      0.67       224
weighted avg       0.74      0.76      0.74       224

AUC ROC:
0.6541218637992832


## Random Forests

In [8]:
n_est = [100]
depth = [5,8,10,12,14,16,18,20]
for i in n_est:
    for j in depth:
        rf = RandomForestClassifier(n_estimators=i, max_depth=j, random_state=42)
        rf.fit(X_train, y_train)
        y_val_pred = rf.predict(X_val)
        f1 = round(f1_score(y_val, y_val_pred),4)
        auc_roc = round(roc_auc_score(y_val, y_val_pred),4)
        print('n:%s,\t depth:%s,\t f1:%s,\t auc_roc:%s' %(i,j,f1,auc_roc))

n:100,	 depth:5,	 f1:0.4948,	 auc_roc:0.6596
n:100,	 depth:8,	 f1:0.551,	 auc_roc:0.69
n:100,	 depth:10,	 f1:0.5263,	 auc_roc:0.6769
n:100,	 depth:12,	 f1:0.5545,	 auc_roc:0.6919
n:100,	 depth:14,	 f1:0.5524,	 auc_roc:0.6907
n:100,	 depth:16,	 f1:0.5577,	 auc_roc:0.6937
n:100,	 depth:18,	 f1:0.5385,	 auc_roc:0.6826
n:100,	 depth:20,	 f1:0.5631,	 auc_roc:0.6968


In [9]:
rf = RandomForestClassifier(n_estimators=20, max_depth=14, random_state=42)
rf.fit(X_train, y_train)
y_val_pred = rf.predict(X_val)

In [10]:
print('Confusion Matrix:')
print(confusion_matrix(y_val, y_val_pred))
print('Classification Report:')
print(classification_report(y_val, y_val_pred))
print('AUC ROC:')
print(roc_auc_score(y_val,y_val_pred))

Confusion Matrix:
[[149  13]
 [ 35  27]]
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.92      0.86       162
           1       0.68      0.44      0.53        62

    accuracy                           0.79       224
   macro avg       0.74      0.68      0.70       224
weighted avg       0.77      0.79      0.77       224

AUC ROC:
0.6776184786937475


## XGB

In [11]:
n_est = [30]
depth = [3,5,8,10,12,14,16,18,20]
for i in n_est:
    for j in depth:
        xgb = XGBClassifier(n_estimators=i, max_depth=j, random_state=42)
        xgb.fit(X_train, y_train)
        y_val_pred = xgb.predict(X_val)
        f1 = round(f1_score(y_val, y_val_pred),4)
        auc_roc = round(roc_auc_score(y_val, y_val_pred),4)
        print('n:%s,\t depth:%s,\t f1:%s,\t auc_roc:%s' %(i,j,f1,auc_roc))

n:30,	 depth:3,	 f1:0.5243,	 auc_roc:0.6745
n:30,	 depth:5,	 f1:0.4902,	 auc_roc:0.6553
n:30,	 depth:8,	 f1:0.4954,	 auc_roc:0.656
n:30,	 depth:10,	 f1:0.5,	 auc_roc:0.6591
n:30,	 depth:12,	 f1:0.5,	 auc_roc:0.6591
n:30,	 depth:14,	 f1:0.486,	 auc_roc:0.651
n:30,	 depth:16,	 f1:0.5185,	 auc_roc:0.6703
n:30,	 depth:18,	 f1:0.5273,	 auc_roc:0.6752
n:30,	 depth:20,	 f1:0.537,	 auc_roc:0.6814


In [12]:
xgb = XGBClassifier(n_estimators=30, max_depth=5, random_state=42)
xgb.fit(X_train, y_train)
y_val_pred = xgb.predict(X_val)



In [13]:
print('Confusion Matrix:')
print(confusion_matrix(y_val, y_val_pred))
print('Classification Report:')
print(classification_report(y_val, y_val_pred))
print('AUC ROC:')
print(roc_auc_score(y_val,y_val_pred))

Confusion Matrix:
[[147  15]
 [ 37  25]]
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.91      0.85       162
           1       0.62      0.40      0.49        62

    accuracy                           0.77       224
   macro avg       0.71      0.66      0.67       224
weighted avg       0.75      0.77      0.75       224

AUC ROC:
0.6553166069295102


## Test Performance

### Decision Tree

In [14]:
y_test_pred = dt.predict(X_test)
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_test_pred))
print('Classification Report:')
print(classification_report(y_test, y_test_pred))
print('AUC ROC:')
print(roc_auc_score(y_test,y_test_pred))

Confusion Matrix:
[[140  29]
 [ 31  24]]
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.83      0.82       169
           1       0.45      0.44      0.44        55

    accuracy                           0.73       224
   macro avg       0.64      0.63      0.63       224
weighted avg       0.73      0.73      0.73       224

AUC ROC:
0.6323830016137708


## Random Forests

In [15]:
y_test_pred = rf.predict(X_test)
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_test_pred))
print('Classification Report:')
print(classification_report(y_test, y_test_pred))
print('AUC ROC:')
print(roc_auc_score(y_test,y_test_pred))

Confusion Matrix:
[[151  18]
 [ 32  23]]
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.89      0.86       169
           1       0.56      0.42      0.48        55

    accuracy                           0.78       224
   macro avg       0.69      0.66      0.67       224
weighted avg       0.76      0.78      0.76       224

AUC ROC:
0.6558364712210867


## XGBoost 

In [16]:
y_test_pred = xgb.predict(X_test)
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_test_pred))
print('Classification Report:')
print(classification_report(y_test, y_test_pred))
print('AUC ROC:')
print(roc_auc_score(y_test,y_test_pred))

Confusion Matrix:
[[150  19]
 [ 35  20]]
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.89      0.85       169
           1       0.51      0.36      0.43        55

    accuracy                           0.76       224
   macro avg       0.66      0.63      0.64       224
weighted avg       0.74      0.76      0.74       224

AUC ROC:
0.6256051640667026
