# Decision Tree Classifier
## Machine Learning Model Training and Evaluation

In [109]:
import pandas as pd

In [110]:
# Load the cleaned training split.
df_train = pd.read_csv(
    filepath_or_buffer='../../data/train/customer_churn_dataset-training-clean.csv',
)
# Load the cleaned test split.
df_test = pd.read_csv(
    filepath_or_buffer='../../data/test/customer_churn_dataset-testing-clean.csv',
)

In [111]:
# Feature columns and target are declared once and reused for both splits so
# the train and test matrices always select the same columns in the same order.
FEATURE_COLUMNS = [
    'age', 'gender', 'tenure', 'usage_frequency', 'support_calls',
    'payment_delay', 'subscription_type', 'contract_length', 'total_spend',
    'last_interaction',
]
TARGET_COLUMN = 'churn'

# training data: feature matrix and target vector
X_train = df_train[FEATURE_COLUMNS]
y_train = df_train[TARGET_COLUMN]

# test data: identical feature/target selection
X_test = df_test[FEATURE_COLUMNS]
y_test = df_test[TARGET_COLUMN]

# Report the shape of each piece as a quick sanity check.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(440832, 10)
(64374, 10)
(440832,)
(64374,)


In [112]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
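# Disabled baseline kept for reference: construct the depth-3, class-weighted
# tree and fit it directly on the original (un-resampled) training data.
#dtC = DecisionTreeClassifier(max_depth=3, class_weight='balanced', random_state=42)
#dtC.fit(X_train, y_train)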

In [None]:
# Depth-limited decision tree with balanced class weights and a fixed seed.
dtC = DecisionTreeClassifier(
    max_depth=3,
    class_weight='balanced',
    random_state=42,
)

In [None]:
# Try SMOTE oversampling to reduce class imbalance, then fit the tree on the
# resampled training data.
# NOTE(review): dtC is also built with class_weight='balanced'; once the
# classes are resampled that weighting is probably redundant -- confirm.
# NOTE(review): SMOTE interpolates between feature values; if gender,
# subscription_type or contract_length are integer-encoded categories the
# synthetic rows may carry non-category values -- verify the encoding.
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X=X_train, y=y_train)
dtC.fit(X=X_train_resampled, y=y_train_resampled)

In [124]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation on the original (non-resampled) training data.
# cross_val_score fits fresh clones of dtC on each fold, so these scores do
# not reflect any fit previously performed on dtC itself.
dtC_train_cv_scores = cross_val_score(estimator=dtC, X=X_train, y=y_train, cv=5)
print("Cross-validation scores:", dtC_train_cv_scores)

Cross-validation scores: [0.92319122 0.92179614 0.92152304 0.92185196 0.92264592]


In [125]:
# 5-fold cross-validation run on the test split alone.
# NOTE(review): each fold trains a clone of dtC on test rows, so this measures
# how learnable the test set is by itself, not how the training-set model
# generalises.
dtC_test_cv_scores = cross_val_score(estimator=dtC, X=X_test, y=y_test, cv=5)
print("Cross-validation scores:", dtC_test_cv_scores)

Cross-validation scores: [0.89180583 0.88512621 0.88116505 0.88807767 0.88232096]


In [126]:
# Score the already-fitted dtC on the test split.
dtC_test_score = dtC.score(X=X_test, y=y_test)
print("Test score:", dtC_test_score)

Test score: 0.5853139466244136


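**Overfitting**, but promising train results: 5-fold cross-validation accuracy on the training data is about 0.92, while the fitted model reaches only about 0.59 on the test set. Cross-validating on the test set by itself still gives about 0.88, so the gap may also reflect a difference between the train and test distributions rather than overfitting alone.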

# Random Forest Classifier

In [117]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of depth-3 trees with balanced class weights and a fixed seed,
# fitted on the original training data.
rfC = RandomForestClassifier(
    max_depth=3,
    class_weight='balanced',
    random_state=42,
)
rfC.fit(X=X_train, y=y_train)

In [118]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the forest on the training data; each fold fits a
# fresh clone of rfC.
rfC_train_cv_scores = cross_val_score(estimator=rfC, X=X_train, y=y_train, cv=5)
print("Cross-validation scores:", rfC_train_cv_scores)

Cross-validation scores: [0.97223451 0.97106627 0.97181453 0.9734705  0.97250641]


In [119]:
from sklearn.metrics import classification_report, confusion_matrix

# In-sample check: predict on the same rows the forest was fitted on, then
# print per-class metrics followed by the confusion matrix.
y_pred_train = rfC.predict(X=X_train)
print(classification_report(y_true=y_train, y_pred=y_pred_train))
print(confusion_matrix(y_true=y_train, y_pred=y_pred_train))

              precision    recall  f1-score   support

           0       0.95      0.99      0.97    190833
           1       0.99      0.96      0.98    249999

    accuracy                           0.97    440832
   macro avg       0.97      0.97      0.97    440832
weighted avg       0.97      0.97      0.97    440832

[[189516   1317]
 [ 10887 239112]]


In [120]:
# 5-fold cross-validation run on the test split alone.
# NOTE(review): each fold trains a clone of rfC on test rows, so these scores
# are not a measure of how the training-set forest generalises.
rfC_test_cv_scores = cross_val_score(estimator=rfC, X=X_test, y=y_test, cv=5)
print("Cross-validation scores:", rfC_test_cv_scores)

# Score the forest fitted on the training data against the test split.
rfC_test_score = rfC.score(X=X_test, y=y_test)
print("Test score:", rfC_test_score)

Cross-validation scores: [0.91852427 0.91479612 0.90912621 0.91370874 0.91548858]
Test score: 0.5313014571100133


In [121]:
# Predict the test split with the fitted forest, then print per-class metrics
# followed by the confusion matrix.
y_pred = rfC.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.98      0.11      0.20     33881
           1       0.50      1.00      0.67     30493

    accuracy                           0.53     64374
   macro avg       0.74      0.55      0.43     64374
weighted avg       0.75      0.53      0.42     64374

[[ 3782 30099]
 [   73 30420]]


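Likewise, **overfitting**, but promising train results: training cross-validation accuracy is about 0.97, yet test accuracy is only about 0.53. On the test set the forest predicts class 1 for almost every row (class 0 recall is 0.11), and cross-validating on the test set alone still scores about 0.91, so a train/test distribution difference may be contributing alongside overfitting.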