In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [2]:
df=pd.read_csv("tel_churn.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1-12,tenure_group_13-24,tenure_group_25-36,tenure_group_37-48,tenure_group_49-60,tenure_group_61-72
0,0,0,29.85,29.85,0,True,False,False,True,True,...,False,False,True,False,True,False,False,False,False,False
1,1,0,56.95,1889.5,0,False,True,True,False,True,...,False,False,False,True,False,False,True,False,False,False
2,2,0,53.85,108.15,1,False,True,True,False,True,...,False,False,False,True,True,False,False,False,False,False
3,3,0,42.3,1840.75,0,False,True,True,False,True,...,True,False,False,False,False,False,False,True,False,False
4,4,0,70.7,151.65,1,True,False,True,False,True,...,False,False,True,False,True,False,False,False,False,False


In [3]:
df=df.drop('Unnamed: 0',axis=1)

In [5]:
X=df.drop('Churn',axis=1)

In [6]:
y=df['Churn']

## Train Test Split

In [16]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.33,random_state=42)

In [17]:
X_train.shape

(4711, 50)

In [18]:
y_train.shape

(4711,)

## Decision Tree Classifier

In [19]:
model_dt=DecisionTreeClassifier(criterion='gini',random_state=100,max_depth=6,min_samples_leaf=8)

In [20]:
model_dt.fit(X_train,y_train)

In [21]:
y_pred=model_dt.predict(X_test)

In [28]:
y_pred

array([0, 0, 1, ..., 0, 0, 1], dtype=int64)

In [29]:
from sklearn.metrics import accuracy_score

In [31]:
y_test

2476    0
6773    0
6116    1
3047    0
4092    0
       ..
1114    0
4949    0
298     0
3307    0
5576    1
Name: Churn, Length: 2321, dtype: int64

In [32]:
model_dt.score(X_test,y_test)

0.7746660922016372

In [None]:
print(classification_report(y_test, y_pred, labels=[0,1]))

In [34]:
print(classification_report(y_test,y_pred,labels=[0,1]))

              precision    recall  f1-score   support

           0       0.85      0.84      0.85      1711
           1       0.57      0.60      0.58       610

    accuracy                           0.77      2321
   macro avg       0.71      0.72      0.71      2321
weighted avg       0.78      0.77      0.78      2321



In [None]:
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_sample(x,y)

In [37]:
sm=SMOTEENN()
X_resampled,y_resampled=sm.fit_resample(X,y)

In [38]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [39]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [40]:
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))

0.9148020654044751
              precision    recall  f1-score   support

           0       0.92      0.89      0.91       542
           1       0.91      0.93      0.92       620

    accuracy                           0.91      1162
   macro avg       0.92      0.91      0.91      1162
weighted avg       0.91      0.91      0.91      1162



In [41]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[485  57]
 [ 42 578]]


Now we can see quite better results, i.e. Accuracy: 92 %, and a very good recall, precision & f1 score for minority class.
Let's try with some other classifier.

## Random Forest Classifier

In [43]:
from sklearn.ensemble import RandomForestClassifier

In [45]:
model_rf=RandomForestClassifier(n_estimators=100,criterion='gini',random_state=100,max_depth=6,min_samples_leaf=8)

In [47]:
model_rf.fit(X_train,y_train)

In [49]:
y_pred=model_rf.predict(X_test)

In [50]:
model_rf.score(X_test,y_test)

0.7936234381732012

In [51]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.82      0.92      0.87      1711
           1       0.66      0.44      0.53       610

    accuracy                           0.79      2321
   macro avg       0.74      0.68      0.70      2321
weighted avg       0.78      0.79      0.78      2321



In [54]:
sm = SMOTEENN()
X_resampled1, y_resampled1 = sm.fit_resample(x,y)

In [55]:
xr_train1,xr_test1,yr_train1,yr_test1=train_test_split(X_resampled1, y_resampled1,test_size=0.2)

In [56]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [57]:
model_rf_smote.fit(xr_train1,yr_train1)

In [58]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [59]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [60]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9358333333333333
              precision    recall  f1-score   support

           0       0.94      0.91      0.93       539
           1       0.93      0.96      0.94       661

    accuracy                           0.94      1200
   macro avg       0.94      0.93      0.93      1200
weighted avg       0.94      0.94      0.94      1200



In [61]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[491  48]
 [ 29 632]]


With RF Classifier, also we are able to get quite good results, infact better than Decision Tree.
We can now further go ahead and create multiple classifiers to see how the model performance is, but that's not covered here, so you can do it by yourself :)

## Pickling the model

In [62]:
import pickle

In [63]:
file_name='model.sav'

In [65]:
pickle.dump(model_rf_smote, open(file_name, 'wb'))

In [67]:
load_model = pickle.load(open(file_name, 'rb'))

In [68]:
model_score_r1 = load_model.score(xr_test1, yr_test1)

In [69]:
model_score_r1

0.9358333333333333

Our final model i.e. RF Classifier with SMOTEENN, is now ready and dumped in model.sav, which we will use and prepare API's so that we can access our model from UI.