### Importing Packages

In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN
from sklearn.ensemble import RandomForestClassifier

### Loading the Dataset

In [2]:
data = pd.read_csv('Dataset\Telcom_Churn.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenureGroup_1 - 12,tenureGroup_13 - 24,tenureGroup_25 - 36,tenureGroup_37 - 48,tenureGroup_49 - 60,tenureGroup_61 - 72
0,0,0,29.85,29.85,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,0
1,1,0,56.95,1889.5,0,0,1,1,0,1,...,0,0,0,1,0,0,1,0,0,0
2,2,0,53.85,108.15,1,0,1,1,0,1,...,0,0,0,1,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,0,1,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,1,0,1,0,1,...,0,0,1,0,1,0,0,0,0,0


### Assigning X and y 

In [3]:
data = data.drop('Unnamed: 0',axis=1)

In [4]:
X = data.drop('Churn',axis=1)
X.head()

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenureGroup_1 - 12,tenureGroup_13 - 24,tenureGroup_25 - 36,tenureGroup_37 - 48,tenureGroup_49 - 60,tenureGroup_61 - 72
0,0,29.85,29.85,1,0,0,1,1,0,1,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.5,0,1,1,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,0,1,1,0,1,0,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.3,1840.75,0,1,1,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
4,0,70.7,151.65,1,0,1,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0


In [5]:
y = data['Churn']
y.head()

0    0
1    0
2    1
3    0
4    1
Name: Churn, dtype: int64

### Splitting the data

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

### Decision Tree

In [7]:
DTmodel = DecisionTreeClassifier(criterion='gini',random_state=100,max_depth=6,min_samples_leaf=8)
DTmodel.fit(X_train,y_train)
y_pred = DTmodel.predict(X_test)
print('Score: ',DTmodel.score(X_test,y_test))
print('\nClassification Report\n',classification_report(y_test,y_pred,labels=[0,1]))

Score:  0.7810945273631841

Classification Report
               precision    recall  f1-score   support

           0       0.84      0.87      0.85      1031
           1       0.60      0.53      0.56       376

    accuracy                           0.78      1407
   macro avg       0.72      0.70      0.71      1407
weighted avg       0.77      0.78      0.78      1407



Observation:
1. We haven't checked accuracy because the dataset is highly imbalanced.
2. The presision score in class 1 i.e. Churned customers is low.

Hence we need do go ahead with SMOTEENN (SMOTE + ENN)

In [8]:
# SMOTEENN (UpSampling)
sm = SMOTEENN()
X_resampled,y_resampled = sm.fit_resample(X,y)

# Splitting
Xr_train,Xr_test,yr_train,yr_test = train_test_split(X_resampled, y_resampled,test_size=0.2)

# Model
DTmodel_smote = DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
DTmodel_smote.fit(Xr_train,yr_train)
yr_pred = DTmodel_smote.predict(Xr_test)
print('Score: ',DTmodel_smote.score(Xr_test, yr_test))
print('\n\n',metrics.classification_report(yr_test, yr_pred))

Score:  0.9437340153452686


               precision    recall  f1-score   support

           0       0.94      0.94      0.94       529
           1       0.95      0.95      0.95       644

    accuracy                           0.94      1173
   macro avg       0.94      0.94      0.94      1173
weighted avg       0.94      0.94      0.94      1173



In [9]:
print(metrics.confusion_matrix(yr_test, yr_pred))

[[497  32]
 [ 34 610]]


### Random Forest

In [11]:
RFmodel = RandomForestClassifier()
RFmodel = RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)
RFmodel.fit(X_train,y_train)
y_pred = RFmodel.predict(X_test)
print('Score: ',RFmodel.score(X_test,y_test))
print('Classification Report\n\n',classification_report(y_test, y_pred, labels=[0,1]))

Score:  0.7874911158493249
Classification Report

               precision    recall  f1-score   support

           0       0.82      0.91      0.86      1031
           1       0.65      0.44      0.52       376

    accuracy                           0.79      1407
   macro avg       0.73      0.68      0.69      1407
weighted avg       0.77      0.79      0.77      1407



In [12]:
# SMOTEENN
sm = SMOTEENN()
X_resampled,y_resampled = sm.fit_resample(X,y)

# Splitting
Xr_train1,Xr_test1,yr_train1,yr_test1 = train_test_split(X_resampled, y_resampled,test_size=0.2)

# Model
RFmodel_smote = RandomForestClassifier(n_estimators=100,criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
RFmodel_smote.fit(Xr_train1,yr_train1)
yr_pred1 = RFmodel_smote.predict(Xr_test1)
print('Score: ',RFmodel_smote.score(Xr_test1, yr_test1))
print('\n\n',metrics.classification_report(yr_test1, yr_pred1))

Score:  0.9400171379605827


               precision    recall  f1-score   support

           0       0.96      0.91      0.93       521
           1       0.93      0.97      0.95       646

    accuracy                           0.94      1167
   macro avg       0.94      0.94      0.94      1167
weighted avg       0.94      0.94      0.94      1167



In [14]:
print(metrics.confusion_matrix(yr_test1, yr_pred1))

[[473  48]
 [ 22 624]]


### Saving the model

In [16]:
import pickle
filename = 'model.sav'
pickle.dump(RFmodel_smote, open(filename, 'wb'))
load_model = pickle.load(open(filename, 'rb'))
model_score_r1 = load_model.score(Xr_test1, yr_test1)
model_score_r1

0.9400171379605827