<a href="https://colab.research.google.com/github/silversilencee/Customer_Churn/blob/main/Telco_Customer_Churn_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing all required libraries**

In [None]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression


from sklearn.metrics import classification_report
from imblearn.combine import SMOTEENN
import pickle

# **Importing the dataset**

In [None]:
df = pd.read_csv('telecom_churn.csv')
df.head(20)

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.7,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.7,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.0,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.0,2,299.4,71,57.0,3.1,6.6
4,0,75,0,0,0.0,3,166.7,113,41.0,7.42,10.1
5,0,118,0,0,0.0,0,223.4,98,57.0,11.03,6.3
6,0,121,1,1,2.03,3,218.2,88,87.3,17.43,7.5
7,0,147,0,0,0.0,0,157.0,79,36.0,5.16,7.1
8,0,117,1,0,0.19,1,184.5,97,63.9,17.58,8.7
9,0,141,0,1,3.02,0,258.6,84,93.2,11.1,11.2


# **Creating the x and y variables to train the model**

Now we select
*  **x = we get all the columns**
*  **y = churn**
then split the data set as *training* and *testing* dataset 

In [None]:
x = df.iloc[:,1:11]
y = df.iloc[:,[0]]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# **Decision Tree Classifier**

In [None]:
dt = DecisionTreeClassifier()
dt.fit(x_train,y_train)
y_pred = dt.predict(x_test)

In [None]:
dt.score(x_test,y_test)

0.8875562218890555

In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.93      0.93       566
           1       0.62      0.67      0.64       101

    accuracy                           0.89       667
   macro avg       0.78      0.80      0.79       667
weighted avg       0.89      0.89      0.89       667



Since there is less data we get poor score in order to increase the accuracy we use smoteen to over-sample the data or upscale the data.

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

xr_train,xr_test,yr_train,yr_test=train_test_split(x_resampled, y_resampled,test_size=0.2)

dts = DecisionTreeClassifier()
dts.fit(xr_train,yr_train)
yr_pred = dts.predict(xr_test)

In [None]:
dts.score(xr_test,yr_test)

0.8947368421052632

In [None]:
print(classification_report(yr_test, yr_pred))

              precision    recall  f1-score   support

           0       0.88      0.86      0.87       345
           1       0.90      0.92      0.91       491

    accuracy                           0.89       836
   macro avg       0.89      0.89      0.89       836
weighted avg       0.89      0.89      0.89       836



We save the model as a pickle file.

In [None]:
filename = 'decisiontree_sm.sav'
pickle.dump(dts, open(filename, 'wb'))

# **Random Forest Classifier**

In [None]:
rf = RandomForestClassifier()
rf.fit(x_train,y_train)
y_pred = rf.predict(x_test)

  


In [None]:
rf.score(x_test,y_test)

0.9385307346326837

In [None]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.94      0.99      0.96       566
           1       0.91      0.66      0.77       101

    accuracy                           0.94       667
   macro avg       0.92      0.83      0.87       667
weighted avg       0.94      0.94      0.93       667



Since there is less data we get poor score in order to increase the accuracy we use smoteen to over-sample the data or upscale the data.

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

xf_train,xf_test,yf_train,yf_test=train_test_split(x_resampled, y_resampled,test_size=0.2)

rfs = RandomForestClassifier()
rfs.fit(xf_train,yf_train)
yf_pred = rfs.predict(xf_test)

In [None]:
rfs.score(xf_test,yf_test)

In [None]:
print(classification_report(yf_test,yf_pred))

We save the model as a pickle file.

In [None]:
filename = 'randomforest_sm.sav'
pickle.dump(rfs, open(filename, 'wb'))

# **Naive Bayes Classifier**

In [None]:
nb = GaussianNB()
nb.fit(x_train,y_train)
y_pred = nb.predict(x_test)

  y = column_or_1d(y, warn=True)


In [None]:
nb.score(x_test,y_test)

0.8455772113943029

In [None]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.90      0.92      0.91       566
           1       0.49      0.43      0.46       101

    accuracy                           0.85       667
   macro avg       0.69      0.67      0.68       667
weighted avg       0.84      0.85      0.84       667



Since there is less data we get poor score in order to increase the accuracy we use smoteen to over-sample the data or upscale the data.

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

xb_train,xb_test,yb_train,yb_test=train_test_split(x_resampled, y_resampled,test_size=0.2)

nbs = DecisionTreeClassifier()
nbs.fit(xb_train,yb_train)
yb_pred = nbs.predict(xb_test)

In [None]:
nbs.score(xb_test,yb_test)

0.9081015719467956

In [None]:
print(classification_report(yb_test,yb_pred))

              precision    recall  f1-score   support

           0       0.89      0.89      0.89       353
           1       0.92      0.92      0.92       474

    accuracy                           0.91       827
   macro avg       0.91      0.91      0.91       827
weighted avg       0.91      0.91      0.91       827



We save the model as a pickle file.


In [None]:
filename = 'naivebayes_sm.sav'
pickle.dump(nbs, open(filename, 'wb'))

# **K-Nearest Neighbours**

In [None]:
knn = KNeighborsClassifier()
knn.fit(x_train,y_train)
y_pred = knn.predict(x_test)

In [None]:
knn.score(x_test,y_test)

In [None]:
print(classification_report(y_test,y_pred))

Since there is less data we get poor score in order to increase the accuracy we use smoteen to over-sample the data or upscale the data.

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

xn_train,xn_test,yn_train,yn_test=train_test_split(x_resampled, y_resampled,test_size=0.2)

knns = KNeighborsClassifier()
knns.fit(xn_train,yn_train)
yn_pred = knns.predict(xn_test)

In [None]:
knns.score(xn_test,yn_test)

In [None]:
print(classification_report(yn_test,yn_pred))

Since the score is so less we cannot use the model.

# **Kernel SVM**

In [None]:
svm = SVC()
svm.fit(x_train,y_train)
y_pred = svm.predict(x_test)

In [None]:
svm.score(x_test,y_test)

0.8515742128935532

In [None]:
print(classification_report(y_test,y_pred))

Since there is less data we get poor score in order to increase the accuracy we use smoteen to over-sample the data or upscale the data.

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

xm_train,xm_test,ym_train,ym_test=train_test_split(x_resampled, y_resampled,test_size=0.2)

svms = SVC()
svms.fit(xm_train,ym_train)
ym_pred = svms.predict(xm_test)

In [None]:
svms.score(xm_test,ym_test)

In [None]:
print(classification_report(ym_test,ym_pred))

Since the score is so less we cannot use the model.

# **Logistic Regression**

In [None]:
lr = LogisticRegression()
lr.fit(x_train,y_train)
y_pred = lr.predict(x_test)

In [None]:
lr.score(x_test,y_test)

0.8515742128935532

In [None]:
print(classification_report(y_test,y_pred))

Since there is less data we get poor score in order to increase the accuracy we use smoteen to over-sample the data or upscale the data.

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x,y)

xr_train,xr_test,yr_train,yr_test=train_test_split(x_resampled, y_resampled,test_size=0.2)

lrs = SVC()
lrs.fit(xr_train,yr_train)
yr_pred = lrs.predict(xr_test)

  y = column_or_1d(y, warn=True)


In [None]:
lrs.score(xr_test,yr_test)

In [None]:
print(classification_report(yr_test,yr_pred))

Since the score is so less we cannot use the model.