In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pickle

In [6]:
# Read the pickled object stored at ./artifacts/df.pkl into `loaded_data`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only load artifacts that come from a trusted source.
with open('./artifacts/df.pkl', 'rb') as file:
    loaded_data = pickle.load(file)

In [9]:
# Bind the unpickled object to the name `df` (plain assignment, no copy) and display it.
df = loaded_data
df

Unnamed: 0,AccountWeeks,DataUsage,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins,ContractRenewal,DataPlan,CustServCalls,Churn
0,0.705099,-0.453011,1.189742,0.483131,2.055854,-0.202459,-0.174752,1,1,1,0
1,0.152564,-0.453011,-0.480138,1.172095,1.604486,-0.240904,1.367827,1,1,1,0
2,0.941900,-0.453011,0.839632,0.695120,-0.329949,-1.829964,0.742457,1,0,0,0
3,-0.452594,-0.453011,1.743142,-1.583763,-0.007543,0.023618,-1.592257,0,0,2,0
4,-0.689395,-0.453011,-0.397854,0.642122,-1.039241,-1.249017,-0.133061,0,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...
5695,1.310257,0.541480,0.488608,-0.682809,0.490822,2.011430,1.447862,1,0,1,1
5696,-2.136512,-0.453011,2.015352,0.695120,1.095355,0.454180,0.391381,1,0,1,1
5697,1.020834,-0.453011,-0.817369,-0.576815,-1.248179,-1.106024,0.060939,0,0,3,1
5698,-0.794640,-0.162949,1.612410,0.377136,0.896696,0.875524,-0.152492,1,0,2,1


In [11]:
# Feature matrix: every column except the 'Churn' target.
X = df.drop(columns='Churn')

In [12]:
# Target vector: the 'Churn' column.
y = df['Churn']

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the feature values look already standardized — if any scaling or
# resampling was fitted on the full dataset before this split, the held-out
# scores may be optimistic. Confirm against the upstream preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [14]:
# Sanity check: shapes of the four pieces produced by the split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((4275, 10), (1425, 10), (4275,), (1425,))

### Modeling

In [15]:
from sklearn.linear_model import LogisticRegression

In [16]:
# Baseline model: logistic regression, allowing up to 200 solver iterations.
classifier = LogisticRegression(max_iter=200)

In [17]:
# Fit the baseline model on the training split only.
classifier.fit(X_train, y_train)

In [20]:
# Predict labels for the held-out rows and display the resulting array.
y_pred = classifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 1, 0, 1], dtype=int64)

In [21]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [24]:
# Confusion matrix on the held-out split (rows: true class, columns: predicted class).
confusion_matrix(y_test, y_pred)

array([[592, 146],
       [146, 541]], dtype=int64)

In [25]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

0.7950877192982456

In [27]:
# Per-class precision / recall / F1 on the held-out split; print() is needed
# because classification_report returns a pre-formatted multi-line string.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.80      0.80       738
           1       0.79      0.79      0.79       687

    accuracy                           0.80      1425
   macro avg       0.79      0.79      0.79      1425
weighted avg       0.80      0.80      0.80      1425



In [30]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

In [31]:
def evaluate_model(true, predicted):
    """Score a set of predictions against the ground-truth labels.

    Args:
        true: ground-truth labels.
        predicted: labels produced by a model, aligned with ``true``.

    Returns:
        A 3-tuple ``(accuracy, conf_matrix, report)`` holding, in that order,
        the results of ``accuracy_score``, ``confusion_matrix`` and
        ``classification_report`` for the given labels.
    """
    return (
        accuracy_score(true, predicted),
        confusion_matrix(true, predicted),
        classification_report(true, predicted),
    )

In [33]:
# Candidate classifiers, all with library-default hyperparameters. Estimators
# that use randomness internally are seeded with the same value as the
# train/test split (42) so that Restart & Run All reproduces the same scores.
models = {
    'Logistic Regression': LogisticRegression(),
    'SVC': SVC(),
    'Random Forest': RandomForestClassifier(random_state=42),
    'K Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'Ada Boost': AdaBoostClassifier(random_state=42),
}

model_list = []  # fitted estimator objects, in evaluation order
acc_list = []    # held-out accuracy for the matching entry of model_list

# Iterate over (name, estimator) pairs directly instead of rebuilding
# list(models.keys()) / list(models.values()) on every access.
for name, model in models.items():
    model.fit(X_train, y_train)

    # Make predictions on the held-out split.
    # NOTE: this rebinds the notebook-level `y_pred` on every iteration.
    y_pred = model.predict(X_test)

    accuracy, conf_matrix, report = evaluate_model(y_test, y_pred)

    print(name)
    model_list.append(model)

    # The metrics are computed on X_test / y_test, so label them as test
    # performance rather than training performance.
    print('Model Test Performance')
    print("Accuracy:", accuracy)
    print("Confusion Matrix:\n", conf_matrix)
    print("Report", report)

    acc_list.append(accuracy)

    print('=' * 35)
    print('\n')

Logistic Regression
Model Training Performance
Accuracy: 0.7950877192982456
Confusion Matrix:
 [[592 146]
 [146 541]]
Report               precision    recall  f1-score   support

           0       0.80      0.80      0.80       738
           1       0.79      0.79      0.79       687

    accuracy                           0.80      1425
   macro avg       0.79      0.79      0.79      1425
weighted avg       0.80      0.80      0.80      1425



SVC
Model Training Performance
Accuracy: 0.8898245614035087
Confusion Matrix:
 [[657  81]
 [ 76 611]]
Report               precision    recall  f1-score   support

           0       0.90      0.89      0.89       738
           1       0.88      0.89      0.89       687

    accuracy                           0.89      1425
   macro avg       0.89      0.89      0.89      1425
weighted avg       0.89      0.89      0.89      1425



Random Forest
Model Training Performance
Accuracy: 0.9228070175438596
Confusion Matrix:
 [[672  66]
 [ 44 64