# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.pipeline import Pipeline

# Importing Our Data

In [2]:
train=pd.read_csv('customer_churn_dataset-training-master.csv')
train.shape

(440833, 12)

In [3]:
test=pd.read_csv('customer_churn_dataset-testing-master.csv')
test.shape

(64374, 12)

In [4]:
df=pd.concat([train,test],axis=0)

In [5]:
df.drop("CustomerID", axis = 1, inplace = True)

In [6]:
df.head()

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,30.0,Female,39.0,14.0,5.0,18.0,Standard,Annual,932.0,17.0,1.0
1,65.0,Female,49.0,1.0,10.0,8.0,Basic,Monthly,557.0,6.0,1.0
2,55.0,Female,14.0,4.0,6.0,18.0,Basic,Quarterly,185.0,3.0,1.0
3,58.0,Male,38.0,21.0,7.0,7.0,Standard,Monthly,396.0,29.0,1.0
4,23.0,Male,32.0,20.0,5.0,8.0,Basic,Monthly,617.0,20.0,1.0


In [7]:
df.dropna(inplace = True)

# Data Visualization

Age

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Age",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Age",data=df)
plt.tight_layout()
plt.show()

Gender

In [None]:
plt.subplot(1,2,1)
sns.countplot(x="Gender",data=df)
plt.subplot(1,2,2)
df["Gender"].value_counts().plot(kind='pie',autopct='%0.2f%%')
plt.tight_layout()
plt.show()

Tenure

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Tenure",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Tenure",data=df)
plt.tight_layout()
plt.show()

Usage Frequency

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Usage Frequency",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Usage Frequency",data=df)
plt.tight_layout()
plt.show()

Support Calls

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Support Calls",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Support Calls",data=df)
plt.tight_layout()
plt.show()

Payment Delay

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Payment Delay",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Payment Delay",data=df)
plt.tight_layout()
plt.show()

Subscription Type

In [None]:
plt.subplot(1,2,1)
sns.countplot(x="Subscription Type",data=df)
plt.subplot(1,2,2)
df["Subscription Type"].value_counts().plot(kind='pie',autopct='%0.2f%%')
plt.tight_layout()
plt.show()

Contract Length

In [None]:
plt.subplot(1,2,1)
sns.countplot(x="Contract Length",data=df)
plt.subplot(1,2,2)
df["Contract Length"].value_counts().plot(kind='pie',autopct='%0.2f%%')
plt.tight_layout()
plt.show()

Total Spend

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Total Spend",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Total Spend",data=df)
plt.tight_layout()
plt.show()

Last Interaction

In [None]:
plt.subplot(1,2,1)
sns.kdeplot(x="Last Interaction",data=df)
plt.grid()
plt.subplot(1,2,2)
sns.boxplot(x="Last Interaction",data=df)
plt.tight_layout()
plt.show()

In [None]:
sns.heatmap(df.select_dtypes(exclude='object').corr(),annot=True)
plt.show()

# Data Preprocessing

In [8]:
X=df.drop('Churn',axis=1)
Y=df['Churn']

In [9]:
X

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction
0,30.0,Female,39.0,14.0,5.0,18.0,Standard,Annual,932.0,17.0
1,65.0,Female,49.0,1.0,10.0,8.0,Basic,Monthly,557.0,6.0
2,55.0,Female,14.0,4.0,6.0,18.0,Basic,Quarterly,185.0,3.0
3,58.0,Male,38.0,21.0,7.0,7.0,Standard,Monthly,396.0,29.0
4,23.0,Male,32.0,20.0,5.0,8.0,Basic,Monthly,617.0,20.0
...,...,...,...,...,...,...,...,...,...,...
64369,45.0,Female,33.0,12.0,6.0,21.0,Basic,Quarterly,947.0,14.0
64370,37.0,Male,6.0,1.0,5.0,22.0,Standard,Annual,923.0,9.0
64371,25.0,Male,39.0,14.0,8.0,30.0,Premium,Monthly,327.0,20.0
64372,50.0,Female,18.0,19.0,7.0,22.0,Standard,Monthly,540.0,13.0


In [10]:
for i in X.select_dtypes(include='object').columns:
    X[i]=X[i].map(X[i].value_counts(normalize=True))

In [11]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)

# Model Deployment

In [12]:
from sklearn.metrics import accuracy_score

Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
lr=LogisticRegression()

In [None]:
lr.fit(X_train,Y_train)    
Y_pred = lr.predict(X_test)  

In [None]:
print("The accuracy is",accuracy_score(Y_pred,Y_test)) 

Desicion Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
dt=DecisionTreeClassifier()

In [None]:
dt.fit(X_train,Y_train)    
Y_pred = dt.predict(X_test)  

In [None]:
print("The accuracy is",accuracy_score(Y_pred,Y_test)) 

Random Forest

In [15]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier()

In [None]:
rf.fit(X_train,Y_train)    
Y_pred = rf.predict(X_test)  

In [None]:
print("The accuracy is",accuracy_score(Y_pred,Y_test)) 

KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
knn=KNeighborsClassifier()

In [None]:
knn.fit(X_train,Y_train)
Y_pred = knn.predict(X_test)  

In [None]:
print("The accuracy is",accuracy_score(Y_pred,Y_test)) 

Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
gnb=GaussianNB()

In [None]:
gnb.fit(X_train,Y_train)    
Y_pred = gnb.predict(X_test)  

In [None]:
print("The accuracy is",accuracy_score(Y_pred,Y_test)) 

According to The Accuracies We're gonna use Random Forest

In [16]:
model = RandomForestClassifier(n_estimators = 100, max_depth = 20, min_samples_split = 10, random_state = 10)

In [17]:
model.fit(X_train, Y_train)

RandomForestClassifier(max_depth=20, min_samples_split=10, random_state=10)

In [18]:
Y_pred = model.predict(X_test)

In [19]:
print("The accuracy is",accuracy_score(Y_pred,Y_test)) 

The accuracy is 0.935036915342135


In [20]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred))

              precision    recall  f1-score   support

         0.0       1.00      0.85      0.92     44981
         1.0       0.90      1.00      0.94     56061

    accuracy                           0.94    101042
   macro avg       0.95      0.93      0.93    101042
weighted avg       0.94      0.94      0.93    101042



# Exporting The Model

In [21]:
import pickle

In [22]:
with open('model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Deployment

In [None]:
!streamlit run Deployment.py