## 1. Import libraries

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd 
import seaborn as sns
import warnings 
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import recall_score , confusion_matrix , precision_score , f1_score , accuracy_score , classification_report


## 2. show data

In [None]:
# Read the Telco customer-churn CSV from the Kaggle input directory.
df = pd.read_csv(
    '/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv'
)

In [None]:
# Preview the first ten rows to sanity-check the load.
df.head(10)

## 3. Check data types and missing values

In [None]:
# Print a summary of the columns: names, non-null counts and dtypes.
df.info()

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Count null entries per column. Only real NaN/None values are counted;
# blank or whitespace-only strings are not reported here.
df.isnull().sum()

In [None]:
# Remove the customerID column before plotting and modelling, then preview
# the remaining columns.
df = df.drop(columns=['customerID'])
df.head()

## 4. plot data

In [None]:
# Count customers for every (SeniorCitizen, gender) pair and pivot gender
# into columns; combinations that never occur are filled with 0.
data_to_plot = (
    df.groupby(['SeniorCitizen', 'gender'])
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Display the SeniorCitizen x gender count table.
data_to_plot

In [None]:
# Draw the SeniorCitizen/gender counts as grouped bars.
# facecolor sets the figure background colour; figsize is (width, height)
# in inches.
fig, ax = plt.subplots(figsize=(16, 16), facecolor='lightskyblue')
data_to_plot.plot(ax=ax, kind='bar')


In [None]:
# Count customers for every (Partner, gender) pair; the result is a Series
# indexed by both keys.
data_to_plot2 = (
    df.groupby(['Partner', 'gender'])
    .size()
)

In [None]:
# Display the Partner/gender counts.
data_to_plot2

In [None]:
# Draw the Partner/gender counts as bars.
# The counts live in data_to_plot2; plotting data_to_plot here would just
# repeat the SeniorCitizen/gender chart.
fig, ax = plt.subplots(facecolor='lightskyblue', figsize=(16, 16))
data_to_plot2.plot(kind='bar', ax=ax)

In [None]:
# Count customers for every (PhoneService, gender) pair; the result is a
# Series indexed by both keys.
data_to_plot3 = (
    df.groupby(['PhoneService', 'gender'])
    .size()
)


In [None]:
# Display the PhoneService/gender counts.
data_to_plot3

In [None]:
# Draw the PhoneService/gender counts as bars on a 16x16-inch figure with
# a light sky-blue background.
fig, ax = plt.subplots(figsize=(16, 16), facecolor='lightskyblue')
data_to_plot3.plot(ax=ax, kind='bar')

In [None]:
# Count customers for every (tenure, gender) pair and pivot gender into
# columns; combinations that never occur are filled with 0.
data_to_plot4 = (
    df.groupby(['tenure', 'gender'])
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Display the tenure x gender count table.
data_to_plot4

In [None]:
# Plot the customer count per tenure value, one line per gender column.
sns.lineplot(data=data_to_plot4)

In [None]:
# Count customers for every (PaymentMethod, gender) pair and pivot gender
# into columns; combinations that never occur are filled with 0.
data_to_plot5 = (
    df.groupby(['PaymentMethod', 'gender'])
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Display the PaymentMethod x gender count table.
data_to_plot5

In [None]:
# Plot the customer count per payment method, one line per gender column.
sns.lineplot(data=data_to_plot5)

## 5. Data preprocessing

In [None]:
# Convert an object-dtype column into integer labels.
def object_to_int(dataframe_series):
    """Label-encode a column if it has object dtype.

    Columns of any other dtype are returned unchanged. Object columns are
    passed through a fresh ``LabelEncoder`` and the encoded array is
    returned in place of the original Series.
    """
    if dataframe_series.dtype != 'object':
        return dataframe_series
    encoder = LabelEncoder()
    return encoder.fit_transform(dataframe_series)
        

In [None]:
# TotalCharges is treated as a numeric feature further down (it is listed
# in num_cols and standardised), so it must hold real numbers before the
# object columns are label-encoded; otherwise its amounts would be replaced
# by arbitrary category ids.
# NOTE(review): in the public Telco churn CSV this column loads as object
# dtype because a few rows contain blank strings - confirm with df.dtypes.
# Unparseable entries become NaN and are filled with 0 here; presumably
# these are customers who have not been billed yet - verify (e.g. tenure == 0).
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

# Label-encode every remaining object-dtype column; numeric columns pass
# through unchanged.
df = df.apply(object_to_int)
df.head()

In [None]:
# Separate the target column (Churn) from the feature matrix.
y = df['Churn'].values
X = df.drop(columns=['Churn'])

In [None]:
# Hold out 30% of the rows for testing. The split is stratified on the
# target so both parts keep the same class balance, and the seed is fixed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=40,
    stratify=y,
)

In [None]:
# Continuous features that get standardised before modelling.
num_cols = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]


In [None]:
# Standardise the numeric columns. The scaler learns its mean and variance
# from the training split only and is then reused on the test split, so no
# test-set statistics leak into training.
scaler = StandardScaler().fit(X_train[num_cols])
X_train[num_cols] = scaler.transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

## 6. sklearn models

In [None]:
# Logistic regression: fit with default settings on the training split and
# report mean accuracy on the held-out test split.
model = LogisticRegression().fit(X_train, y_train)
accuary = model.score(X_test, y_test)
print('Logistic Regression accuracy is :', accuary)

In [None]:
# Predict the test split with the logistic-regression model and print
# per-class precision, recall and F1.
pred = model.predict(X_test)
report = classification_report(y_test, pred)
print(report)


In [None]:
# Confusion matrix of the logistic-regression predictions, drawn as an
# annotated heatmap with integer cell labels.
plt.figure(figsize=(4, 3))
sns.heatmap(
    confusion_matrix(y_test, pred),
    annot=True,
    fmt='d',
)
plt.title('LOGISTIC REGRESSION CONFUSION MATRIX')
plt.show()

In [None]:
# K-nearest-neighbours: fit a 20-neighbour classifier on the training split
# and report mean accuracy on the held-out test split.
knn_model = KNeighborsClassifier(n_neighbors=20)
knn_model.fit(X_train, y_train)
accuracy_knn = knn_model.score(X_test, y_test)
# Label spelled correctly and worded like the other models' reports.
print('KNN accuracy is:', accuracy_knn)

In [None]:
# Predict the test split with the KNN model and print per-class precision,
# recall and F1.
predicted_y = knn_model.predict(X_test)
print(
    classification_report(y_test, predicted_y)
)


In [None]:
# Confusion matrix of the KNN predictions, drawn as an annotated heatmap
# with integer cell labels.
plt.figure(figsize=(4, 3))
sns.heatmap(
    confusion_matrix(y_test, predicted_y),
    annot=True,
    fmt='d',
)
plt.title("KNN CONFUSION Matrix")
plt.show()

In [None]:
# Support-vector classifier: fit with default settings (seed fixed at 15)
# on the training split and report mean accuracy on the test split.
svc_model = SVC(random_state=15).fit(X_train, y_train)
accuracy_svc = svc_model.score(X_test, y_test)
print('SVC accuracy is:', accuracy_svc)

In [None]:
# Predict the test split with the SVC model and print per-class precision,
# recall and F1.
predict_y = svc_model.predict(X_test)
print(
    classification_report(y_test, predict_y)
)

In [None]:
# Confusion matrix of the SVC predictions, drawn as an annotated heatmap
# with integer cell labels.
plt.figure(figsize=(4, 3))
sns.heatmap(confusion_matrix(y_test, predict_y), annot=True, fmt='d')
# Title names the model correctly ("SVC", not "SCV").
plt.title("SVC CONFUSION MATRIX")
plt.show()
