In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, file_name)
    for root, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score
from sklearn import tree
import graphviz

## Data Preprocessig

In [None]:
# Read the Telco customer-churn CSV from the Kaggle input directory and preview it.
telco_csv_path = '/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(telco_csv_path)
df.head()  # head() returns the first 5 rows by default

In [None]:
# Show the dtype pandas assigned to each column when the CSV was read.
df.dtypes

In [None]:
# Column groups used by the preprocessing cells that follow.
custid = ['customerID']  # identifier column
target = ['Churn']       # label column
# Any column with fewer than 10 distinct values is treated as categorical.
# The filter runs over every column, so the label column is included too
# when it has fewer than 10 distinct values.
distinct_counts = df.nunique()
categorical = distinct_counts[distinct_counts < 10].index.tolist()


In [None]:
# Every column that is not the ID, the label, or categorical is treated as numerical.
non_numerical = set(custid + target + categorical)
numerical = [col for col in df.columns if col not in non_numerical]

In [None]:
# Single-space TotalCharges entries are replaced with '0' so the whole
# column can be cast to float.
df['TotalCharges'] = (
    df['TotalCharges']
    .replace({" ": '0'})
    .astype(float)
)

In [None]:
# Standardize the numerical columns with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split, so test-set statistics influence the scaling. Consider fitting on the
# training rows only.
scaler = StandardScaler()
scaled = pd.DataFrame(
    scaler.fit_transform(df[numerical]),
    columns=numerical,
    # Carry over df's row labels: the scaled columns are merged back on the
    # index, so they must share df's index to align row-for-row.
    index=df.index,
)

In [None]:
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each column.
df = pd.get_dummies(df, columns=categorical, drop_first=True)

In [None]:
# Swap the raw numerical columns for their scaled versions (joined on the row
# index) and rename the dummy-encoded label column back to 'Churn'.
df = (
    df.drop(columns=numerical)
    .merge(scaled, how='left', left_index=True, right_index=True)
    .rename(columns={'Churn_Yes': 'Churn'})
)
df.head()

In [None]:
# Separate features from the label; customerID is excluded from the features.
X = df.drop(columns=['Churn', 'customerID'])
y = df['Churn']

In [None]:
# Hold out 25% of the rows for testing.
# random_state fixes the shuffle so every re-run produces the same split;
# stratify=y keeps the churn/non-churn ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

## Model and Testing

In [None]:
# Churn distribution: percentage of rows in each Churn class
churn_counts = df.groupby(['Churn']).size()
churn_counts / len(df) * 100

In [None]:
# Baseline decision tree with default hyperparameters.
# random_state is fixed because the tree's split search is randomized, so an
# unseeded fit can differ from one run to the next.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

In [None]:
# Score the decision tree on the held-out test set.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = round(precision_score(y_test, y_pred), 3)
recall = round(recall_score(y_test, y_pred), 3)

# Report each metric rounded to three decimals.
for label, value in (
    ('Test accuracy : ', round(accuracy, 3)),
    ('Test precision : ', precision),
    ('Test recall : ', recall),
):
    print(label, value)

In [None]:
# Baseline logistic regression with default hyperparameters, fitted on the
# training split.
log_model = LogisticRegression().fit(X=X_train, y=y_train)

In [None]:
# Score the logistic-regression baseline on the held-out test set.
y_pred_log = log_model.predict(X_test)
accuracy_log = accuracy_score(y_test, y_pred_log)
# Precision and recall must be computed from the logistic-regression
# predictions (y_pred_log); y_pred holds the decision tree's predictions.
precision_log = round(precision_score(y_test, y_pred_log), 3)
recall_log = round(recall_score(y_test, y_pred_log), 3)
print('LogisticRegression Test accuracy : ' ,round(accuracy_log,3) )
print('LogisticRegression Test precision : ' , precision_log)
print('LogisticRegression Test recall : ' , recall_log)

## Model Tuning

In [None]:
# Sweep the inverse regularization strength C of an L1-penalized logistic
# regression and tabulate accuracy, precision and recall for each value.
# NOTE(review): every candidate is scored on the test set, so the best row is
# an optimistic estimate; consider a validation split or cross-validation.
C = [1, 0.5, 0.25, 0.1, 0.05, 0.025, 0.01, 0.005, 0.0025]
metrics = np.zeros((len(C), 4))
metrics[:, 0] = C
for index, c_value in enumerate(C):
    # Dedicated names keep the baseline `log_model` and `y_pred` from the
    # earlier cells intact instead of silently overwriting them.
    tuned_log_model = LogisticRegression(
        penalty='l1',
        C=c_value,
        solver='liblinear',
        random_state=42,  # liblinear shuffles the data; seed it for reproducible rows
    ).fit(X_train, y_train)
    tuned_pred = tuned_log_model.predict(X_test)
    metrics[index, 1] = accuracy_score(y_test, tuned_pred)
    # A heavily regularized model may predict no positives at all, which leaves
    # precision undefined; zero_division=0 scores that case as 0 without a warning.
    metrics[index, 2] = precision_score(y_test, tuned_pred, zero_division=0)
    metrics[index, 3] = recall_score(y_test, tuned_pred)
columns = ['C', 'Accuracy', 'Precision', 'Recall']
log_df = pd.DataFrame(metrics, columns=columns)
log_df

In [None]:
# Sweep the tree's max_depth from 2 to 14 and tabulate accuracy, precision and
# recall for each value.
# NOTE(review): every candidate is scored on the test set, so the best row is
# an optimistic estimate; consider a validation split or cross-validation.
depth = list(range(2, 15))
depth_tuning = np.zeros((len(depth), 4))
depth_tuning[:, 0] = depth
for index, max_depth in enumerate(depth):
    # Dedicated names avoid overwriting `model` and `y_pred` from earlier cells.
    tuned_tree = DecisionTreeClassifier(
        max_depth=max_depth,
        random_state=42,  # seed the randomized split search so rows are reproducible
    ).fit(X_train, y_train)
    tuned_pred = tuned_tree.predict(X_test)
    depth_tuning[index, 1] = accuracy_score(y_test, tuned_pred)
    # zero_division=0 scores the "no positive predictions" case as 0 without a warning.
    depth_tuning[index, 2] = precision_score(y_test, tuned_pred, zero_division=0)
    depth_tuning[index, 3] = recall_score(y_test, tuned_pred)
columns = ['MaxDepth', 'Accuracy', 'Precision', 'Recall']
tree_df = pd.DataFrame(depth_tuning, columns=columns)
tree_df

## Plotting Decision Tree Rules

In [None]:
# Refit a depth-5 tree for plotting.
# NOTE(review): depth 5 is presumably chosen to keep the diagram readable —
# confirm against the depth-tuning table.
# random_state is fixed so the plotted rules are the same on every run.
model = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

In [None]:
# Render the fitted tree's decision rules with graphviz.
# Feature names come straight from the matrix the model was trained on, so the
# node labels always match the fitted columns, in the fitted order.
cols = list(X_train.columns)
exported = tree.export_graphviz(
    decision_tree=model,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=cols,
    precision=1,
    class_names=['Not Churn', 'Churn'],
    filled=True,
)
graph = graphviz.Source(exported)
display(graph)