# EDA and Prediction

Churn is one of the biggest problems in the telecom industry. Research has shown that the average monthly churn rate among the top 4 wireless carriers in the US is 1.9% - 2%.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns # For creating plots
import matplotlib.ticker as mtick # For specifying the axes tick format 
import matplotlib.pyplot as plt

# Set seaborn's plotting style to 'white'.
sns.set(style = 'white')

# Input data files are available in the "../input/" directory.

import os
# Print the names of the entries in the input directory.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

**Let us read the data file into the Python notebook**

In [None]:
# Load the customer churn CSV from the input directory into a DataFrame.
telecom_cust = pd.read_csv('../input/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:
# Preview the first rows of the loaded data.
telecom_cust.head()

In [None]:
# Feature matrix: every column except the customer identifier and the target.
X = telecom_cust.drop(columns=['customerID', 'Churn'])

In [None]:
# Target vector: an independent copy of the 'Churn' column.
y = telecom_cust.loc[:, 'Churn'].copy()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Replace single-space TotalCharges entries with NaN in both splits.
# The result is assigned back to the column instead of using inplace=True on
# the selected column: an in-place edit of a column selection is a chained
# assignment and does not reach the parent DataFrame under pandas
# Copy-on-Write. np.nan is used because the np.NaN alias was removed in
# NumPy 2.0.
X_train['TotalCharges'] = X_train['TotalCharges'].replace(' ', np.nan)
X_test['TotalCharges'] = X_test['TotalCharges'].replace(' ', np.nan)

In [None]:
# Cast TotalCharges to 64-bit floats in both the training and the test split.
X_train['TotalCharges'] = X_train['TotalCharges'].astype('float64')
X_test['TotalCharges'] = X_test['TotalCharges'].astype('float64')

In [None]:
# Fill missing TotalCharges values with the mean computed on the training
# split only. The same training statistic is applied to the test split so
# that both splits are preprocessed identically and nothing learned from the
# test data leaks into the pipeline.
# The filled column is assigned back rather than using inplace=True on the
# selected column, which is a chained assignment that does not update the
# parent DataFrame under pandas Copy-on-Write.
total_charges_mean = X_train['TotalCharges'].mean()
X_train['TotalCharges'] = X_train['TotalCharges'].fillna(total_charges_mean)
X_test['TotalCharges'] = X_test['TotalCharges'].fillna(total_charges_mean)

In [None]:
# Report the number of missing values in each training column.
print(X_train.isna().sum())

In [None]:
# Names of the object-dtype columns in the training features.
cat_cols = list(X_train.select_dtypes(include='O').columns)
print(cat_cols)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Label-encode every categorical column.
# Each encoder is fitted on the training split only and then reused to
# transform the test split, so a given category always maps to the same
# integer in both splits. Fitting a second encoder on the test split could
# assign different codes whenever the two splits do not contain exactly the
# same set of categories.
# transform() raises ValueError if the test split contains a category that
# was not seen during fitting, instead of silently mis-encoding it.
for col in cat_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col] = le.transform(X_test[col])

In [None]:
# Encode the target labels of both splits: 'No' -> 0, 'Yes' -> 1.
# The explicit astype(int) guarantees an integer target. Without it the
# result dtype depends on replace() implicitly downcasting an object Series,
# which pandas has deprecated. The cast also raises if a label other than
# 'No'/'Yes' is left unconverted.
churn_codes = {'No':0, 'Yes':1}
y_train = y_train.replace(churn_codes).astype(int)

y_test = y_test.replace(churn_codes).astype(int)

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Preview the first rows of the test features.
X_test.head()

In [None]:
# Preview the first training target values.
y_train.head()

In [None]:
# Preview the first test target values.
y_test.head()

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Build and train an AdaBoost classifier with a fixed seed. fit() returns the
# estimator itself, so construction and training are chained.
ada_model = AdaBoostClassifier(random_state=0).fit(X_train, y_train)

# Predicted labels for the test split.
y_pred = ada_model.predict(X_test)
print(y_pred)

# Score of the fitted model on the test split.
e = ada_model.score(X_test, y_test)
print(e)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Accuracy of the current predictions against the true test labels.
ada_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(ada_score)

In [None]:
# Confusion matrix of the current predictions against the true test labels.
ada_cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
ada_cm

In [None]:
# Text classification report for the current predictions.
ada_cr = classification_report(y_true=y_test, y_pred=y_pred)
print(ada_cr)

In [None]:
from xgboost import XGBClassifier

# Hyper-parameter grid: five learning rates crossed with max_depth 1 and 2.
# It is only defined here; the fit below does not use it.
parameters = {
    'learning_rate': [0.1, 0.15, 0.2, 0.25, 0.3],
    'max_depth': range(1, 3),
}

# XGBoost classifier with a fixed seed, trained on the training split.
xgb_model = XGBClassifier(random_state=0)
xgb_model.fit(X_train, y_train)

# Score of the fitted model on the test split.
score = xgb_model.score(X_test, y_test)
print(score)

In [None]:
# Overwrite y_pred with the XGBoost predictions for the test split.
y_pred = xgb_model.predict(X_test)

# Accuracy of the XGBoost predictions against the true test labels.
xgb_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(xgb_score)

In [None]:
# Confusion matrix of the current predictions against the true test labels.
xgb_cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
xgb_cm

In [None]:
# Text classification report for the current predictions.
xgb_cr = classification_report(y_true=y_test, y_pred=y_pred)
print(xgb_cr)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Show the dimensions of the test features.
X_test.shape

In [None]:
# Show the dimensions of the test target.
y_test.shape

In [None]:
# Grid search over `parameters` using the XGBoost estimator, fitted on the
# training split.
clf_model = GridSearchCV(estimator=xgb_model, param_grid=parameters)
clf_model.fit(X_train, y_train)

# Overwrite y_pred with the grid-search model's predictions for the test split.
y_pred = clf_model.predict(X_test)

# Accuracy, confusion matrix and classification report for those predictions.
clf_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(clf_score)

clf_cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(clf_cm)

clf_cr = classification_report(y_true=y_test, y_pred=y_pred)
print(clf_cr)


In [None]:
# Print the most recently computed predictions.
print(y_pred)