Import libraries

In [None]:
import numpy              as np
import pandas             as pd
import matplotlib.pyplot  as plt
import seaborn            as sns
import tensorflow         as tf


from sklearn.preprocessing   import OneHotEncoder
from sklearn.compose         import ColumnTransformer
from sklearn.preprocessing   import StandardScaler


from sklearn.model_selection  import train_test_split
from sklearn.linear_model     import LinearRegression
from sklearn.linear_model     import LogisticRegression
from sklearn.neighbors        import KNeighborsClassifier
from sklearn.tree             import DecisionTreeClassifier
from sklearn.ensemble         import RandomForestClassifier
from sklearn.naive_bayes      import GaussianNB
from sklearn.linear_model     import SGDClassifier
from sklearn.metrics          import confusion_matrix,accuracy_score


Load dataset

In [None]:
# Read the Telco customer-churn CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../input/telcocustomerchurn/Telco-Customer-Churn.csv')

In [None]:
# Preview the first five rows of the raw data.
df.head(n=5)

In [None]:
# Display the (rows, columns) dimensions of the dataset.
df.shape

In [None]:
# Print column names, non-null counts and dtypes.
df.info()

In [None]:
# Summary statistics; with default arguments only numeric columns are described
# when the frame mixes numeric and non-numeric dtypes.
df.describe()

In [None]:
# Count missing values per column.
# NOTE(review): only real NaN/None entries are counted; blank strings in a
# text column would not show up here.
df.isna().sum()

In [None]:
# Display the dtype of every column.
df.dtypes

In [None]:
# Display the column labels.
df.columns

In [None]:
# Pairwise correlation of the numeric columns only. At this point the frame
# still contains text columns, and DataFrame.corr() raises on non-numeric data
# in pandas >= 2.0, so the numeric subset is selected explicitly (this works on
# older pandas versions as well).
corr = df.select_dtypes(include='number').corr()

In [None]:
# Draw the correlation matrix as an annotated heatmap on a wide canvas;
# the colour scale is capped at 0.7.
plt.figure(figsize=(25, 10))
sns.heatmap(data=corr, annot=True, square=True, vmax=0.7)

In [None]:
# Positional column preprocessing: one-hot encode columns 1 and 3, standardise
# columns 2 and 5, and pass every other column through unchanged.
transform = ColumnTransformer(
    transformers=[
        ('One', OneHotEncoder(), [1, 3]),
        ('sc', StandardScaler(), [2, 5]),
    ],
    remainder='passthrough',
)

In [None]:
# Fit the column transformer on the full frame and keep the transformed matrix.
# NOTE(review): `x` is rebound further down when features and target are
# split, so this result appears to be unused.
x = transform.fit_transform(X=df)


In [None]:
# Names of all dataset columns, in file order.
column = [
    'customerID',
    'gender',
    'SeniorCitizen',
    'Partner',
    'Dependents',
    'tenure',
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaperlessBilling',
    'PaymentMethod',
    'MonthlyCharges',
    'TotalCharges',
    'Churn',
]

In [None]:
from sklearn.preprocessing import LabelEncoder
cols = ('customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport','StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn')

# Make sure TotalCharges is numeric before any encoding. If it was parsed as
# text, values that cannot be converted become NaN and are then filled.
# NOTE(review): 0 is used as the fill value on the assumption that such rows
# are customers with no charges yet - confirm against the data.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

# Label-encode only the non-numeric (categorical) columns. Encoding numeric
# columns such as tenure or MonthlyCharges would replace real magnitudes with
# arbitrary rank codes, so those are left untouched.
for c in cols:
    if pd.api.types.is_numeric_dtype(df[c]):
        continue
    lbl = LabelEncoder()
    df[c] = lbl.fit_transform(df[c])

# Check shape
print('Shape data: {}'.format(df.shape))

In [None]:
# Separate features from the target. customerID is a per-row identifier, not a
# predictive attribute, so it is removed from the feature matrix together with
# the target column.
x = df.drop(columns=['Churn', 'customerID'])
y = df['Churn']


In [None]:
# Preview the first five rows of the feature matrix.
x.head(n=5)

In [None]:
# Display the target series.
y

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
model=LinearRegression()
model.fit(xtrain,ytrain)
ypred=model.predict(xtest)
acc_lin_reg=round(model.score(xtrain,ytrain)*100,2 )
print(str(acc_lin_reg)+ ' percent')

In [None]:
model = LogisticRegression(random_state=10)
model.fit(xtrain,ytrain)
ypred=model.predict(xtest)
print(model.score(xtrain,ytrain))

acc_log_reg=round(model.score(xtrain,ytrain)*100,2 )
print(str(acc_log_reg)+' percent')

In [None]:
model = KNeighborsClassifier(n_neighbors = 3)
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
acc_knn = round(model.score(xtrain, ytrain) * 100, 2)
print (acc_knn)

In [None]:
model = DecisionTreeClassifier(max_depth = 15,random_state=100)
model.fit(xtrain, ytrain)
y_pred = model.predict(xtest)
acc_decision_tree = round(model.score(xtrain, ytrain) * 100, 2)
print (acc_decision_tree)


In [None]:
model = RandomForestClassifier(n_estimators=100,max_depth = 15)
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
acc_random_forest = round(model.score(xtrain, ytrain) * 100, 2)
print (acc_random_forest)

In [None]:
model = GaussianNB()
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
acc_gnb = round(model.score(xtrain, ytrain) * 100, 2)
print (acc_gnb)

In [None]:
model = SGDClassifier()
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
acc_sgd = round(model.score(xtrain, ytrain) * 100, 2)
print (acc_sgd)

In [None]:
# Collect each model's score into one table, then show it ranked best-first.
models = pd.DataFrame(
    [
        ('Linear Regression', acc_lin_reg),
        ('Logistic Regression', acc_log_reg),
        ('KNN', acc_knn),
        ('Decision Tree', acc_decision_tree),
        ('Random Forest', acc_random_forest),
        ('Naive Bayes', acc_gnb),
        ('Stochastic Gradient Decent', acc_sgd),
    ],
    columns=['Model', 'Score'],
)

models.sort_values(by='Score', ascending=False)

In [None]:
# Re-fit the random forest so that `model` refers to it again (earlier cells
# rebound `model` to other estimators). The forest is seeded for reproducible
# results and scored on the held-out test split rather than on the data it was
# trained on.
model = RandomForestClassifier(n_estimators=100, max_depth=15, random_state=42)
model.fit(xtrain, ytrain)
ypred = model.predict(xtest)
acc_random_forest = round(accuracy_score(ytest, ypred) * 100, 2)
print(acc_random_forest)

In [None]:
# Predict on the test split and print the confusion matrix
# (rows: true classes, columns: predicted classes).
ypred = model.predict(xtest)
print(confusion_matrix(y_true=ytest, y_pred=ypred))

In [None]:
# Fraction of test rows whose predicted class matches the true class.
print(accuracy_score(y_true=ytest, y_pred=ypred))

Artificial Neural Network

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout


# Build the ANN as a sequential stack: three hidden layers of 6 ReLU units
# each, followed by a single sigmoid output unit.
classifier = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),     # first hidden layer
    tf.keras.layers.Dense(units=6, activation='relu'),     # second hidden layer
    tf.keras.layers.Dense(units=6, activation='relu'),     # third hidden layer
    tf.keras.layers.Dense(units=1, activation='sigmoid'),  # output layer
])



In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy
# reported as the metric.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training split for 100 epochs in mini-batches of 32 rows.
classifier.fit(x=xtrain, y=ytrain, epochs=100, batch_size=32)