In [None]:
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.sample(5)

In [None]:
# customerID is dropped before modelling. Rebinding (instead of inplace=True) together
# with errors='ignore' keeps this cell re-runnable: a second execution is a no-op
# rather than a KeyError.
df = df.drop(columns='customerID', errors='ignore')
df.dtypes

In [None]:
# Keep only the rows whose TotalCharges is not a single blank string.
# .copy() makes df1 an independent frame, so the column assignments performed in
# later cells modify df1 itself and do not trigger SettingWithCopyWarning.
df1 = df[df.TotalCharges != ' '].copy()
df1.shape

In [None]:
# Convert TotalCharges to a numeric dtype. assign() returns a new frame, which avoids
# setting a column through attribute access on a frame that may be a slice of df.
df1 = df1.assign(TotalCharges=pd.to_numeric(df1.TotalCharges))

In [None]:
df1.TotalCharges.dtypes

In [None]:
tenure_churn_no = df1[df1.Churn=='No'].tenure
tenure_churn_yes = df1[df1.Churn=='Yes'].tenure

In [None]:
plt.hist([tenure_churn_yes, tenure_churn_no], color = ['green', 'red'], label=['Churn=Yes', 'Churn=No'])
plt.legend()

In [None]:
def print_unique_col_values(df):
    """Print the distinct values of every object-dtype column in ``df``.

    One line is printed per matching column, in column order, formatted as
    ``<column name> : <array of unique values>``. Columns of any other dtype
    are skipped. Nothing is returned.
    """
    object_columns = (name for name in df if df[name].dtypes == 'object')
    for name in object_columns:
        distinct_values = df[name].unique()
        print(f'{name} : {distinct_values}')

In [None]:
print_unique_col_values(df1)

In [None]:
# Collapse the two "no service" labels into plain 'No'.
# A single non-inplace replace returns a new frame, so the cell neither mutates a
# frame that may be a slice of df nor depends on inplace semantics.
df1 = df1.replace(['No internet service', 'No phone service'], 'No')

In [None]:
print_unique_col_values(df1)

In [None]:
yes_no_columns = ['Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
                  'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn']

# Encode Yes/No as 1/0.
# The original `df1[col].replace(..., inplace=True)` is a chained in-place call: it acts
# on a temporary Series, is deprecated in recent pandas and does not update df1 under
# copy-on-write. Build the encoded columns and assign them back explicitly instead.
# astype('int64') guarantees a numeric dtype (replace alone may leave object columns)
# and fails loudly if a column still holds a value other than Yes/No.
yes_no_encoded = df1[yes_no_columns].replace({'Yes': 1, 'No': 0}).astype('int64')
df1 = df1.copy()
df1[yes_no_columns] = yes_no_encoded

In [None]:
for column in df1:
    print(f'{column} : {df1[column].unique()}' )

In [None]:
# Encode gender as Female=1 / Male=0.
# Assign the result back instead of calling replace(..., inplace=True) on the
# temporary Series returned by df1['gender'], which does not reliably update df1.
df1 = df1.assign(gender=df1['gender'].replace({'Female': 1, 'Male': 0}).astype('int64'))

In [None]:
df1['gender'].unique()

In [None]:
# One-hot encode the multi-valued categorical columns.
# dtype=int pins the indicator columns to integers: recent pandas versions default to
# bool, and a frame mixing bool and float columns converts to an object array, which
# the Keras model trained later cannot consume.
df2 = pd.get_dummies(data=df1, columns=['InternetService', 'Contract', 'PaymentMethod'], dtype=int)
df2.columns

In [None]:
df2.dtypes

In [None]:
cols_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

df2[cols_to_scale]=scaler.fit_transform(df2[cols_to_scale])

In [None]:
df2.sample(3)

In [None]:
X = df2.drop('Churn', axis = 'columns')
y = df2['Churn']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
def ANN(X_train, y_train, X_test, y_test, loss, weights):
    """Train a small dense binary classifier and report how it does on the test data.

    The network has two ReLU hidden layers (20 and 15 units) and a single sigmoid
    output unit. It is compiled with the Adam optimizer and trained for 100 epochs.
    After training, the result of ``model.evaluate`` on the test data and a
    scikit-learn classification report are printed.

    Parameters
    ----------
    X_train, y_train
        Training features (2-D, one column per feature) and labels.
    X_test, y_test
        Held-out features and labels used for evaluation and the report.
    loss
        Loss passed to ``model.compile`` (e.g. ``'binary_crossentropy'``).
    weights
        ``-1`` to train without class weighting; any other value is forwarded
        to ``model.fit`` as ``class_weight``.

    Returns
    -------
    numpy.ndarray
        ``model.predict(X_test)`` rounded with ``np.round``.
    """
    # Size the input layer from the data rather than hard-coding 26 features, so the
    # function keeps working if the set of feature columns changes.
    n_features = np.shape(X_train)[1]

    model = keras.Sequential([
        keras.layers.Dense(20, input_shape=(n_features,), activation='relu'),
        keras.layers.Dense(15, activation='relu'),
        keras.layers.Dense(1, activation = 'sigmoid')
        ])
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

    if weights == -1:
        model.fit(X_train, y_train, epochs=100)
    else:
        # The Keras keyword is `class_weight` (singular); `class_weights` raises TypeError.
        model.fit(X_train, y_train, epochs=100, class_weight=weights)

    print(model.evaluate(X_test,y_test))
    ypreds = model.predict(X_test)
    ypreds = np.round(ypreds)

    print("Classification report \n ", classification_report(y_test, ypreds))

    return ypreds


In [None]:
ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Look the counts up by class label. Unpacking value_counts() directly relies on its
# frequency ordering and silently swaps the two numbers if class 1 ever becomes the
# more frequent class.
churn_counts = df2['Churn'].value_counts()
count_class_0 = int(churn_counts.loc[0])
count_class_1 = int(churn_counts.loc[1])

df_class_0 = df2[df2['Churn']==0]
df_class_1 = df2[df2['Churn']==1]

In [None]:
df_class_0.shape

In [None]:
df_class_1.shape

In [None]:
# Undersample class 0 down to the size of class 1.
# random_state makes the draw reproducible across kernel restarts.
df_class_0_under = df_class_0.sample(count_class_1, random_state=15)
df_test_under = pd.concat([df_class_0_under, df_class_1], axis=0)
df_test_under.shape

In [None]:
X = df_test_under.drop('Churn', axis='columns')
y = df_test_under['Churn']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [None]:
ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
count_class_0, count_class_1

In [None]:
# Oversample class 1 (with replacement) up to the size of class 0.
# random_state makes the draw reproducible across kernel restarts.
# NOTE(review): the train/test split is performed after this resampling, so duplicated
# class-1 rows can land in both the training and the test set; the reported test
# metrics are presumably optimistic — consider splitting first and oversampling only
# the training part.
df_class_1_over = df_class_1.sample(count_class_0, replace=True, random_state=15)
df_test_over = pd.concat([df_class_0, df_class_1_over], axis=0)
df_test_over.shape

In [None]:
X = df_test_over.drop('Churn', axis='columns')
y = df_test_over['Churn']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)


In [None]:
ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
X = df2.drop('Churn', axis = 'columns')
y = df2['Churn']

In [None]:
from imblearn.over_sampling import SMOTE

# random_state makes the synthetic samples reproducible across kernel restarts.
smote = SMOTE(sampling_strategy='minority', random_state=15)

# fit_sample() was removed from imbalanced-learn; fit_resample() is the supported method.
# NOTE(review): SMOTE is applied before the train/test split, so synthetic points derived
# from rows that end up in the test set can be in the training set — confirm whether
# resampling only the training split is intended.
X_sm, y_sm = smote.fit_resample(X, y)

y_sm.value_counts()

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_sm, y_sm, test_size=0.2, random_state=15, stratify=y_sm)

In [None]:
y_test.value_counts()

In [None]:
y_train.value_counts()

In [None]:
ypreds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Turn each prediction into a plain 0/1 label: 1 when it is above 0.5, otherwise 0.
y_pred = [1 if ex > 0.5 else 0 for ex in ypreds]


In [None]:
# Save the true labels next to the predicted labels.
# The previous column names ('PassengerId', 'Survived') came from a different dataset
# and mislabelled what the two columns hold.
submission = pd.DataFrame({'Churn_actual': y_test, 'Churn_predicted': y_pred})
submission.to_csv('submission.csv', index=False)

In [None]:
X = df2.drop('Churn', axis = 'columns')
y = df2['Churn']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15, stratify=y)

In [None]:
df3=X_train.copy()
df3["Churn"] = y_train

In [None]:
df3_class0 = df3[df3['Churn']==0]
df3_class1 = df3[df3['Churn']==1]

In [None]:
df3_class0.shape, df3_class1.shape

In [None]:
def get_batch(df_maj, df_min, start, end):
    """Build one training batch from a slice of ``df_maj`` plus all of ``df_min``.

    Rows ``start:end`` of ``df_maj`` are stacked on top of every row of ``df_min``.

    Returns
    -------
    tuple
        ``(X_train, y_train)``: the stacked frame without its 'Churn' column, and
        the 'Churn' column itself.
    """
    majority_part = df_maj[start:end]
    batch = pd.concat([majority_part, df_min], axis=0)

    features = batch.drop(columns='Churn')
    target = batch['Churn']
    return features, target

In [None]:
X_train, y_train = get_batch(df3_class0, df3_class1, 0 , 1495)
y_pred1 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)


In [None]:
X_train, y_train = get_batch(df3_class0, df3_class1, 1495 , 2990)
y_pred2 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
X_train, y_train = get_batch(df3_class0, df3_class1, 2990 , 4130)
y_pred3 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Majority vote across the three models: a sample is labelled 1 when more than one
# of the three predictions is 1, otherwise 0. The result keeps y_pred1's dtype.
vote_totals = y_pred1 + y_pred2 + y_pred3
y_pred_final = np.where(vote_totals > 1, 1, 0).astype(y_pred1.dtype)
    

In [None]:
print("Classification report \n ", classification_report(y_test, y_pred_final))

In [None]:
# NOTE(review): no notebook-level `model` is assigned in the visible cells (ANN() keeps
# its model local and returns only predictions), so this presumably raises NameError on
# a fresh kernel — confirm whether this cell is a leftover that should be removed.
model.evaluate(X_test, y_test)

In [None]:
# NOTE(review): `model` is not defined at notebook scope in the visible cells (it is a
# local variable inside ANN()), so this presumably fails on a fresh kernel — confirm.
yp=model.predict(X_test)
# Show the first five predictions.
yp[:5]

In [None]:
y_pred[:5]

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# `y_pred` was built from the SMOTE test split and no longer has the same length as the
# current `y_test`; report on the ensemble predictions made for this `X_test` instead.
print(classification_report(y_test, y_pred_final))

In [None]:
import seaborn as sns

# Use the ensemble predictions made for the current X_test (the older `y_pred` belongs to
# the SMOTE split and has a different length). np.ravel flattens them to 1-D, which is
# the shape tf.math.confusion_matrix expects for labels and predictions.
cm = tf.math.confusion_matrix(labels=y_test, predictions=np.ravel(y_pred_final))
plt.figure(figsize=(17, 6))
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')