In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the churn dataset from the input directory and preview the first rows.
csv_path = "../input/churnmodelling/Churn_Modelling.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Remove the columns this notebook treats as unwanted: RowNumber, CustomerId, Surname.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the cell
# idempotent: re-running it after the columns are already gone no longer raises KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [None]:
# Preview the first three rows after the column drop.
df.head(3)

In [None]:
# Summarise the columns (df.info reports dtypes and non-null counts).
df.info()

In [None]:
# Distinct values of the two categorical columns that will be label-encoded to integers.
df.Gender.unique(),df.Geography.unique()

In [None]:
# Shared LabelEncoder instance; encoding() below re-fits it for every column it converts.
label_en=LabelEncoder()

In [None]:

def encoding(df, features):
    """Label-encode one or more columns of ``df`` in place.

    Args:
        df: DataFrame whose columns are overwritten with integer codes.
        features: A single column label, or a list of column labels. A list is
            encoded one column at a time, because ``LabelEncoder`` only accepts
            1-D input.

    Returns:
        None. ``df`` is modified in place.

    Note:
        The module-level ``label_en`` is re-fitted for every column, so after the
        call it only remembers the classes of the last column processed.
    """
    # A list can never be a single column label (it is unhashable), so treating it
    # as "several columns" is backward-compatible with every call that worked before.
    columns = features if isinstance(features, list) else [features]
    for column in columns:
        df[column] = label_en.fit_transform(df[column])

In [None]:
# Label-encode both categorical columns, then show the resulting integer codes.
for categorical_column in ('Gender', 'Geography'):
    encoding(df, categorical_column)
df['Gender'].unique(), df['Geography'].unique()

In [None]:
# Re-check the column dtypes after the label encoding.
df.info()

In [None]:
# Select the features by name rather than by position: every column except the
# target is a model input, so a reordered frame can neither leak 'Exited' into X
# nor silently lose a feature (the old slice relied on the magic number 10).
X = df.drop(columns=['Exited'])
y = df['Exited']

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Preview the target column ('Exited').
y.head()

In [None]:
# Hold out 30% of the rows for testing. stratify=y keeps the churn ratio the same
# in both partitions, which matters because the target classes are imbalanced
# (the notebook later adds class weights for exactly that reason).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=10, stratify=y
)

In [None]:
# Sanity-check the sizes of the train and test feature matrices.
X_train.shape, X_test.shape

In [None]:
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# are repeatable across runs (GPU kernels may still add small nondeterminism).
tensorflow.keras.utils.set_random_seed(10)

# Baseline network: two hidden layers and a sigmoid output for the binary target.
# The hidden layers need a non-linear activation; without one the stacked Dense
# layers collapse into a single linear map and the model is only a logistic regression.
# The input width is taken from the training data instead of being hard-coded.
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 20 epochs and keep the per-epoch metrics for the learning-curve plots.
# NOTE(review): the held-out test split is also passed as validation data here,
# so the "validation" curves are really test-set curves.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_test, y_test),
)

In [None]:
# Learning curves for the first model: one figure per metric, train vs. validation.
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Model Loss'),
)
for train_key, val_key, figure_title in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.xlabel('epoch')
    plt.ylabel(train_key)  # the y-axis label is the metric name itself
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
# Round the model's sigmoid outputs on the test split to hard 0/1 labels.
predicted_probabilities = model.predict(X_test)
prediction = np.round(predicted_probabilities)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Test-set accuracy of the first model.
accuracy_score(y_test,prediction)

In [None]:
# Confusion matrix of the first model on the test split.
confusion_matrix(y_test,prediction)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision, recall and F1 for the first model.
print(classification_report(y_test,prediction))

# Improvements
1. Add class weights to compensate for the class imbalance.
2. Increase the number of units in the dense layers and add more layers.
3. Add BatchNormalization layers.
4. Add Dropout layers.

In [None]:
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# 'balanced' weights each class inversely to its frequency in the training labels.
observed_classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=observed_classes,
    y=y_train,
)
class_weights

In [None]:
# Keras expects class_weight as a {class_index: weight} dict.
# The isinstance guard makes this cell safe to re-run: once class_weights is already
# a dict, enumerating it again would iterate its keys and silently produce {0: 0, 1: 1}.
if not isinstance(class_weights, dict):
    class_weights = dict(enumerate(class_weights))
class_weights

In [None]:
from tensorflow.keras.layers import Dense,Dropout,Activation,BatchNormalization

# Seed Python, NumPy and TensorFlow so the second model's weight initialisation,
# dropout masks and batch shuffling are repeatable across runs and cell re-runs
# (GPU kernels may still add small nondeterminism).
tensorflow.keras.utils.set_random_seed(10)

# Improved network: wider and deeper than the baseline, with batch normalisation
# ahead of each hidden Dense layer and dropout after the first two.
model2 = Sequential()
model2.add(tensorflow.keras.layers.InputLayer(input_shape=(10,)))
# Normalising the raw inputs first lets the network cope with unscaled features.
model2.add(BatchNormalization())
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.3))
model2.add(BatchNormalization())
model2.add(Dense(64, activation='relu'))
model2.add(Dropout(0.3))
model2.add(BatchNormalization())
model2.add(Dense(32, activation='relu'))
# Single sigmoid unit: probability of the positive class.
model2.add(Dense(1, activation='sigmoid'))

model2.summary()

In [None]:
# Same training configuration as the first model: Adam, binary cross-entropy, accuracy.
model2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the second model for 20 epochs, weighting the loss per class.
# NOTE(review): the held-out test split is also passed as validation data here,
# so the "validation" curves are really test-set curves.
history2 = model2.fit(
    X_train,
    y_train,
    epochs=20,
    class_weight=class_weights,
    validation_data=(X_test, y_test),
)

In [None]:
# Learning curves for the second model: one figure per metric, train vs. validation.
curve_specs2 = (
    ('accuracy', 'val_accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Model Loss'),
)
for train_key, val_key, figure_title in curve_specs2:
    plt.plot(history2.history[train_key])
    plt.plot(history2.history[val_key])
    plt.title(figure_title)
    plt.xlabel('epoch')
    plt.ylabel(train_key)  # the y-axis label is the metric name itself
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
# Round the second model's sigmoid outputs on the test split to hard 0/1 labels.
predicted_probabilities2 = model2.predict(X_test)
prediction2 = np.round(predicted_probabilities2)

In [None]:
# Test-set accuracy of the second model.
accuracy_score(y_test,prediction2)

In [None]:
# Confusion matrix of the second model on the test split.
confusion_matrix(y_test,prediction2)

In [None]:
# Per-class precision, recall and F1 for the second model.
print(classification_report(y_test,prediction2))

In [None]:
# Show the true labels next to the second model's predictions for a manual spot check.
y_test, prediction2