In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [None]:
# Read the churn CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(
    '/kaggle/input/bank-customer-churn-modeling/Churn_Modelling.csv'
)
df.head()

### Removing columns that are not useful for prediction

In [None]:
# Drop the identifier columns. Rebinding `df` (instead of `inplace=True`) together
# with errors='ignore' keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the columns are already gone.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [None]:
# Preview the frame now that the identifier columns have been removed.
df.head(n=5)

In [None]:
# (rows, columns) of the frame after the identifier columns were dropped.
df.shape

In [None]:
# Number of rows per class of the target column `Exited`.
df.Exited.value_counts()

## Preprocessing the dataset

In [None]:
# Count of missing values in each column.
df.isna().sum()

In [None]:
# Distinct values present in the Geography column.
df.Geography.unique()

In [None]:
# Distinct values present in the Tenure column.
df.Tenure.unique()

In [None]:
# Distinct values present in the NumOfProducts column.
df.NumOfProducts.unique()

In [None]:
# Data type of every column, inspected before the categorical columns are encoded.
df.dtypes

In [None]:
# One-hot encode the two categorical columns, dropping the first level of each.
# dtype=int pins the dummy columns to integers: pandas >= 2.0 emits bool columns
# by default, and a frame mixing bool with float columns becomes an object-dtype
# array when converted to NumPy, which numeric libraries may reject.
df = pd.get_dummies(
    df,
    columns=['Gender', 'Geography'],
    drop_first=True,
    dtype=int,
)

In [None]:
# Preview the frame with the new dummy columns.
df.head(n=5)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the four continuous columns in place on `df`.
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed later in the notebook.
cols = ['CreditScore', 'Age', 'Balance', 'EstimatedSalary']
scaler = MinMaxScaler()
df[cols] = scaler.fit(df[cols]).transform(df[cols])

In [None]:
# Preview the frame after scaling.
df.head(n=5)

### The dataset is imbalanced, so we oversample the minority class using SMOTE

In [None]:
# Feature matrix: every model input column, in a fixed order.
x = df.loc[:, [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'HasCrCard',
    'IsActiveMember',
    'EstimatedSalary',
    'Gender_Male',
    'Geography_Germany',
    'Geography_Spain',
]]
# Target vector: the `Exited` column.
y = df.loc[:, 'Exited']

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class of (x, y) with synthetic SMOTE samples.
# random_state is fixed so that the synthetic rows are identical on every run.
smote = SMOTE(sampling_strategy='minority', random_state=0)
x_sm, y_sm = smote.fit_resample(x, y)

In [None]:
# Class counts of the target after SMOTE resampling.
y_sm.value_counts()

## Neural Network Model Training

In [None]:
from sklearn.model_selection import train_test_split

# Split the ORIGINAL (un-resampled) data first, stratified on the target, so the
# test set contains only real customers in their natural class proportions.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0, stratify=y
)

# Oversample the minority class on the training portion only. Running SMOTE
# before the split would let synthetic points interpolated from test rows leak
# into training and would put synthetic rows in the test set, inflating the
# reported metrics.
x_train, y_train = SMOTE(
    sampling_strategy='minority', random_state=0
).fit_resample(x_train, y_train)

In [None]:
# (rows, features) of the training feature matrix.
x_train.shape

In [None]:
# Class counts in the training labels.
y_train.value_counts()

In [None]:
# Seed Python, NumPy and the Keras backend so that weight initialisation, dropout
# masks and batch shuffling repeat across "Restart & Run All" (some GPU kernels
# may still introduce small run-to-run differences).
keras.utils.set_random_seed(0)

# Fully connected binary classifier: three ReLU hidden layers (60 -> 30 -> 15),
# each followed by 50% dropout, and one sigmoid unit that outputs the predicted
# probability of the positive class.
model = keras.Sequential([
    # Explicit Input layer instead of the `input_dim` kwarg on the first Dense
    # layer, which newer Keras releases warn about.
    keras.Input(shape=(x_train.shape[1],)),
    keras.layers.Dense(60, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(15, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 100 epochs with mini-batches of 8 rows.
model.fit(x_train, y_train, epochs=100, batch_size=8)

In [None]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=x_test, y=y_test)

## Predictions

In [None]:
# Raw sigmoid outputs of the model for every test row.
yp = model.predict(x=x_test)

In [None]:
# First five raw model outputs (sigmoid activations).
yp[:5]

In [None]:
# Convert each predicted probability into a hard label: 1 when it is strictly
# above 0.5, otherwise 0.
y_pred = [1 if prob > 0.5 else 0 for prob in yp]

In [None]:
# First five thresholded predictions.
y_pred[:5]

In [None]:
# First five true labels of the test split, for comparison with the predictions.
y_test.head(5)

### Visualizing Predictions

In [None]:
import seaborn as sns

# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap
# with integer cell counts.
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_pred)
ax = sns.heatmap(cm, annot=True, fmt='d')
ax.set_xlabel('Predicted')
ax.set_ylabel('Truth')

In [None]:
from sklearn.metrics import classification_report

# Text summary of the per-class metrics on the test split.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)