In [122]:
import pandas as pd
import seaborn as sns
import numpy as np

In [123]:
df = pd.read_csv('Churn_Modelling.csv')

In [124]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [125]:
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [126]:
df = df.drop(['RowNumber', 'Surname', 'CustomerId'], axis= 1) #Dropping the unnecessary columns 

#### Converting to categorical variables

In [127]:
geography = pd.get_dummies(df['Geography'],drop_first = True, dtype=int)
gender = pd.get_dummies(df['Gender'],drop_first = True, dtype=int)

In [128]:
df = pd.concat([df, gender, geography], axis=1)

In [129]:
df.drop(['Gender','Geography'],axis = 1, inplace = True)
df

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Male,Germany,Spain
0,619,42,2,0.00,1,1,1,101348.88,1,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,502,42,8,159660.80,3,1,0,113931.57,1,0,0,0
3,699,39,1,0.00,2,0,0,93826.63,0,0,0,0
4,850,43,2,125510.82,1,1,1,79084.10,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,0,1,0,0
9996,516,35,10,57369.61,1,1,1,101699.77,0,1,0,0
9997,709,36,7,0.00,1,0,1,42085.58,1,0,0,0
9998,772,42,3,75075.31,2,1,0,92888.52,1,1,1,0


In [130]:
X = df[['CreditScore','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','EstimatedSalary','Male','Germany','Spain']]
y = df['Exited']

### Resampling Data

In [136]:
y.value_counts()

Exited
0    7963
1    2037
Name: count, dtype: int64

In [137]:
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state = 0)
x_res, y_res = ros.fit_resample(X_scaled,y)
y_res.value_counts()

Exited
1    7963
0    7963
Name: count, dtype: int64

### Normalizing values with minmax scaler

In [138]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [140]:
X_scaled = sc.fit_transform(x_res)
X_scaled

array([[-0.29877723,  0.08418894, -1.01840607, ..., -1.01799435,
        -0.6647702 , -0.54556873],
       [-0.4103938 , -0.01032629, -1.36135608, ..., -1.01799435,
        -0.6647702 ,  1.83294963],
       [-1.48597169,  0.08418894,  1.03929402, ..., -1.01799435,
        -0.6647702 , -0.54556873],
       ...,
       [-0.84671313,  1.02934128,  0.01044398, ...,  0.98232373,
         1.50427922, -0.54556873],
       [-0.96847667,  0.65128034, -0.67545605, ..., -1.01799435,
        -0.6647702 ,  1.83294963],
       [-1.5874413 ,  0.74579558,  1.03929402, ..., -1.01799435,
        -0.6647702 , -0.54556873]])

### Model Building

In [141]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_res, test_size = 0.30)

In [142]:
from sklearn.neural_network import MLPClassifier

In [143]:
ann = MLPClassifier(hidden_layer_sizes = (100,100,100),
                  random_state = 0,
                  max_iter = 100,
                  activation = 'relu')

In [144]:
ann.fit(X_train, y_train)



In [146]:
y_pred = ann.predict(X_test)

In [147]:
from sklearn.metrics import classification_report, accuracy_score
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

In [148]:
print(report)

              precision    recall  f1-score   support

           0       0.96      0.85      0.90      2412
           1       0.87      0.96      0.91      2366

    accuracy                           0.91      4778
   macro avg       0.91      0.91      0.91      4778
weighted avg       0.91      0.91      0.91      4778



In [149]:
accuracy

0.9077019673503558