In [1]:
# import some basic libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the churn dataset from the working directory and preview its first rows
DATA_FILE = 'Churn_Modelling.csv'
dataset = pd.read_csv(DATA_FILE)
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
# Drop the columns that are not needed for modelling.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it again after the columns are already gone is a
# no-op rather than a KeyError.
dataset = dataset.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [9]:
# Preview the first rows of the frame after the column drop
dataset.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [11]:
# List the remaining column names (used below to pick features and target)
dataset.columns

Index(['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
       'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
       'Exited'],
      dtype='object')

In [12]:
## Split the dataset into independent features (X) and the dependent target (y)
TARGET_COLUMN = 'EstimatedSalary'
FEATURE_COLUMNS = [
    'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
    'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'Exited',
]
X = dataset[FEATURE_COLUMNS]
# Double brackets keep y as a single-column DataFrame rather than a Series
y = dataset[[TARGET_COLUMN]]

In [13]:
# Preview the feature frame (categorical columns are still raw strings here)
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,619,France,Female,42,2,0.0,1,1,1,1
1,608,Spain,Female,41,1,83807.86,1,0,1,0
2,502,France,Female,42,8,159660.8,3,1,0,1
3,699,France,Female,39,1,0.0,2,0,0,0
4,850,Spain,Female,43,2,125510.82,1,1,1,0


In [14]:
# Preview the regression target
y.head()

Unnamed: 0,EstimatedSalary
0,101348.88
1,112542.58
2,113931.57
3,93826.63
4,79084.1


In [15]:
## Feature engineering
# One-hot encode each categorical column; drop_first=True omits the first
# category of each column, which then acts as the implicit baseline.
geography, gender = (
    pd.get_dummies(X[column], drop_first=True)
    for column in ('Geography', 'Gender')
)

In [16]:
# Inspect the Geography indicator columns produced by the encoding step
geography

Unnamed: 0,Germany,Spain
0,0,0
1,0,1
2,0,0
3,0,0
4,0,1
...,...,...
9995,0,0
9996,0,0
9997,0,0
9998,1,0


In [17]:
# Inspect the Gender indicator column produced by the encoding step
gender

Unnamed: 0,Male
0,0
1,0
2,0
3,0
4,0
...,...
9995,1
9996,1
9997,0
9998,1


In [18]:
# Remove the raw categorical columns now that they have been one-hot encoded.
# errors='ignore' makes the cell safe to re-run: a second execution finds the
# columns already gone and leaves X unchanged instead of raising KeyError.
X = X.drop(columns=['Geography', 'Gender'], errors='ignore')

In [19]:
# Preview X after removing the raw Geography and Gender columns
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,619,42,2,0.0,1,1,1,1
1,608,41,1,83807.86,1,0,1,0
2,502,42,8,159660.8,3,1,0,1
3,699,39,1,0.0,2,0,0,0
4,850,43,2,125510.82,1,1,1,0


In [20]:
# Concatenate the numeric features with the one-hot encoded columns.
# Dummy columns already present in X (left over from a previous run of this
# cell) are dropped first, so re-running does not append duplicate columns and
# silently change the number of model inputs.
encoded = pd.concat([geography, gender], axis=1)
X = pd.concat([X.drop(columns=encoded.columns, errors='ignore'), encoded], axis=1)

In [21]:
# Preview the final feature frame, including the appended indicator columns
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,1,0,0,0
1,608,41,1,83807.86,1,0,1,0,0,1,0
2,502,42,8,159660.8,3,1,0,1,0,0,0
3,699,39,1,0.0,2,0,0,0,0,0,0
4,850,43,2,125510.82,1,1,1,0,0,1,0


In [22]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split reproducible
from sklearn.model_selection import train_test_split

split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

In [23]:
# Feature scaling: fit the scaler on the training features only, then apply the
# same transformation to both the training and the test features
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [25]:
# Part 2: now let's create the ANN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU,PReLU,ELU,ReLU
from tensorflow.keras.layers import Dropout

In [26]:
# Initialise the ANN as an empty Sequential model; layers are appended in the
# following cells. Re-run this cell before re-running any of the `add` cells,
# otherwise the extra layers stack onto the existing model.
regressor=Sequential()

In [27]:
# Add the first hidden layer (10 ReLU units) and declare the input width.
# The width is read from the scaled training matrix instead of being hard-coded
# as 11, so the model stays valid if the feature set changes.
n_features = X_train.shape[1]
regressor.add(Dense(10, input_shape=(n_features,), activation='relu'))

In [28]:
# Second hidden layer: 10 fully connected units with ReLU activation
second_hidden = Dense(units=10, activation='relu')
regressor.add(second_hidden)
# Dropout after this layer is currently disabled:
#regressor.add(Dropout(0.2))

In [29]:
# Third hidden layer: 10 fully connected units with ReLU activation
third_hidden = Dense(units=10, activation='relu')
regressor.add(third_hidden)
# Dropout after this layer is currently disabled:
#regressor.add(Dropout(0.3))

In [32]:
# Output layer: a single unit with linear activation, giving one real-valued
# prediction per sample
output_layer = Dense(units=1, activation='linear')
regressor.add(output_layer)

In [33]:
# Print the layer-by-layer architecture and parameter counts of the model
regressor.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                120       
                                                                 
 dense_1 (Dense)             (None, 10)                110       
                                                                 
 dense_2 (Dense)             (None, 10)                110       
                                                                 
 dense_5 (Dense)             (None, 1)                 11        
                                                                 
Total params: 351 (1.37 KB)
Trainable params: 351 (1.37 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [38]:
#import tensorflow
#mse=tensorflow.keras.losses.MeanSquaredError(name='mean_squared_error')
#metrics=tensorflow.keras.metrics.MeanSquaredError(name='mean_squared_error', dtype=None)

In [47]:
# Configure training: Adam optimiser, mean squared error as the loss, and mean
# absolute error reported as an additional metric
#regressor.compile(optimizer='adam',loss=mse,metrics=metrics)
compile_options = {
    'optimizer': 'adam',
    'loss': 'mean_squared_error',
    'metrics': ['mean_absolute_error'],
}
regressor.compile(**compile_options)

In [48]:
## Early stopping
import tensorflow as tf

# Monitor the validation loss rather than the training MAE: a training metric
# keeps improving while the model overfits, so it cannot signal when to stop.
# "val_loss" is only available when fit() is given validation data.
# restore_best_weights=True rolls the model back to the best epoch instead of
# keeping the weights from the last (worse) epochs of the patience window.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

In [49]:
# Train in mini-batches of 10 for at most 100 epochs, stopping early via the
# callback. `callbacks` is documented as a list of Callback instances, so the
# single callback is wrapped in a list.
# NOTE(review): the test set doubles as validation data here, so any stopping
# decision based on validation metrics has seen the test data — consider
# carving a separate validation split out of the training set.
model_history = regressor.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=10,
    epochs=100,
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


In [50]:
# Predict the target for the scaled test features and display the predictions
y_pred=regressor.predict(X_test)
y_pred



array([[ 97277.96 ],
       [102830.48 ],
       [ 97211.41 ],
       ...,
       [102455.31 ],
       [ 88976.484],
       [ 93831.04 ]], dtype=float32)

In [51]:
# Coefficient of determination (R^2) of the predictions on the test set
from sklearn.metrics import r2_score

score = r2_score(y_true=y_test, y_pred=y_pred)
score

-0.019423977862644826