In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw churn dataset from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/Churn_Modelling.csv')

In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Target: the 'Exited' column.
y = df['Exited']

# Predictors: every remaining column except the row/customer identifiers
# ('RowNumber', 'CustomerId', 'Surname') and the target itself.
X = df.drop(columns=['CustomerId', 'Surname', 'RowNumber', 'Exited'])

In [5]:
# One-hot encoding: replace the categorical 'Geography' and 'Gender' columns
# with one indicator column per category (e.g. Geography_France, Gender_Male).
# NOTE: this rebinds X, so re-running the cell without first re-running the
# cell that builds X will fail because the source columns are already gone.
X = pd.get_dummies(data=X, columns=['Geography', 'Gender'])
X.head(n=5)

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,True,False,False,True,False
1,608,41,1,83807.86,1,0,1,112542.58,False,False,True,True,False
2,502,42,8,159660.8,3,1,0,113931.57,True,False,False,True,False
3,699,39,1,0.0,2,0,0,93826.63,True,False,False,True,False
4,850,43,2,125510.82,1,1,1,79084.1,False,False,True,True,False


In [6]:
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for testing. The split is stratified on y so both
# partitions keep the same class balance, and seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

# Standardise the features. The scaler is fitted on the training partition
# only and then applied to both partitions, so no test-set statistics leak
# into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout

In [8]:
# Feed-forward binary classifier built as a Sequential model, i.e. a plain
# stack of layers where each layer has exactly one input tensor and one
# output tensor.
#
# The input width is declared with an explicit Input object rather than the
# `input_dim` keyword on the first Dense layer: `input_dim` is a legacy
# argument (newer Keras releases warn about it), and `Input` is already
# imported in this notebook for exactly this purpose.
model = Sequential()
model.add(Input(shape=(X.shape[1],)))  # one input per feature column of X

# First hidden layer, followed by dropout that randomly zeroes half of its
# activations during training (regularisation; inactive at inference time).
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.5))

# Two further fully connected hidden layers.
model.add(Dense(1000, activation='relu'))
model.add(Dense(1000, activation='relu'))

# Single sigmoid unit: outputs a value in (0, 1) per row, used as the
# predicted probability of the positive class.
model.add(Dense(1, activation='sigmoid'))

In [10]:
# Print the layer-by-layer architecture with output shapes and parameter
# counts, to confirm the network was assembled as intended.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1000)              14000     
                                                                 
 dropout (Dropout)           (None, 1000)              0         
                                                                 
 dense_1 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_2 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_3 (Dense)             (None, 1)                 1001      
                                                                 
Total params: 2,017,001
Trainable params: 2,017,001
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Configure training: binary cross-entropy loss (matches the single sigmoid
# output), the Adam optimizer with its default settings, and accuracy
# reported as an additional metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# Train for 5 epochs in mini-batches of 20 rows, printing per-epoch progress.
# The returned History object is bound to `history` instead of being left as
# the cell's last expression: that keeps the per-epoch loss/accuracy values
# available for later inspection (history.history) and stops the object's
# memory-address repr from being written into the notebook output.
history = model.fit(
    X_train,
    y_train.to_numpy(),
    batch_size=20,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2597e42d9a0>

In [13]:
# Score the held-out rows. Each prediction is the sigmoid output of the final
# layer, i.e. one value between 0 and 1 per test row.
y_preds = model.predict(x=X_test)
y_preds



array([[0.10017665],
       [0.07867154],
       [0.01510875],
       ...,
       [0.02423517],
       [0.99996674],
       [0.08973625]], dtype=float32)

In [14]:
# Evaluate on the held-out set. The result lists the loss first, followed by
# the metrics given to compile() -- here [binary cross-entropy, accuracy].
model.evaluate(x=X_test, y=y_test.to_numpy())



[0.35013484954833984, 0.8579999804496765]

In [15]:
# Persist the trained model to disk.
# NOTE: only the network is written here. The fitted `scaler` is a separate
# object and is not saved, so anything that reloads this file must scale its
# inputs the same way before calling predict().
model.save(filepath='my_model.h5')

In [16]:
from tensorflow.keras.models import load_model

In [17]:
# Restore the model from the file written by the save step above.
new_model = load_model(filepath='my_model.h5')

In [19]:
# Verify the save/load round trip: the reloaded model must reproduce the
# original model's predictions (`y_preds`) on the same scaled test inputs.
# An explicit assertion replaces comparing two truncated array printouts by
# eye; the small tolerances only allow for floating-point noise.
reloaded_preds = new_model.predict(X_test)
np.testing.assert_allclose(reloaded_preds, y_preds, rtol=1e-5, atol=1e-6)
reloaded_preds



array([[0.10017665],
       [0.07867154],
       [0.01510875],
       ...,
       [0.02423517],
       [0.99996674],
       [0.08973625]], dtype=float32)