In [1]:
import pandas as pd
import numpy as np

Churn = pd.read_csv('Churn_Modelling.csv')
Churn.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [2]:
Churn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [3]:
X = Churn.iloc[:, 3 : 13]

In [4]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [5]:
y = Churn.iloc[: , 13]
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

In [6]:
Churn.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [7]:
geography = pd.get_dummies(X['Geography'])
gender = pd.get_dummies(X['Gender'], drop_first = True)

In [8]:
geography, gender

(      France  Germany  Spain
 0          1        0      0
 1          0        0      1
 2          1        0      0
 3          1        0      0
 4          0        0      1
 ...      ...      ...    ...
 9995       1        0      0
 9996       1        0      0
 9997       1        0      0
 9998       0        1      0
 9999       1        0      0
 
 [10000 rows x 3 columns],
       Male
 0        0
 1        0
 2        0
 3        0
 4        0
 ...    ...
 9995     1
 9996     1
 9997     0
 9998     1
 9999     0
 
 [10000 rows x 1 columns])

In [9]:
X = pd.concat([X, geography], axis = 1)
X = X.drop(['Geography'], axis = 1)
X['Gender'] = gender

In [10]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Gender           10000 non-null  uint8  
 2   Age              10000 non-null  int64  
 3   Tenure           10000 non-null  int64  
 4   Balance          10000 non-null  float64
 5   NumOfProducts    10000 non-null  int64  
 6   HasCrCard        10000 non-null  int64  
 7   IsActiveMember   10000 non-null  int64  
 8   EstimatedSalary  10000 non-null  float64
 9   France           10000 non-null  uint8  
 10  Germany          10000 non-null  uint8  
 11  Spain            10000 non-null  uint8  
dtypes: float64(2), int64(6), uint8(4)
memory usage: 664.2 KB


In [11]:
X.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,1,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,1


In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

In [13]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from kerastuner.tuners import RandomSearch

In [32]:
X_train.shape
from tensorflow import keras

In [33]:
def model_NN(hp):
    model = Sequential()
    for i in range(hp.Int('num_layers', 2, 30)):
        model.add(Dense(units = hp.Int('units_' + str(i), min_value = 32, max_value = 600, step = 64), activation = 'relu'))
    
    model.add(Dense(1, kernel_initializer = 'normal', activation = 'sigmoid'))
    model.compile(loss = 'binary_crossentropy', optimizer = keras.optimizers.Adam(hp.Choice('learning_rate',[1e-2, 1e-3, 1e-4])), metrics = ['accuracy'])
    return model

In [34]:
tuner = RandomSearch(model_NN, objective = 'val_accuracy', max_trials = 5, executions_per_trial = 3, directory = 'project2', project_name = 'Churn1')

In [35]:
tuner.search(X_train, y_train, epochs = 5, validation_data = (X_test, y_test))

Trial 5 Complete [00h 01m 40s]
val_accuracy: 0.7925000190734863

Best val_accuracy So Far: 0.8571666677792867
Total elapsed time: 00h 08m 18s
INFO:tensorflow:Oracle triggered exit
