In [1]:
%cd drive/My Drive/

/content/drive/My Drive


In [2]:
%cd GOOGLE-COLAB/data/

/content/drive/My Drive/GOOGLE-COLAB/data


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

In [4]:
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score


import tensorflow as tf
from tensorflow import keras 



In [5]:
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
X = df.iloc[:,3:13]
y = df.iloc[:,13]
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [7]:
geography=pd.get_dummies(X["Geography"],drop_first=True)
gender=pd.get_dummies(X['Gender'],drop_first=True)

In [8]:
X = pd.concat([X,gender,geography],axis=1)
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Male,Germany,Spain
0,619,France,Female,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,1


In [9]:
X.drop(['Gender','Geography'],axis=1,inplace=True)
X.head()


Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Male,Germany,Spain
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [11]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
X_train.shape

(8000, 11)

In [12]:
def create_model(nodes,activation='relu'):
  model = keras.models.Sequential()
  for i,node in enumerate(nodes):
    if i==0:
      model.add(keras.layers.Dense(node,input_dim=X_train.shape[1]))
      model.add(keras.layers.Activation(activation))
      model.add(keras.layers.Dropout(0.3))
    else:
      model.add(keras.layers.Dense(node))
      model.add(keras.layers.Activation(activation))
      model.add(keras.layers.Dropout(0.3))

  model.add(keras.layers.Dense(units = 1, kernel_initializer= 'glorot_uniform', activation = 'sigmoid'))
    
  model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])
  return model

In [13]:
model = keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, verbose=0)

In [14]:
#layers = [[20], [40, 20], [45, 30, 15]]
layers = [[20], [40, 20], [45, 30, 15]]
activations = ['sigmoid', 'relu']

In [15]:
param_grid = dict(nodes=layers, activation=activations,batch_size = [128, 256], epochs=[10])
param_grid

{'activation': ['sigmoid', 'relu'],
 'batch_size': [128, 256],
 'epochs': [10],
 'nodes': [[20], [40, 20], [45, 30, 15]]}

In [16]:
grid = GridSearchCV(estimator=model, param_grid=param_grid,cv=5)

In [17]:
grid_result = grid.fit(X_train, y_train)

In [18]:
[grid_result.best_score_,grid_result.best_params_]

[0.8381250143051148,
 {'activation': 'relu', 'batch_size': 128, 'epochs': 10, 'nodes': [40, 20]}]

In [26]:
confusion_matrix(y_test,grid.predict(X_test))

array([[1521,   41],
       [ 269,  169]])

In [27]:
accuracy_score(y_test,grid.predict(X_test))

0.845

In [19]:
#!pip install scikit-learn==0.21.2



## Extra Stuff

In [34]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [35]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()

In [37]:
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
X

array([[619, 0, 'Female', ..., 1, 1, 101348.88],
       [608, 2, 'Female', ..., 0, 1, 112542.58],
       [502, 0, 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 0, 'Female', ..., 0, 1, 42085.58],
       [772, 1, 'Male', ..., 1, 0, 92888.52],
       [792, 0, 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [38]:
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
X


array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

In [39]:
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()
X = X[:, 1:]
X

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


array([[0.0000000e+00, 0.0000000e+00, 6.1900000e+02, ..., 1.0000000e+00,
        1.0000000e+00, 1.0134888e+05],
       [0.0000000e+00, 1.0000000e+00, 6.0800000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 1.1254258e+05],
       [0.0000000e+00, 0.0000000e+00, 5.0200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 1.1393157e+05],
       ...,
       [0.0000000e+00, 0.0000000e+00, 7.0900000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 4.2085580e+04],
       [1.0000000e+00, 0.0000000e+00, 7.7200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 9.2888520e+04],
       [0.0000000e+00, 0.0000000e+00, 7.9200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 3.8190780e+04]])

In [45]:
dataset['Geography'] = labelencoder_X_1.fit_transform(dataset['Geography'])
dataset['Gender'] = labelencoder_X_2.fit_transform(dataset['Gender'])

In [47]:
dataset.head(10)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,2,0,43,2,125510.82,1,1,1,79084.1,0
5,6,15574012,Chu,645,2,1,44,8,113755.78,2,1,0,149756.71,1
6,7,15592531,Bartlett,822,0,1,50,7,0.0,2,1,1,10062.8,0
7,8,15656148,Obinna,376,1,0,29,4,115046.74,4,1,0,119346.88,1
8,9,15792365,He,501,0,1,44,4,142051.07,2,0,1,74940.5,0
9,10,15592389,H?,684,0,1,27,2,134603.88,1,1,1,71725.73,0
