In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
import tensorflow as tf
import pickle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
df = pd.read_csv("Churn_Modelling.csv")

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
gender_encoder = LabelEncoder()
df["Gender"]=gender_encoder.fit_transform(df["Gender"])
geo_onehot_encoder = OneHotEncoder()
onehot_encoded = geo_onehot_encoder.fit_transform(df["Geography"].values.reshape(-1,1)).toarray()
geo_df = pd.DataFrame(onehot_encoded, columns = ["France", "Germany", "Spain"])
df = pd.concat([df, geo_df], axis=1)
df.drop("Geography", axis=1, inplace=True)


In [5]:
df=df.iloc[:, 3:13]

In [6]:
df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,42,2,0.00,1,1,1,101348.88,1
1,608,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,42,8,159660.80,3,1,0,113931.57,1
3,699,0,39,1,0.00,2,0,0,93826.63,0
4,850,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,0,36,7,0.00,1,0,1,42085.58,1
9998,772,1,42,3,75075.31,2,1,0,92888.52,1


In [7]:
X = df.drop("Exited", axis=1)
Y = df["Exited"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [8]:
X_train

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
9254,686,1,32,6,0.00,2,1,1,179093.26
1561,632,1,42,4,119624.60,2,1,1,195978.86
1670,559,1,24,3,114739.92,1,1,0,85891.02
6087,561,0,27,9,135637.00,1,1,0,153080.40
6669,517,1,56,9,142147.32,1,0,0,39488.04
...,...,...,...,...,...,...,...,...,...
5734,768,1,54,8,69712.74,1,1,1,69381.05
5191,682,0,58,1,0.00,1,1,1,706.50
5390,735,0,38,1,0.00,3,0,0,92220.12
860,667,1,43,8,190227.46,1,1,0,97508.04


In [9]:
scaler = StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [10]:

with open("ht_label_encoder_gender.pkl", "wb") as file:
    pickle.dump(gender_encoder, file)
with open("ht_onehot_encoder_geo.pkl", "wb") as file:
    pickle.dump(geo_onehot_encoder, file)
with open("ht_scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)

In [15]:
import sklearn
print(sklearn.__version__)

1.5.2


In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Define the model creation function that accepts hyperparameters as arguments
def create_model(layers=1, units=32):
    model = Sequential()
    # Add the first layer
    model.add(Dense(units, input_dim=X_train.shape[1], activation='relu'))
    
    # Add additional layers based on the 'layers' parameter
    for _ in range(layers - 1):
        model.add(Dense(units, activation='relu'))
    
    # Output layer
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Create the KerasClassifier by passing the model creation function
model = KerasClassifier(layers=1,units=32,build_fn=create_model, epochs=100, batch_size=10, verbose=0)

# Define the parameter grid to search over
param_grid = {
    'layers': [1, 2, 3],  # Number of layers to try
    'units': [32, 64],  # Number of units per layer
    'epochs': [50, 100]  # Number of epochs
}

# Create the GridSearchCV object
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, verbose=1)

# Fit the grid search
grid_result = grid.fit(X_train, Y_train)

# Print the best result
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")


Fitting 3 folds for each of 12 candidates, totalling 36 fits


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best: 0.8533743859661954 using {'epochs': 50, 'layers': 1, 'units': 64}
