In [10]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.preprocessing import StandardScaler , LabelEncoder , OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [11]:
# Report the installed versions of the core libraries so results can be
# tied to a specific environment.
import sklearn
import scikeras
import tensorflow as tf

for _lib in (sklearn, scikeras, tf):
    print(_lib.__version__)


1.2.2
0.12.0
2.10.0


In [12]:
# data.columns

In [13]:
# Load the churn data set and drop the RowNumber, CustomerId and Surname
# columns, which are not used as model features.
data = pd.read_csv('Churn_Modelling.csv')
data = data.drop(["RowNumber", "CustomerId", "Surname"], axis=1)

# Encode the Gender column as integer labels.
label_encoder_gender = LabelEncoder()
data["Gender"] = label_encoder_gender.fit_transform(data["Gender"])

# One-hot encode Geography. handle_unknown='ignore' makes categories not seen
# during fitting encode as all zeros instead of raising at transform time.
onehot_encoder_geo = OneHotEncoder(handle_unknown='ignore')
geo_encoder = onehot_encoder_geo.fit_transform(data[["Geography"]]).toarray()
geo_encoder_df = pd.DataFrame(
    geo_encoder,
    columns=onehot_encoder_geo.get_feature_names_out(["Geography"]),
    index=data.index,  # keep rows aligned with `data` for the concat below
)

# Replace the raw Geography column with its one-hot columns.
data = pd.concat([data.drop("Geography", axis=1), geo_encoder_df], axis=1)

# Split into features and the binary target column "Exited".
X = data.drop("Exited", axis=1)
y = data["Exited"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training features only, then apply the same
# transformation to the test features. The result must be assigned back to
# X_test: assigning it to y_test would overwrite the test labels and leave
# X_test unscaled.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Sanity check: features and labels must still line up after scaling.
assert X_train.shape[0] == len(y_train)
assert X_test.shape[0] == len(y_test)
assert X_train.shape[1] == X_test.shape[1]

# Save encoders and scaler for later use
for pkl_path, artifact in (
    ("label_encoder_gender.pkl", label_encoder_gender),
    ("onehot_encoder_geo.pkl", onehot_encoder_geo),
    ("scaler.pkl", scaler),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(artifact, file)

In [14]:
# Define a function that builds the model so different parameters can be tried (KerasClassifier)
def create_model(neurons=32, layers=1):
    """Build and compile a dense network with a single sigmoid output.

    The input width is read from the module-level ``X_train`` (its number of
    columns), so ``X_train`` must already exist when this function is called.

    Parameters
    ----------
    neurons : int
        Number of units in every hidden ``Dense`` layer.
    layers : int
        Number of hidden layers. The first hidden layer is always added, so
        values below 1 still produce one hidden layer.

    Returns
    -------
    A compiled ``Sequential`` model (Adam optimizer, binary cross-entropy
    loss, accuracy metric).
    """
    n_features = X_train.shape[1]

    network = Sequential()

    # First hidden layer also declares the input shape.
    first_hidden = Dense(neurons, activation='relu', input_shape=(n_features,))
    network.add(first_hidden)

    # Any remaining hidden layers share the same width and activation.
    extra_hidden = [Dense(neurons, activation='relu') for _ in range(layers - 1)]
    for hidden in extra_hidden:
        network.add(hidden)

    # Single sigmoid unit for the binary target.
    network.add(Dense(1, activation='sigmoid'))

    network.compile(
        optimizer='adam',
        loss="binary_crossentropy",
        metrics=['accuracy'],
    )
    return network


In [15]:
# Create a Keras classifier wrapped for use with scikit-learn.
# `model=` is the current scikeras argument for the build function; the older
# `build_fn=` spelling is deprecated. `layers` and `neurons` are set here so
# they are exposed as tunable parameters and passed on to create_model.
model = KerasClassifier(model=create_model, layers=1, neurons=32, verbose=1)

In [16]:
# Grid of hyper-parameter values to search over (4 x 2 x 2 = 16 combinations).
# `neurons` and `layers` match the parameters of create_model.
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layers=[1, 2],
    epochs=[50, 100],
)


In [None]:
# Exhaustive search over `param_grid` with 3-fold cross-validation.
# GridSearchCV is already imported in the notebook's import cell, so it is
# not re-imported here.
# NOTE(review): n_jobs=-1 runs the fits in parallel worker processes. If the
# TensorFlow workers hang or run out of memory, try n_jobs=1.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=1,
)

# fit() returns the fitted search object itself, so `grid_result` and `grid`
# refer to the same instance.
grid_result = grid.fit(X_train, y_train)

print("Best Score:", grid_result.best_score_)
print("Best Params:", grid_result.best_params_)

Fitting 3 folds for each of 16 candidates, totalling 48 fits


1.2.2
0.12.0
2.10.0
