#### Determining the optimal number of hidden layers and neurons for an Artificial Neural Network (ANN)

This can be challenging and often requires experimentation. However, some guidelines can be followed:

* Start Simple: Begin with a simple architecture and gradually increase complexity if needed.
* Grid Search/Random Search: Use grid or random search to try different architectures.
* Cross-Validation: Use cross-validation to evaluate the performance of different architectures.
* Heuristics and rules of thumb: Some heuristics and empirical rules can provide a starting point:
    - The number of neurons in the hidden layer should be between the size of the input layer and the size of the output layer.
    - A common practice is to start with 1-2 hidden layers.

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [24]:
# Load the churn dataset and discard the identifier columns.
data = pd.read_csv('Churn_Modelling.csv').drop(
    columns=['RowNumber', 'CustomerId', 'Surname']
)

# Encode 'Gender' as integer labels; the fitted encoder is kept for reuse.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])

# One-hot encode 'Geography' into a dense array with one column per category.
onehot_encoder_geo = OneHotEncoder(handle_unknown='ignore')
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoder_df = pd.DataFrame(
    geo_encoder,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
)

# Swap the raw 'Geography' column for its one-hot columns.
data = pd.concat(
    [data.drop(columns='Geography'), geo_encoder_df],
    axis='columns',
)

# Separate the target ('Exited') from the feature matrix.
y = data['Exited']
X = data.drop(columns='Exited')

# Hold out 20% of the rows for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted encoders and scaler for later use.
for pkl_path, fitted_obj in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
    ('scaler.pkl', scaler),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_obj, file)

In [25]:
# Define a function that builds the model so different hyperparameters can be tried (via KerasClassifier)

def create_model(neurons=32, layers=1, meta=None):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int, default 32
        Number of units in every hidden layer.
    layers : int, default 1
        Number of hidden layers. The first hidden layer is always added, so
        any value below 1 still produces a single hidden layer.
    meta : dict or None, default None
        Metadata dictionary that scikeras supplies to model-building
        functions declaring a ``meta`` parameter. When it contains
        ``"n_features_in_"``, that value sets the input width. Otherwise the
        width falls back to ``X_train.shape[1]`` from the notebook namespace,
        so calling ``create_model()`` directly keeps working.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer and binary cross-entropy
        loss, tracking accuracy.
    """
    # Prefer the feature count reported by the wrapper over the notebook
    # global, so the function does not rely on hidden kernel state.
    if meta is not None and "n_features_in_" in meta:
        n_features = meta["n_features_in_"]
    else:
        n_features = X_train.shape[1]

    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_shape=(n_features,)))

    # Remaining hidden layers (none when layers == 1).
    for _ in range(layers - 1):
        model.add(Dense(neurons, activation='relu'))

    # Single sigmoid unit for the binary target.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss="binary_crossentropy", metrics=['accuracy'])

    return model


In [26]:
# Wrap the model-building function in a scikit-learn compatible classifier.
# `model=` is the current scikeras keyword; `build_fn=` is a deprecated alias
# that triggers a warning. `neurons` and `layers` are forwarded to
# create_model, which also makes them tunable by a hyperparameter search.
model = KerasClassifier(
    model=create_model,
    neurons=32,
    layers=1,
    epochs=50,
    batch_size=10,
    verbose=0,
)

In [27]:
# Hyperparameter grid: hidden-layer width, number of hidden layers, and
# number of training epochs (4 x 2 x 2 = 16 combinations).
param_grid = dict(
    neurons=[16, 32, 64, 128],
    layers=[1, 2],
    epochs=[50, 100],
)

In [None]:
# Perform the grid search: every combination in param_grid is evaluated with
# 3-fold cross-validation, running jobs in parallel (n_jobs=-1).
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=1,
)
grid_result = grid.fit(X_train, y_train)

# Report the best cross-validated score and the parameters that achieved it.
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)