### Determining the optimal number of hidden layers and neurons for an Artificial Neural Network (ANN) 
This can be challenging and often requires experimentation. However, there are some guidelines and methods that can help you in making an informed decision:

- Start Simple: Begin with a simple architecture and gradually increase complexity if needed.
- Grid Search/Random Search: Use grid search or random search to try different architectures.
- Cross-Validation: Use cross-validation to evaluate the performance of different architectures.
- Heuristics and Rules of Thumb: Some heuristics and empirical rules can provide starting points, such as:
  -    The number of neurons in the hidden layer should be between the size of the input layer and the size of the output layer.
  -  A common practice is to start with 1-2 hidden layers.

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline # used for scikit-learn pipelines for preprocessing and model training
from scikeras.wrappers import KerasClassifier #used for Keras with scikit-learn for GridSearchCV hyperparameter tuning
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import pickle

In [30]:
data=pd.read_csv('Churn_Modelling.csv')
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

onehot_encoder_geo = OneHotEncoder(handle_unknown='ignore')
geo_encoded = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoded_df = pd.DataFrame(geo_encoded, columns=onehot_encoder_geo.get_feature_names_out(['Geography']))

data = pd.concat([data.drop('Geography', axis=1), geo_encoded_df], axis=1)

X = data.drop('Exited', axis=1)
y = data['Exited']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save encoders and scaler for later use
with open('label_encoder_gender.pkl', 'wb') as file:
    pickle.dump(label_encoder_gender, file)

with open('onehot_encoder_geo.pkl', 'wb') as file:
    pickle.dump(onehot_encoder_geo, file)

with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [31]:
print(y_train.isna().sum())  # Check for missing values

0


In [32]:
# Define a function to create the model
def create_model(optimizer='adam', neurons=32, layers=1):
    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_shape=(X_train.shape[1],)))

    # Add additional layers
    for _ in range(layers - 1):
        model.add(Dense(neurons, activation='relu'))

    # Output layer
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])
    return model

In [33]:
# Create a KerasClassifier
# model = KerasClassifier(model=create_model, verbose=1)
from sklearn.pipeline import Pipeline

# Define Keras model wrapper
model = KerasClassifier(model=create_model, verbose=1, layers=1, neurons=32, optimizer='adam')

# Create a pipeline
pipeline = Pipeline([
    ('model', model)
])

In [34]:
# Define the parameter grid for GridSearchCV
param_grid = {
    'model__optimizer': ['adam', 'rmsprop'],  # Optimizers to try
    'model__neurons': [16, 32],               # Number of neurons to try
    'model__layers': [1, 3],                  # Number of layers to try
    'model__batch_size': [16, 32],            # Batch sizes to try
    'model__epochs': [10, 20]                 # Number of epochs to try
}
# Note: The batch size and epochs are not included in the pipeline as they are passed directly to the KerasClassifier

# Perform grid search
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, n_jobs=-1, cv=2)
grid_result = grid.fit(X_train, y_train)
# Print the best parameters and accuracy
print(f"Best parameters: {grid_result.best_params_}")
print(f"Best accuracy: {grid_result.best_score_}")
# Evaluate the model on the test set
# best_model = grid_result.best_estimator_
# test_accuracy = best_model.score(X_test, y_test)
# print(f"Test accuracy: {test_accuracy}")

Epoch 1/10


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7051 - loss: 0.5966
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8209 - loss: 0.4280
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8301 - loss: 0.4030  
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8354 - loss: 0.3971
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8400 - loss: 0.3846
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 924us/step - accuracy: 0.8412 - loss: 0.3788
Epoch 7/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 954us/step - accuracy: 0.8474 - loss: 0.3707
Epoch 8/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 888us/step - accuracy: 0.8518 - loss: 0.3607
Epoch 9/10
[1m500/500[0m [32m━━━━━━━━━━━