<a href="https://colab.research.google.com/github/ranesh88/Churn-Prediction-using-ANN/blob/main/HyperparameterTuningANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [None]:
# Install scikeras, the package that provides scikeras.wrappers.KerasClassifier used in this notebook.
!pip install scikeras

In [None]:
# Load the churn table and drop the row-number, customer-id and surname columns.
data = pd.read_csv('/content/Churn_Modelling.csv').drop(
    columns=['RowNumber', 'CustomerId', 'Surname']
)

# Replace the Gender column with integer labels from a fitted LabelEncoder.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])

# One-hot encode Geography into a separate frame with named indicator columns.
onehot_encoder_geo = OneHotEncoder(handle_unknown='ignore').fit(data[['Geography']])
geo_encoded = onehot_encoder_geo.transform(data[['Geography']]).toarray()
geo_encoded_df = pd.DataFrame(
    geo_encoded,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
)

# Swap the raw Geography column for its one-hot indicator columns.
data = pd.concat([data.drop(columns='Geography'), geo_encoded_df], axis=1)

# 'Exited' is the target; every remaining column is a feature.
y = data['Exited']
X = data.drop(columns='Exited')

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Save encoders and scaler for later use
for pkl_path, fitted in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
    ('scaler.pkl', scaler),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted, file)

In [None]:
# Upgrade scikeras, scikit-learn and tensorflow to the newest versions pip can resolve.
!pip install --upgrade scikeras scikit-learn tensorflow


In [None]:
from sklearn.model_selection import train_test_split
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

# Define the model function
def create_model(input_dim, optimizer='adam', activation='relu'):
    """Build and compile a one-hidden-layer binary classifier.

    Args:
        input_dim: Number of input features per sample.
        optimizer: Optimizer name (or instance) handed to ``model.compile``.
        activation: Activation function of the 64-unit hidden layer.

    Returns:
        A compiled ``Sequential`` model: a 64-unit hidden layer followed by a
        single sigmoid output unit, trained with binary cross-entropy and
        tracking accuracy.
    """
    # Local import so the function does not rely on an import from another cell.
    from tensorflow.keras import Input

    model = Sequential()
    # Declare the input shape with an explicit Input layer; passing
    # ``input_dim`` to Dense is the legacy form that current Keras warns about.
    model.add(Input(shape=(input_dim,)))
    model.add(Dense(64, activation=activation))
    model.add(Dense(1, activation='sigmoid'))  # For binary classification
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create a function to perform manual grid search
def manual_grid_search(X_train, y_train, X_val, y_val, param_grid):
    """Try every hyperparameter combination and report the best one.

    For each combination a new ``KerasClassifier`` wrapping ``create_model``
    is fit on ``(X_train, y_train)`` and scored on ``(X_val, y_val)`` with the
    classifier's ``score`` method. Progress and results are printed.

    Args:
        X_train: Training features; ``X_train.shape[1]`` is passed to the
            model as ``input_dim``.
        y_train: Training targets.
        X_val: Validation features used for scoring.
        y_val: Validation targets used for scoring.
        param_grid: Mapping with the keys ``'optimizer'``, ``'activation'``,
            ``'epochs'`` and ``'batch_size'``, each an iterable of candidates.

    Returns:
        A ``(best_score, best_params)`` tuple. If no combination is evaluated
        the result is ``(-np.inf, {})``.
    """
    # Lazily enumerate combinations in the same order as four nested loops
    # (optimizer outermost, batch_size innermost).
    combos = (
        (opt, act, n_epochs, bs)
        for opt in param_grid['optimizer']
        for act in param_grid['activation']
        for n_epochs in param_grid['epochs']
        for bs in param_grid['batch_size']
    )

    top_score, top_params = -np.inf, {}

    for opt, act, n_epochs, bs in combos:
        print(f"Training model with optimizer={opt}, activation={act}, epochs={n_epochs}, batch_size={bs}")

        # A fresh dict per candidate, so it can be kept as-is if it wins.
        candidate = {'optimizer': opt, 'activation': act, 'epochs': n_epochs, 'batch_size': bs}

        # Build and train a classifier for this candidate.
        clf = KerasClassifier(model=create_model, **candidate, input_dim=X_train.shape[1])
        clf.fit(X_train, y_train)

        # Score on the validation data.
        val_score = clf.score(X_val, y_val)
        print(f"Score: {val_score}")

        # Strict comparison: on a tie the earlier candidate is kept.
        if val_score > top_score:
            top_score, top_params = val_score, candidate

    print("\nBest score: ", top_score)
    print("Best parameters: ", top_params)
    return top_score, top_params


# Define the parameter grid for manual grid search
param_grid = {
    'optimizer': ['adam', 'sgd'],
    'activation': ['relu', 'tanh'],
    'epochs': [10, 20],
    'batch_size': [32, 64],
}

# Tune on the churn data prepared earlier in this notebook rather than on
# random placeholder arrays. The validation set is carved out of the scaled
# training split, so X_test / y_test stay untouched for the final evaluation.
# New names (X_tune / y_tune) keep X_train / y_train intact when the cell is
# re-run.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Run the manual grid search
best_score, best_params = manual_grid_search(X_tune, y_tune, X_val, y_val, param_grid)

# Output the best found parameters and score
print("Best model's parameters:", best_params)
print("Best model's score:", best_score)
