# Experiment 5

## Problem Statement:
To build an advance ANN classification model for churn modelling data with: 
- a. Cross Validation 
- b. Grid Search 
- c. Checkpoint

## GitHub & Google Colab Links: 

GitHub Link: https://github.com/piyush-gambhir/ncu-lab-manual-and-end-semester-projects/blob/main/NCU-CSL312%20-%20DL%20-%20Lab%20Manual/Experiment%205/Experiment%205.ipynb

Google Colab Link:

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/piyush-gambhir/ncu-lab-manual-and-end-semester-projects/blob/main/NCU-CSL312%20-%20DL%20-%20Lab%20Manual/Experiment%205/Experiment%205.ipynb)


## Installing Dependencies:

In [8]:
! pip install tabulate numpy pandas matplotlib seaborn



## Code

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.base import BaseEstimator, ClassifierMixin

In [10]:

# Load the dataset
data = pd.read_csv("./churn_modelling.csv")

# Drop the columns that are not needed for modeling
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Separate features and target variable
X = data.drop('Exited', axis=1)
y = data['Exited']

# Preprocessing for numeric columns: scale numeric features
numeric_features = X.select_dtypes(
    include=['int64', 'float64']).columns.difference(['HasCrCard', 'IsActiveMember'])
numeric_transformer = StandardScaler()

# Preprocessing for categorical columns: one-hot encode categorical features
categorical_features = ['Geography', 'Gender']
categorical_transformer = OneHotEncoder(drop='first')

# Create the preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])



In [11]:
# Define the Keras Classifier Wrapper


class KerasClassifierWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, neurons=64):
        self.neurons = neurons
        self.model = None

    def fit(self, X, y, **kwargs):
        def create_model():
            model = Sequential()
            model.add(Dense(self.neurons, activation='relu',
                      input_shape=(X.shape[1],)))
            model.add(Dropout(0.2))
            model.add(Dense(self.neurons, activation='relu'))
            model.add(Dropout(0.2))
            model.add(Dense(1, activation='sigmoid'))
            model.compile(optimizer='adam',
                          loss='binary_crossentropy', metrics=['accuracy'])
            return model

        self.model = create_model()
        self.model.fit(X, y, **kwargs)
        return self

    def predict(self, X, **kwargs):
        return (self.model.predict(X, **kwargs) > 0.5).astype("int32")

    def score(self, X, y, **kwargs):
        _, accuracy = self.model.evaluate(X, y, **kwargs)
        return accuracy

    def get_params(self, deep=True):
        return {'neurons': self.neurons}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [12]:



# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Set up a pipeline that includes preprocessing and the estimator
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', KerasClassifierWrapper())])

# Hyperparameter grid
param_grid = {
    'classifier__neurons': [32, 64, 128],
}

# Grid search setup
grid = GridSearchCV(pipeline, param_grid, cv=3)

# Perform the grid search
grid_result = grid.fit(X_train, y_train)

# Evaluate the model
print("Best parameters found: ", grid_result.best_params_)
print("Best accuracy found: ", grid_result.best_score_)

best_model = grid_result.best_estimator_
X_test_transformed = best_model.named_steps['preprocessor'].transform(X_test)
test_accuracy = best_model.named_steps['classifier'].score(
    X_test_transformed, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7645 - loss: 0.5414
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7920 - loss: 0.4683


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7094 - loss: 0.5776
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7811 - loss: 0.4674


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.6786 - loss: 0.6017
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8058 - loss: 0.4477


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7945 - loss: 0.5081
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8012 - loss: 0.4535


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7771 - loss: 0.5325
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7934 - loss: 0.4441


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7644 - loss: 0.5186
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8148 - loss: 0.4228


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7817 - loss: 0.4978
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8183 - loss: 0.4240


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7870 - loss: 0.4995
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7992 - loss: 0.4358


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7589 - loss: 0.5112
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8237 - loss: 0.4139 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7720 - loss: 0.5011
Best parameters found:  {'classifier__neurons': 128}
Best accuracy found:  0.8147505720456442
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8328 - loss: 0.3904 
Test Accuracy: 0.8415
