In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle



In [None]:
# Read the raw churn dataset from the working directory and preview it.
csv_path = "Churn_Modelling.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Preprocessing, step 1: discard the columns this notebook treats as
# irrelevant for prediction (row counter, customer id, surname).
# NOTE: `data` is reassigned, so re-running this cell without reloading the
# CSV raises a KeyError because the columns are already gone.
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis='columns')


In [None]:
# Preview the first rows of `data` to confirm the current set of columns.
data.head()

In [None]:
# Encode the categorical 'Gender' column as integer labels. The fitted encoder
# stays in `label_encoder_gender` because it is pickled further down.
# NOTE: 'Gender' is overwritten in place, so re-running this cell without
# reloading `data` would refit the encoder on the already-encoded integers.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])
data.head()  # preview only, instead of rendering the whole frame

In [None]:
# One-hot encode 'Geography'. The encoder is fitted on a single-column
# DataFrame (double brackets) and kept for pickling later; the transformed
# matrix is displayed here and turned into a DataFrame in a later cell.
from sklearn.preprocessing import OneHotEncoder
geography_column = data[['Geography']]
onehot_encoder_geo = OneHotEncoder().fit(geography_column)
geo_encoder = onehot_encoder_geo.transform(geography_column)
geo_encoder

In [None]:
# Show the output column names generated by the fitted one-hot encoder.
onehot_encoder_geo.get_feature_names_out()

In [None]:
# Turn the one-hot matrix into a dense DataFrame labelled with the encoder's
# generated column names. The index is copied from `data` because the
# column-wise pd.concat that follows aligns rows on index labels; without it
# the two frames only line up when `data` happens to carry a default
# 0..n-1 index.
geo_encoded_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=onehot_encoder_geo.get_feature_names_out(),
    index=data.index,
)
geo_encoded_df.head()  # preview only, instead of rendering the whole frame

In [None]:
# Combine the one-hot encoded columns with the original data, replacing the
# raw 'Geography' column with its encoded counterparts.
data = pd.concat(
    objs=[data.drop('Geography', axis='columns'), geo_encoded_df],
    axis='columns',
)
data.head()

In [None]:
# Persist both fitted encoders to disk with pickle so the same mappings can be
# reloaded later. (The scaler is fitted and pickled separately further down.)
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as f:
        pickle.dump(fitted_encoder, f)


In [None]:
# Separate the dependent variable from the independent features.
y = data['Exited']  # target variable
X = data.drop('Exited', axis='columns')  # every remaining column is a predictor



In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler learns its statistics from the training
# split only and applies the same transformation to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Persist the fitted scaler next to the pickled encoders.
with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

ANN Implementation

In [None]:

import tensorflow as tf
from tensorflow.keras.models import Sequential # used to create a sequential model
from tensorflow.keras.layers import Dense # used to create a dense layer
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard # used to stop the training when the model stops improving

In [None]:
# Build the ANN: two ReLU hidden layers followed by a single sigmoid unit for
# binary classification.

# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created so that initialisation is repeatable across kernel restarts.
tf.keras.utils.set_random_seed(42)

# Despite the name, this is the input *shape*: a 1-tuple holding the number of
# feature columns in the training matrix.
num_input_layers = (X_train.shape[1],)

model = Sequential([
    # Explicit input layer; preferred over passing `input_shape=` to the first
    # Dense layer, which newer Keras versions warn against.
    tf.keras.Input(shape=num_input_layers),
    Dense(64, activation='relu'),    # hidden layer 1
    Dense(32, activation='relu'),    # hidden layer 2
    Dense(1, activation='sigmoid'),  # output layer for binary classification
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:

# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output) and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
#  setup tensorboard 
import datetime
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

# Give every run its own timestamped sub-directory under logs/fit/ and log
# histograms every epoch (histogram_freq=1).
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_timestamp
tensorflow_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

In [None]:
# Set up early stopping: watch the validation loss, stop once it has failed to
# improve for 10 consecutive epochs, and restore the best weights seen.
early_stopping_callback = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)


In [None]:
# Train for at most 100 epochs; the callbacks log to TensorBoard and stop
# training early when the validation loss plateaus.
# NOTE(review): the test split doubles as validation data here, and early
# stopping picks the weights based on it, so any score later reported on
# X_test/y_test is optimistic. Consider carving a validation split out of the
# training data instead.
training_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    callbacks=training_callbacks,
    validation_data=(X_test, y_test),
)

In [None]:
# Persist the trained model to a single file in the working directory
# (the .h5 suffix selects Keras' HDF5 format).
model.save(filepath='model.h5')

In [None]:
# Load the TensorBoard notebook extension; this registers the %tensorboard
# magic used in the next cell.
%load_ext tensorboard

In [None]:
# Launch TensorBoard inline, pointed at the parent directory that holds the
# timestamped run folders written during training.
%tensorboard --logdir logs/fit

In [None]:
#  load the pickle file
