In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle
import streamlit as st
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime
import tensorflow



In [2]:
#Load data set
data = pd.read_csv("Churn_Modelling.csv")

print(data.head())

#Preprocess the data
## Drop irrelevent columns

data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis = 1)
print(data.head())

#Encode categorical variables
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

print(data.head())

# OneHotEncode Geography

onehot_encoder_geo = OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']])
print(geo_encoder.toarray())

print(onehot_encoder_geo.get_feature_names_out(['Geography']))

geo_encoded_df = pd.DataFrame(geo_encoder.toarray(), columns=onehot_encoder_geo.get_feature_names_out(['Geography']))

print(geo_encoded_df)

#Combine OneHotEncode Data with the original data

data = pd.concat([data.drop('Geography', axis= 1), geo_encoded_df], axis=1)
print(data.head())

#Save the encoder and scaler

with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(label_encoder_gender, file)

with open('onehot_encoder_geo.pkl', 'wb') as file:
    pickle.dump(onehot_encoder_geo, file)   


#Divide the dataset into dependent and independent features
x = data.drop('Exited', axis = 1)
y = data['Exited']

print(y.head())

#Split the data in training and testing sets

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)

## Scale these features
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)


print(x_train)

with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)



#Build our ANN model

model = Sequential([
    Dense(64, activation='relu', input_shape = (x_train.shape[1],)), # HL1 Connected with input layer
    Dense(32, activation = 'relu'), #HL2
    Dense(1, activation= 'sigmoid') ## Output layer
])    

model.summary()

#Compile the model (Inorder to do the forward and backward propagation)

opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)
loss = tensorflow.keras.losses.BinaryCrossentropy()

model.compile(optimizer= opt, loss= loss, metrics=['accuracy'])

##Set up the tensorboard
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
log_dir = "log/fit/" +datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

tensorflow_callback = TensorBoard(log_dir= log_dir, histogram_freq = 1)

#Set up Early Stopping
early_stopping_callback = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights= True)

#Train the model
history = model.fit(
    x_train, y_train, validation_data = (x_test, y_test), epochs = 100,
    callbacks= [tensorflow_callback, early_stopping_callback]
)

model.save('model.h5')

# Load Tensorboard Extension
%load_ext tensorboard

%tensorboard --logdir log/fit

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8053 - loss: 0.4513 - val_accuracy: 0.8525 - val_loss: 0.3542
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8553 - loss: 0.3673 - val_accuracy: 0.8550 - val_loss: 0.3437
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8534 - loss: 0.3499 - val_accuracy: 0.8570 - val_loss: 0.3466
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8598 - loss: 0.3429 - val_accuracy: 0.8555 - val_loss: 0.3492
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 981us/step - accuracy: 0.8578 - loss: 0.3368 - val_accuracy: 0.8565 - val_loss: 0.3477
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8661 - loss: 0.3281 - val_accuracy: 0.8580 - val_loss: 0.3410
Epoch 7/100
[1m250/



Reusing TensorBoard on port 6007 (pid 92644), started 0:43:57 ago. (Use '!kill 92644' to kill it.)