In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle


In [35]:
!pip install pandas scikit-learn pickle-mixin



In [36]:
##Load the dataset
data =pd.read_csv('Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [37]:
###Encode categorical varibales
label_encoder_gender=LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [38]:
##one hot encoding
from sklearn.preprocessing import OneHotEncoder
onehot_encoder_geo =OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']])
geo_encoder


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [39]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [40]:
geo_encoded_df = pd.DataFrame(geo_encoder.toarray(),columns=onehot_encoder_geo.get_feature_names_out(['Geography']))
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [41]:
##Conbine one hot encoder columns with the original dataframe
data =pd.concat([data.drop('Geography',axis=1),geo_encoded_df],axis=1)
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,1,15634602,Hargrave,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,2,15647311,Hill,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,3,15619304,Onio,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,4,15701354,Boni,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,5,15737888,Mitchell,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [42]:
##save the encoders and scaler
with open('label_encoder_gender.pkl','wb') as f:
    pickle.dump(label_encoder_gender,f)

with open('onehot_encoder_geo.pkl','wb') as f:
    pickle.dump(onehot_encoder_geo,f)  

In [43]:
##divide the dataset into independent and dependent features
X = data.drop(['Exited', 'Surname'], axis=1)  # Drop 'Surname' as it's non-numeric
Y = data['Exited']

##split the data in training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
##Scale the features
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [44]:
X_train

array([[ 1.4692775 , -1.24645641,  0.35649971, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-1.19499359,  1.05381124, -0.20389777, ..., -0.99850112,
         1.72572313, -0.57638802],
       [-1.15724427,  0.3664786 , -0.96147213, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.13108128, -1.6259648 ,  0.86500853, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-1.43776677,  1.62852321,  0.15932282, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.78217054,  0.21718071,  0.47065475, ..., -0.99850112,
         1.72572313, -0.57638802]], shape=(8000, 14))

In [47]:
with open('scaler.pkl','wb') as f:
  pickle.dump(scaler,f)

In [49]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.5 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloa

In [51]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime


In [53]:
X_train.shape[1]

14

In [54]:
##Build our ANN model
model= Sequential([
    Dense(64,activation='relu',input_shape=(X_train.shape[1],)), ##Hidden layer 1
    Dense(32,activation='relu'), ##Hidden layer 2
    Dense(1,activation='sigmoid') ##Output layer

]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [55]:
model.summary()

In [58]:
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy()

In [59]:
##Compile the model
model.compile(optimizer=opt,loss=loss,metrics=['accuracy'])

In [62]:
## set up the tensorboard
log_dir = 'logs/fit/'+ datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback = TensorBoard(log_dir=log_dir,histogram_freq=1)

In [65]:
## Set up early stopping
early_stopping_callback = EarlyStopping(monitor="val_loss",patience=10,restore_best_weights=True)

In [66]:
### Train the model
history = model.fit(X_train,Y_train,validation_data=(X_test,Y_test),epochs=100,callbacks=[early_stopping_callback,tensorflow_callback])

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 677us/step - accuracy: 0.8611 - loss: 0.3351 - val_accuracy: 0.8540 - val_loss: 0.3669
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 569us/step - accuracy: 0.8596 - loss: 0.3350 - val_accuracy: 0.8585 - val_loss: 0.3475
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 562us/step - accuracy: 0.8627 - loss: 0.3304 - val_accuracy: 0.8545 - val_loss: 0.3474
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 559us/step - accuracy: 0.8643 - loss: 0.3273 - val_accuracy: 0.8605 - val_loss: 0.3542
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 565us/step - accuracy: 0.8652 - loss: 0.3260 - val_accuracy: 0.8620 - val_loss: 0.3657
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 564us/step - accuracy: 0.8665 - loss: 0.3244 - val_accuracy: 0.8545 - val_loss: 0.3597
Epoch 7/10

In [68]:
model.save('model.h5')



In [69]:
##Load Tensorboard Extension
%load_ext tensorboard

In [71]:
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 38645), started 0:01:37 ago. (Use '!kill 38645' to kill it.)

In [None]:
###Load the pickle file
