In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder,LabelEncoder
import pickle

In [2]:
data=pd.read_csv('Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
#drop first three columns
data=data.drop(columns=['RowNumber','CustomerId','Surname'],axis=1)
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
##Encode categorial features(gender)
label_encode_gender=LabelEncoder()
data['Gender']=label_encode_gender.fit_transform(data['Gender'])


In [5]:
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [6]:
data.tail()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9995,771,France,1,39,5,0.0,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.0,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1
9999,792,France,0,28,4,130142.79,1,1,0,38190.78,0


In [7]:
#one hot encoding for geography
one_hot_encode_geo=OneHotEncoder()
geo_encoder=one_hot_encode_geo.fit_transform(data[['Geography']])
geo_encoder

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [8]:
geo_encoder.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]], shape=(10000, 3))

In [9]:
one_hot_encode_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [10]:
data_new=pd.DataFrame(geo_encoder.toarray(),columns=one_hot_encode_geo.get_feature_names_out(['Geography']))

In [11]:
data_new

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [12]:
data = pd.concat([data, data_new], axis=1)


In [13]:
data=data.drop(columns='Geography',axis=1)

In [14]:
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [15]:
#save encoders and scalar
with open('label_encoder_gender.pkl','wb')as file:
    pickle.dump(label_encode_gender,file)
with open('one_hot_encoder_geo.pkl','wb') as file:
    pickle.dump(one_hot_encode_geo,file)

In [16]:
#Divide data in independent and dependent
X=data.drop(columns='Exited',axis=1)
Y=data['Exited']

In [17]:
X

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,0.0,1.0,0.0


In [18]:
Y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [19]:
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.2)
scalar=StandardScaler()
X_train=scalar.fit_transform(X_train)
X_test=scalar.transform(X_test)

In [20]:
X_train

array([[ 0.9665869 , -1.0944415 , -0.37793298, ..., -0.99975003,
         1.73494238, -0.57850497],
       [ 0.8112183 ,  0.91370804,  0.57454909, ..., -0.99975003,
         1.73494238, -0.57850497],
       [ 0.98730271,  0.91370804, -0.47318119, ..., -0.99975003,
        -0.57638802,  1.72859362],
       ...,
       [-1.02213118,  0.91370804,  1.43178295, ..., -0.99975003,
         1.73494238, -0.57850497],
       [-1.0532049 , -1.0944415 ,  0.19355626, ...,  1.00025003,
        -0.57638802, -0.57850497],
       [-0.01741423, -1.0944415 , -1.90190429, ...,  1.00025003,
        -0.57638802, -0.57850497]], shape=(8000, 12))

In [21]:
X_test

array([[-0.23493027,  0.91370804, -0.85417401, ..., -0.99975003,
         1.73494238, -0.57850497],
       [-2.86583856,  0.91370804,  0.28880447, ...,  1.00025003,
        -0.57638802, -0.57850497],
       [ 0.42797576, -1.0944415 , -0.94942222, ...,  1.00025003,
        -0.57638802, -0.57850497],
       ...,
       [ 0.54191273, -1.0944415 , -0.28268477, ..., -0.99975003,
         1.73494238, -0.57850497],
       [ 1.45340851, -1.0944415 , -0.18743656, ..., -0.99975003,
        -0.57638802,  1.72859362],
       [ 0.90443946, -1.0944415 , -0.56842939, ...,  1.00025003,
        -0.57638802, -0.57850497]], shape=(2000, 12))

In [22]:
with open('scalar.pkl','wb') as file:
    pickle.dump(scalar,file)

ANN IMPLEMENTATION

In [23]:
import tensorflow as tf

In [24]:
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [25]:
models=Sequential([
    Dense(64,activation='relu',input_shape=(X_train.shape[1],)), ##HL1
    Dense(32,activation='relu'), ##HL2
    Dense(1,activation='sigmoid')  ##Output
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
models.summary()

In [27]:
opt=tensorflow.keras.optimizers.Adam()
losses=tensorflow.keras.losses.BinaryCrossentropy()

In [28]:
models.compile(optimizer=opt,loss=losses,metrics=['accuracy'])

In [29]:
##Setup the tenser board
log_dir='logs/fit/'+datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [30]:
early_stopping=EarlyStopping(monitor="val_loss",patience=12,restore_best_weights=True)

In [31]:
#train the model
history=models.fit(
    X_train,y_train,validation_data=(X_test,y_test),epochs=100,
    callbacks=[tensorflow_callback,early_stopping]
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.8001 - loss: 0.4563 - val_accuracy: 0.8350 - val_loss: 0.3899
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8375 - loss: 0.3934 - val_accuracy: 0.8600 - val_loss: 0.3521
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8494 - loss: 0.3655 - val_accuracy: 0.8645 - val_loss: 0.3333
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8526 - loss: 0.3552 - val_accuracy: 0.8650 - val_loss: 0.3254
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8572 - loss: 0.3486 - val_accuracy: 0.8745 - val_loss: 0.3253
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8564 - loss: 0.3443 - val_accuracy: 0.8680 - val_loss: 0.3234
Epoch 7/100
[1m250/25

In [32]:
models.save('models.h5')



In [33]:
##load tensorboard extension 
%load_ext tensorboard

In [35]:
%tensorboard --logdirs logs/fit/20260218_015715

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Traceback (most recent call last):
  File [35m"<frozen runpy>"[0m, line [35m198[0m, in [35m_run_module_as_main[0m
  File [35m"<frozen runpy>"[0m, line [35m88[0m, in [35m_run_code[0m
  File [35m"c:\Users\shard\OneDrive\Desktop\ANN_platform\venv\Scripts\tensorboard.exe\__main__.py"[0m, line [35m2[0m, in [35m<module>[0m
    from tensorboard.main import run_main
  File [35m"C:\Users\shard\OneDrive\Desktop\ANN_platform\venv\Lib\site-packages\tensorboard\main.py"[0m, line [35m27[0m, in [35m<module>[0m
    from tensorboard import default
  File [35m"C:\Users\shard\OneDrive\Desktop\ANN_platform\venv\Lib\site-packages\tensorboard\default.py"[0m, line [35m30[0m, in [35m<module>[0m
    import pkg_resources
[1;35mModuleNotFoundError[0m: [35mNo module named 'pkg_resources'[0m