In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [3]:
import pandas as pd
import numpy as np

# --- Synthetic customer-churn dataset ---------------------------------------
# The legacy global NumPy RNG is seeded once, so every draw below is
# reproducible. NOTE: the draws must stay in this exact order, otherwise the
# seeded stream hands different values to each column.
num_rows = 2000
np.random.seed(42)

# Deterministic identifier columns (no randomness involved).
customer_ids = 100000 + np.arange(num_rows)
surnames = ["Surname" + str(i) for i in range(num_rows)]

# Random feature columns (randint upper bounds are exclusive).
credit_scores = np.random.randint(300, 850, num_rows)
locations = np.random.choice(
    ['New York', 'California', 'Texas', 'Florida', 'Illinois'],
    num_rows,
)
genders = np.random.choice(['Male', 'Female'], num_rows)
ages = np.random.randint(18, 90, num_rows)
tenures = np.random.randint(0, 11, num_rows)
balances = np.random.uniform(0, 250000, num_rows).round(2)
num_of_products = np.random.randint(1, 5, num_rows)
has_credit_card = np.random.choice([0, 1], num_rows)
is_active_member = np.random.choice([0, 1], num_rows)
estimated_salaries = np.random.uniform(10000, 200000, num_rows).round(2)

# Binary target, drawn independently of every feature above.
exited = np.random.choice([0, 1], num_rows)

# Assemble the frame; the pair order fixes the column order.
column_pairs = [
    ('CustomerID', customer_ids),
    ('Surname', surnames),
    ('CreditScore', credit_scores),
    ('Location', locations),
    ('Gender', genders),
    ('Age', ages),
    ('Tenure', tenures),
    ('Balance', balances),
    ('NumOfProducts', num_of_products),
    ('HasCreditCard', has_credit_card),
    ('IsActiveMember', is_active_member),
    ('EstimatedSalary', estimated_salaries),
    ('Exited', exited),
]
data = pd.DataFrame(dict(column_pairs))

print(data.head(n=5))


   CustomerID   Surname  CreditScore    Location  Gender  Age  Tenure  \
0      100000  Surname0          402  California  Female   69       5   
1      100001  Surname1          735  California    Male   66       4   
2      100002  Surname2          570    New York    Male   54       2   
3      100003  Surname3          406     Florida    Male   43       9   
4      100004  Surname4          371    Illinois  Female   57       9   

     Balance  NumOfProducts  HasCreditCard  IsActiveMember  EstimatedSalary  \
0   42349.12              4              1               1        127384.90   
1  155204.70              2              0               0        152807.11   
2  179747.69              3              0               0         50660.01   
3  142380.32              3              1               0        146133.07   
4  184587.82              1              1               1         53246.01   

   Exited  
0       1  
1       1  
2       1  
3       0  
4       1  


In [4]:
# Preview the first five rows; the cell's last expression is what gets displayed.
data.head(n=5)

Unnamed: 0,CustomerID,Surname,CreditScore,Location,Gender,Age,Tenure,Balance,NumOfProducts,HasCreditCard,IsActiveMember,EstimatedSalary,Exited
0,100000,Surname0,402,California,Female,69,5,42349.12,4,1,1,127384.9,1
1,100001,Surname1,735,California,Male,66,4,155204.7,2,0,0,152807.11,1
2,100002,Surname2,570,New York,Male,54,2,179747.69,3,0,0,50660.01,1
3,100003,Surname3,406,Florida,Male,43,9,142380.32,3,1,0,146133.07,0
4,100004,Surname4,371,Illinois,Female,57,9,184587.82,1,1,1,53246.01,1


# Preprocessing the data

In [5]:
## Drop the irrelevant identifier columns.
# NOTE: `data` is rebound here, so re-running this cell without regenerating
# the frame raises KeyError (the columns are already gone).
data = data.drop(columns=['CustomerID', 'Surname'])
data

Unnamed: 0,CreditScore,Location,Gender,Age,Tenure,Balance,NumOfProducts,HasCreditCard,IsActiveMember,EstimatedSalary,Exited
0,402,California,Female,69,5,42349.12,4,1,1,127384.90,1
1,735,California,Male,66,4,155204.70,2,0,0,152807.11,1
2,570,New York,Male,54,2,179747.69,3,0,0,50660.01,1
3,406,Florida,Male,43,9,142380.32,3,1,0,146133.07,0
4,371,Illinois,Female,57,9,184587.82,1,1,1,53246.01,1
...,...,...,...,...,...,...,...,...,...,...,...
1995,407,Texas,Male,79,7,18665.41,1,0,0,139722.99,1
1996,666,Illinois,Male,70,2,228610.09,4,0,0,68415.42,0
1997,580,California,Male,21,6,5160.86,2,0,1,116285.10,1
1998,427,Texas,Female,42,1,240587.53,2,1,1,187219.13,0


In [6]:
## Encode the categorical 'Gender' column as integer labels.
# The fitted encoder stays in `label_encoder_gender`, which is pickled in a later cell.
label_encoder_gender = LabelEncoder()
gender_codes = label_encoder_gender.fit_transform(data['Gender'])
data['Gender'] = gender_codes
data

Unnamed: 0,CreditScore,Location,Gender,Age,Tenure,Balance,NumOfProducts,HasCreditCard,IsActiveMember,EstimatedSalary,Exited
0,402,California,0,69,5,42349.12,4,1,1,127384.90,1
1,735,California,1,66,4,155204.70,2,0,0,152807.11,1
2,570,New York,1,54,2,179747.69,3,0,0,50660.01,1
3,406,Florida,1,43,9,142380.32,3,1,0,146133.07,0
4,371,Illinois,0,57,9,184587.82,1,1,1,53246.01,1
...,...,...,...,...,...,...,...,...,...,...,...
1995,407,Texas,1,79,7,18665.41,1,0,0,139722.99,1
1996,666,Illinois,1,70,2,228610.09,4,0,0,68415.42,0
1997,580,California,1,21,6,5160.86,2,0,1,116285.10,1
1998,427,Texas,0,42,1,240587.53,2,1,1,187219.13,0


In [7]:
## One-hot encode the nominal 'Location' column.
from sklearn.preprocessing import OneHotEncoder

# Fit on a one-column DataFrame (double brackets keep the input 2-D).
onehot_encoder_location = OneHotEncoder()
onehot_encoder_location.fit(data[['Location']])

# `locaton_encoder` (sic) holds the encoded matrix; the name is kept because
# the cell that builds `location_encoded_df` reads it.
locaton_encoder = onehot_encoder_location.transform(data[['Location']])
locaton_encoder

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2000 stored elements and shape (2000, 5)>

In [8]:
# Column names the encoder generates for the 'Location' input feature.
onehot_encoder_location.get_feature_names_out(input_features=['Location'])

array(['Location_California', 'Location_Florida', 'Location_Illinois',
       'Location_New York', 'Location_Texas'], dtype=object)

In [9]:
# Densify the one-hot matrix and wrap it in a DataFrame with named columns.
# The index is taken from `data` explicitly: the next step concatenates this
# frame column-wise, and pd.concat(axis=1) aligns rows on index labels. With
# a default RangeIndex on `data` this is identical to the implicit index, but
# it stays correct if `data` has had rows filtered or been re-indexed.
location_encoded_df = pd.DataFrame(
    locaton_encoder.toarray(),
    index=data.index,
    columns=onehot_encoder_location.get_feature_names_out(['Location']),
)
location_encoded_df

Unnamed: 0,Location_California,Location_Florida,Location_Illinois,Location_New York,Location_Texas
0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0
3,0.0,1.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...
1995,0.0,0.0,0.0,0.0,1.0
1996,0.0,0.0,1.0,0.0,0.0
1997,1.0,0.0,0.0,0.0,0.0
1998,0.0,0.0,0.0,0.0,1.0


In [10]:
## Swap the raw 'Location' column for its one-hot encoded columns.
# Rows are matched on index labels by the column-wise concat.
features_without_location = data.drop(columns='Location')
data = pd.concat([features_without_location, location_encoded_df], axis='columns')
data.head(n=5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCreditCard,IsActiveMember,EstimatedSalary,Exited,Location_California,Location_Florida,Location_Illinois,Location_New York,Location_Texas
0,402,0,69,5,42349.12,4,1,1,127384.9,1,1.0,0.0,0.0,0.0,0.0
1,735,1,66,4,155204.7,2,0,0,152807.11,1,1.0,0.0,0.0,0.0,0.0
2,570,1,54,2,179747.69,3,0,0,50660.01,1,0.0,0.0,0.0,1.0,0.0
3,406,1,43,9,142380.32,3,1,0,146133.07,0,0.0,1.0,0.0,0.0,0.0
4,371,0,57,9,184587.82,1,1,1,53246.01,1,0.0,0.0,1.0,0.0,0.0


In [11]:
## Persist the fitted encoders as pickle files, one file per encoder.
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_location.pkl', onehot_encoder_location),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [12]:
## Separate the dependent variable ('Exited') from the independent features.
y = data['Exited']
X = data.drop(columns='Exited')

## Hold out 20% of the rows as a test split (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Standardise the features.
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits.
scalar = StandardScaler()
scalar.fit(X_train)
X_train = scalar.transform(X_train)
X_test = scalar.transform(X_test)

In [13]:
# Persist the fitted StandardScaler alongside the encoder pickles.
with open('scalar.pkl', mode='wb') as file:
    pickle.dump(scalar, file)

In [14]:
# Inspect the fully encoded frame; it still contains the target column 'Exited'
# (X and y above were derived from it without modifying `data`).
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCreditCard,IsActiveMember,EstimatedSalary,Exited,Location_California,Location_Florida,Location_Illinois,Location_New York,Location_Texas
0,402,0,69,5,42349.12,4,1,1,127384.90,1,1.0,0.0,0.0,0.0,0.0
1,735,1,66,4,155204.70,2,0,0,152807.11,1,1.0,0.0,0.0,0.0,0.0
2,570,1,54,2,179747.69,3,0,0,50660.01,1,0.0,0.0,0.0,1.0,0.0
3,406,1,43,9,142380.32,3,1,0,146133.07,0,0.0,1.0,0.0,0.0,0.0
4,371,0,57,9,184587.82,1,1,1,53246.01,1,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,407,1,79,7,18665.41,1,0,0,139722.99,1,0.0,0.0,0.0,0.0,1.0
1996,666,1,70,2,228610.09,4,0,0,68415.42,0,0.0,0.0,1.0,0.0,0.0
1997,580,1,21,6,5160.86,2,0,1,116285.10,1,1.0,0.0,0.0,0.0,0.0
1998,427,0,42,1,240587.53,2,1,1,187219.13,0,0.0,0.0,0.0,0.0,1.0


# ANN Implementation

In [15]:
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense
 from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
 import datetime

In [16]:
# Per-sample input shape for the network: every dimension after the batch axis
# of the 2-D scaled training array.
X_train.shape[1:]

(14,)

In [17]:
## Build the ANN: two ReLU hidden layers feeding a single sigmoid output unit.
model = Sequential()
# Hidden layer 1; `input_shape` ties it to the number of feature columns.
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
# Hidden layer 2.
model.add(Dense(32, activation='relu'))
# Output layer: one sigmoid unit for the binary 'Exited' target.
model.add(Dense(1, activation='sigmoid'))




In [18]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                960       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 3073 (12.00 KB)
Trainable params: 3073 (12.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [19]:
import tensorflow
# Adam optimizer with an explicit learning rate, plus a binary cross-entropy
# loss object.
keras_api = tensorflow.keras
opt = keras_api.optimizers.Adam(learning_rate=0.01)
loss = keras_api.losses.BinaryCrossentropy()

In [20]:
# Compile the model.
# `metrics` has to be a list of metric identifiers. A quoted string such as
# "['accuracy']" is a single, invalid metric name rather than a list, so the
# list literal is passed here.
model.compile(optimizer=opt, loss="binary_crossentropy", metrics=['accuracy'])
# For a multi-class target with integer labels, use
# "sparse_categorical_crossentropy" as the loss instead.

In [21]:
# Set up the TensorBoard callback.
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

# One timestamped log directory per run, e.g. "logs/fit_20250618-130145".
# The path must not start with a space (that would create a directory
# literally named " logs") and uses the same "fit_<timestamp>" layout as the
# training cell and the %tensorboard invocation.
log_dir = "logs/fit_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback = TensorBoard(log_dir=log_dir, write_graph=False)

In [22]:
# Early stopping: watch the validation loss, allow 5 epochs without
# improvement, and roll back to the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)


In [23]:
# Make TensorFlow run tf.function code eagerly; intended as a debugging aid.
# NOTE(review): presumably slows training — confirm whether it should stay on.
tf.config.run_functions_eagerly(True)


In [24]:
import tensorflow as tf
import datetime
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

# --- Optimizer and loss -----------------------------------------------------
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy()

# --- (Re)compile the model with the objects above; `metrics` is a real list.
compile_options = {
    "optimizer": opt,
    "loss": loss,
    "metrics": ['accuracy'],
}
model.compile(**compile_options)

# --- TensorBoard: one timestamped log directory per run ---------------------
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/fit_{run_stamp}"
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# --- Early stopping on validation loss, restoring the best weights ----------
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=15,
    restore_best_weights=True,
)

# Optional: run eagerly for debugging.
tf.config.run_functions_eagerly(True)

# --- Train -------------------------------------------------------------------
# The held-out test split doubles as the validation set that early stopping
# monitors.
training_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=training_callbacks,
)




Epoch 1/100

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


In [28]:
# Save the trained model to 'model.h5' in the working directory.
# NOTE(review): the .h5 suffix presumably selects the HDF5 format — confirm
# for the installed Keras version before changing the file name.
model.save('model.h5')

In [29]:
# Load the TensorBoard notebook extension. If it is already loaded, IPython
# only prints a notice suggesting %reload_ext.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [30]:
%tensorboard --logdir logs/fit_20250618-130145

Reusing TensorBoard on port 6007 (pid 2392), started 20:50:40 ago. (Use '!kill 2392' to kill it.)

In [None]:
# TODO: load the pickled encoders and scaler (label_encoder_gender.pkl, onehot_encoder_location.pkl, scalar.pkl)
