In [37]:
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential


In [38]:
# Read the churn dataset from a path relative to the working directory,
# then preview its first rows as the cell output.
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [39]:
# Build the regression dataset: encode categoricals, split, then scale.
# Target is 'EstimatedSalary'; every other remaining column (including
# 'Exited') is used as a feature.

# Drop identifier columns
input_data = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# One-hot encode 'Geography' (drop='first' omits the first category's column)
ohe_geography = OneHotEncoder(drop='first', sparse_output=False)
input_data_array = ohe_geography.fit_transform(input_data[['Geography']])
input_data_array_df = pd.DataFrame(
    input_data_array,
    columns=ohe_geography.get_feature_names_out(['Geography']),
    # pd.concat(axis=1) aligns rows by index label, so reuse the source index;
    # a fresh RangeIndex would misalign rows if df's index were not 0..n-1.
    index=input_data.index,
)
# Swap the raw column for its encoded columns without mutating in place,
# so re-running the cell always starts from the same state.
input_data = pd.concat(
    [input_data.drop(columns=['Geography']), input_data_array_df],
    axis=1,
)

# Label encode 'Gender' column
le_gender = LabelEncoder()
input_data['Gender'] = le_gender.fit_transform(input_data['Gender'])

# Split data into features and target
X = input_data.drop('EstimatedSalary', axis=1).values
y = input_data['EstimatedSalary'].values

# Split into training and testing sets (80/20, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training rows only, then apply the same
# transform to the test rows so no test statistics leak into the scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [40]:
# Pickle the fitted scaler and both encoders to disk, one file per object.
# NOTE(review): 'scalerr.pkl' looks like a typo for 'scaler.pkl' — kept as-is
# because code outside this notebook may load the file by this name.
fitted_artifacts = (
    ('scalerr.pkl', scaler),
    ('ohe_geography.pkl', ohe_geography),
    ('le_gender.pkl', le_gender),
)
for pickle_path, fitted_obj in fitted_artifacts:
    with open(pickle_path, 'wb') as f:
        pickle.dump(fitted_obj, f)

In [41]:
# Display the column labels of input_data as the cell output.
input_data.columns

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited',
       'Geography_Germany', 'Geography_Spain'],
      dtype='object')

In [42]:
## ANN for regression problem
# Two ReLU hidden layers (64 and 32 units) feeding a single linear output unit.
model = Sequential(
    [
        # Declare the input width with an explicit Input layer; passing
        # `input_shape=` to the first Dense layer makes Keras emit a
        # UserWarning for Sequential models.
        Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1)  # No activation function for regression output
    ]
)
# Optimise mean squared error; also track mean absolute error as a metric.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [43]:
# Training callbacks: early stopping and TensorBoard logging.
# Stop when val_loss has not improved for 10 consecutive epochs and restore
# the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Each run logs into its own timestamped subdirectory under regression_logs/fit/.
log_dir = 'regression_logs/fit/' + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [44]:
# Train for up to 100 epochs in batches of 32, holding out 20% of the
# training rows for validation; the callbacks may end training earlier.
fit_callbacks = [early_stopping, tensorboard_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    callbacks=fit_callbacks,
)

Epoch 1/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 13308254208.0000 - mae: 99978.5625 - val_loss: 13697240064.0000 - val_mae: 102034.9062
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 13241372672.0000 - mae: 99641.9766 - val_loss: 13557372928.0000 - val_mae: 101349.1016
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12988995584.0000 - mae: 98383.1172 - val_loss: 13159399424.0000 - val_mae: 99399.6016
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12438549504.0000 - mae: 95643.8125 - val_loss: 12422732800.0000 - val_mae: 95783.8438
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 11551000576.0000 - mae: 91239.0703 - val_loss: 11348542464.0000 - val_mae: 90467.8906
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss

In [31]:
# Load the TensorBoard notebook extension and open it on the log root
# directory 'regression_logs/fit'.
# Re-running this cell in a live kernel prints an "already loaded" notice.
%load_ext tensorboard
%tensorboard --logdir regression_logs/fit

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 9192), started 4:09:52 ago. (Use '!kill 9192' to kill it.)

In [32]:
## Evaluate the model
# Score the held-out test split; the result unpacks into the loss followed
# by the compiled 'mae' metric.
evaluation_scores = model.evaluate(x=X_test, y=y_test)
test_loss, test_mae = evaluation_scores
print(f'Test MAE: {test_mae}')

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 3365486336.0000 - mae: 50138.4414
Test MAE: 50138.44140625


In [33]:
## Save the model
# Write the trained model to 'salary_regression_model.h5' (relative path).
# NOTE(review): the '.h5' suffix presumably selects Keras's legacy HDF5
# format — confirm what the loading code expects before switching formats.
model.save('salary_regression_model.h5')

