In [61]:
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
import pickle
from sklearn.model_selection import train_test_split

In [62]:
df = pd.read_csv('Churn_Modelling.csv')

In [63]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [64]:
df=df.iloc[:, 3:]

In [None]:
gender_encoder = LabelEncoder()
df["Gender"]=gender_encoder.fit_transform(df["Gender"])

In [66]:
df["Gender"].head()

0    0
1    0
2    0
3    0
4    0
Name: Gender, dtype: int32

In [67]:
geo_onehot_encoder = OneHotEncoder()
onehot_encoded = geo_onehot_encoder.fit_transform(df["Geography"].values.reshape(-1,1)).toarray()
geo_df = pd.DataFrame(onehot_encoded, columns = ["France", "Germany", "Spain"])
df = pd.concat([df, geo_df], axis=1)

In [68]:
df.drop("Geography", axis=1, inplace=True)

In [69]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [70]:
X = df.drop("EstimatedSalary", axis=1)
Y = df["EstimatedSalary"]

In [71]:
X.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,0,0.0,0.0,1.0


In [72]:
Y.head()

0    101348.88
1    112542.58
2    113931.57
3     93826.63
4     79084.10
Name: EstimatedSalary, dtype: float64

In [73]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [74]:
X_test

array([[-0.57749609,  0.91324755, -0.6557859 , ..., -0.99850112,
         1.72572313, -0.57638802],
       [-0.29729735,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.52560743, -1.09499335,  0.48508334, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.81311987, -1.09499335,  0.77030065, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.41876609,  0.91324755, -0.94100321, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.24540869,  0.91324755,  0.00972116, ..., -0.99850112,
         1.72572313, -0.57638802]])

In [75]:
with open("r_label_encoder_gender.pkl", "wb") as file:
    pickle.dump(gender_encoder, file)
with open("r_onehot_encoder_geo.pkl", "wb") as file:
    pickle.dump(geo_onehot_encoder, file)
with open("r_scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)

## ANN Regression

In [76]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

In [77]:
## Build the model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)
])

## Compile the model
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])

In [78]:
model.summary()

In [79]:
## train the model
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

### Set up tensorboard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [80]:
## Set up early stopping
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [81]:
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=100, callbacks=[tensorboard_callback, early_stopping_callback])

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 100472.1406 - mae: 100472.1406 - val_loss: 98534.5938 - val_mae: 98534.5938
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 100506.1953 - mae: 100506.1953 - val_loss: 97192.5234 - val_mae: 97192.5234
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 97763.4297 - mae: 97763.4297 - val_loss: 93770.1953 - val_mae: 93770.1953
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 93185.0391 - mae: 93185.0391 - val_loss: 88038.3359 - val_mae: 88038.3359
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 88305.2656 - mae: 88305.2656 - val_loss: 80472.6016 - val_mae: 80472.6016
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 79276.5078 - mae: 79276.5078 - val_loss: 72086.6406 - va

<keras.src.callbacks.history.History at 0x26e06dfcf80>

In [82]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [83]:
%tensorboard --logdir logs/fit --port=6007

Reusing TensorBoard on port 6007 (pid 5400), started 0:10:54 ago. (Use '!kill 5400' to kill it.)

In [84]:
!kill 36364

'kill' is not recognized as an internal or external command,
operable program or batch file.


In [85]:
## Evaluate the model on test data
test_loss, test_mae = model.evaluate(X_test, Y_test)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 51175.9570 - mae: 51175.9570 


In [86]:
print(f"Test loss: {test_loss}")
print(f"Test MAE: {test_mae}")

Test loss: 50371.8046875
Test MAE: 50371.8046875


In [87]:
model.save("r_model.keras")

In [88]:
model.predict(X_test[:10])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


array([[ 85251.35 ],
       [ 92356.55 ],
       [104489.66 ],
       [ 95204.43 ],
       [108627.61 ],
       [ 95276.016],
       [111888.28 ],
       [104443.25 ],
       [ 99101.72 ],
       [ 98465.89 ]], dtype=float32)

In [89]:
Y_test[:10]

6252     41788.37
4684    146379.30
1731     58561.31
4742    170679.74
4521    114669.79
6340    149418.41
576      75685.97
5202     70529.00
6363     16618.76
439     164104.74
Name: EstimatedSalary, dtype: float64