In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler , LabelEncoder ,OneHotEncoder
import pickle

In [18]:
#load data
data=pd.read_csv("/workspaces/codespaces-blank/Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [19]:
#preprocess data
#drop irrelevent column
data=data.drop(['RowNumber', "CustomerId" , "Surname"] , axis=1)
data.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [20]:
#encode gender using labalencode

label_gender_encoder=LabelEncoder()
data['Gender']=label_gender_encoder.fit_transform(data['Gender'])
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [21]:
#Geography encoder using onehotencoder
onehotencoder_geography=OneHotEncoder()
geography=onehotencoder_geography.fit_transform(data['Geography'].values.reshape(-1,1)).toarray()
geography=pd.DataFrame(geography,columns=['Geography_France','Geography_Germany','Geography_Spain'])
data=pd.concat([data,geography],axis=1)
data=data.drop('Geography',axis=1)


In [22]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [23]:
#save the  encoder label and onehotencoder
with open('/workspaces/codespaces-blank/label_gender_encoder' , 'wb') as f:
    pickle.dump(label_gender_encoder,f)

with open('/workspaces/codespaces-blank/onehotencoder_geography' , 'wb') as f:
    pickle.dump(onehotencoder_geography,f)

In [24]:
#divide data into features and target
X=data.drop('Exited',axis=1)
y=data['Exited']

#split data into train and test
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

#scale data
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [25]:
#save the scaler
with open('/workspaces/codespaces-blank/scaler' , 'wb') as f:
    pickle.dump(scaler,f)

In [28]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping , TensorBoard
import datetime

In [29]:
#build model
model=Sequential([
    Dense(units=64,activation='relu' , input_shape=(X_train.shape[1],)),
    Dense(units=32,activation='relu'),
    Dense(units=1,activation='sigmoid')
])


In [30]:
model.summary()

In [31]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [34]:
#setup tensorboard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [35]:
#early stopping
early_stopping=EarlyStopping(monitor='val_loss',patience=5)

In [36]:
#train model
model.fit(X_train,y_train,validation_split=0.2,epochs=100,batch_size=32,callbacks=[early_stopping,tensorboard_callback])

Epoch 1/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7403 - loss: 0.5269 - val_accuracy: 0.8163 - val_loss: 0.4252
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8292 - loss: 0.4138 - val_accuracy: 0.8331 - val_loss: 0.3973
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8451 - loss: 0.3746 - val_accuracy: 0.8431 - val_loss: 0.3739
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8471 - loss: 0.3669 - val_accuracy: 0.8481 - val_loss: 0.3657
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8571 - loss: 0.3474 - val_accuracy: 0.8500 - val_loss: 0.3595
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8586 - loss: 0.3379 - val_accuracy: 0.8494 - val_loss: 0.3629
Epoch 7/100
[1m200/20

<keras.src.callbacks.history.History at 0x7221f17a0410>

In [37]:
#save model
model.save('/workspaces/codespaces-blank/churn_model.h5')



In [None]:
#load tensorboard
%load_ext tensorboard
%tensorboard --logdir logs/fit


In [44]:
#load the model
from tensorflow.keras.models import load_model

model=load_model('/workspaces/codespaces-blank/churn_model.h5')

#load the scaler , labelencoder and onehotencoder
with open('/workspaces/codespaces-blank/scaler' , 'rb') as f:
    scaler=pickle.load(f)

with open('/workspaces/codespaces-blank/label_gender_encoder' , 'rb') as f:
    label= pickle.load(f)

with open('/workspaces/codespaces-blank/onehotencoder_geography' , 'rb') as f:
    onehot=pickle.load(f)






In [45]:
#predict
model.predict(scaler.transform(X_test))


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step




array([[0.55302215],
       [0.0805454 ],
       [0.3759769 ],
       ...,
       [0.00151642],
       [0.29292125],
       [0.48372766]], dtype=float32)

In [46]:
#evaluate model
model.evaluate(X_test,y_test)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8677 - loss: 0.3336  


[0.34168267250061035, 0.8640000224113464]