### Importing Libraries

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import pickle
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [2]:
# Load the raw churn dataset from a CSV file in the working directory
# and preview the first rows.
data=pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Preprocessing

In [3]:
# Remove the row/customer identifiers and the surname; they are not used as
# model features. Reassigning (instead of inplace=True) together with
# errors="ignore" keeps this cell safe to re-run: a second execution is a
# no-op rather than a KeyError on the already-dropped columns.
data = data.drop(columns=["RowNumber", "CustomerId", "Surname"], errors="ignore")

In [4]:
# Preview the frame after the identifier columns were dropped.
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
# List the distinct Geography categories before encoding.
data["Geography"].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [6]:
# List the distinct Gender categories before encoding.
data["Gender"].unique()

array(['Female', 'Male'], dtype=object)

In [7]:
# Encode the Gender column as integer class labels.
# NOTE: this overwrites data["Gender"], so re-running the cell would refit the
# encoder on the already-encoded integers instead of the original strings.
labelencoder = LabelEncoder()
labelencoder.fit(data["Gender"])
data["Gender"] = labelencoder.transform(data["Gender"])

In [12]:

# One-hot encode the Geography column and swap it for the indicator columns.
onehotencoder = OneHotEncoder()

# The encoder expects a 2D (n_samples, n_features) array
geography_reshaped = data["Geography"].values.reshape(-1, 1)

# fit_transform yields a sparse matrix; densify it for the DataFrame constructor
geography_encoded = onehotencoder.fit_transform(geography_reshaped).toarray()

# Reuse data's index so the column-wise concat below aligns row-for-row even
# when `data` does not have a default 0..n-1 index (a fresh RangeIndex would
# misalign the rows and pad the result with NaN).
geography_encoded_df = pd.DataFrame(
    geography_encoded,
    columns=onehotencoder.get_feature_names_out(['Geography']),
    index=data.index,
)

# Drop the original 'Geography' column and append the one-hot encoded columns
data = data.drop("Geography", axis=1)
data = pd.concat([data, geography_encoded_df], axis=1)


In [13]:
# Inspect the column set after the categorical columns were encoded.
data.columns

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited',
       'Geography_0', 'Geography_1', 'Geography_2'],
      dtype='object')

In [14]:
# Preview the fully numeric frame after encoding.
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_0,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [15]:
# Persist the fitted label encoder and one-hot encoder.
# The context managers guarantee each file is flushed and closed; a bare
# open() passed straight to pickle.dump leaves the handle open until it is
# garbage-collected.
with open('labelencoder.pkl', 'wb') as encoder_file:
    pickle.dump(labelencoder, encoder_file)
with open('onehotencoder.pkl', 'wb') as encoder_file:
    pickle.dump(onehotencoder, encoder_file)


In [17]:
# Separate the target ("Exited") from the feature columns
y = data["Exited"]
X = data.drop(columns=["Exited"])

In [19]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Standardise the features: learn mean/variance from the training split only,
# then apply that same transform to both splits.
# NOTE: X_train / X_test are overwritten with the scaled arrays, so re-running
# this cell would refit the scaler on already-scaled data.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [22]:
# Persist the fitted scaler; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

(8000, 12)

### ANN Implementation

In [31]:
# Feed-forward binary classifier: two ReLU hidden layers and a sigmoid output.
# The input width is declared with an explicit Input object, which is the form
# Keras recommends for Sequential models (passing input_shape to the first
# Dense layer emits a warning on Keras 3).
model=Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64,activation="relu"),    # hidden layer 1
    Dense(32,activation="relu"),    # hidden layer 2
    Dense(1,activation="sigmoid"),  # output layer: single sigmoid unit for the binary target
])


In [32]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [34]:
# Adam optimizer with an explicit learning rate of 0.01
# (ten times the Keras default of 0.001).
opt=tf.keras.optimizers.Adam(learning_rate=0.01)

In [35]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked per epoch.
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

In [38]:
# Each run logs to its own timestamped directory under logs/fit/;
# histogram_freq=1 records weight histograms every epoch.
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"
tf_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

In [41]:
# Stop once val_loss has failed to improve for 10 consecutive epochs and
# roll the model back to the weights from its best epoch.
earlystopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

In [42]:
# Train for up to 100 epochs, logging to TensorBoard and stopping early on val_loss.
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping selects the best weights on test data; consider carving a
# separate validation split out of the training data.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[tf_callback, earlystopping_callback],
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8653 - loss: 0.3351 - val_accuracy: 0.8615 - val_loss: 0.3390
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8635 - loss: 0.3282 - val_accuracy: 0.8620 - val_loss: 0.3410
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8626 - loss: 0.3372 - val_accuracy: 0.8585 - val_loss: 0.3428
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8678 - loss: 0.3256 - val_accuracy: 0.8595 - val_loss: 0.3434
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8724 - loss: 0.3226 - val_accuracy: 0.8630 - val_loss: 0.3407
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8718 - loss: 0.3200 - val_accuracy: 0.8510 - val_loss: 0.3587
Epoch 7/100
[1m250/25

In [43]:
# Persist the trained model to disk in HDF5 format (selected by the .h5 suffix).
# NOTE(review): recent Keras versions treat HDF5 as a legacy format and
# recommend the native ".keras" format; confirm what downstream loaders
# expect before changing the file name.
model.save("model.h5")

