In [1]:
# Given a bank customer, build a neural network-based classifier that can determine whether 
# they will leave or not in the next 6 months.
# Dataset Description: The case study is from an open-source dataset from Kaggle.
# The dataset contains 10,000 sample points with 14 distinct features such as
# CustomerId, CreditScore, Geography, Gender, Age, Tenure, Balance, etc.
# Link to the Kaggle project:
# https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling
# Perform following steps:
# 1. Read the dataset.
# 2. Distinguish the feature and target set and divide the data set into training and test sets.
# 3. Normalize the train and test data. 
# 4. Initialize and build the model. Identify the points of improvement and implement the same. 
# 5. Print the accuracy score and confusion matrix (5 points).



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import accuracy_score, confusion_matrix

In [3]:
data = pd.read_csv("churn_Modelling.csv")


In [4]:
X = data.drop(columns=['CustomerId', 'Surname', 'Exited'])
y = data['Exited']

In [5]:
numeric_cols = data.select_dtypes(include=[np.number]).columns
categorical_cols = [col for col in data.columns if col not in numeric_cols]

In [6]:
encoder = OneHotEncoder(sparse=False)
encoded_categorical_cols = encoder.fit_transform(data[categorical_cols])



In [7]:
X = np.hstack((data[numeric_cols], encoded_categorical_cols))
y = data['Exited']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_dim=X_train.shape[1]),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

In [11]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x27563797f10>

In [13]:
y_pred = (model.predict(X_test) > 0.5)
accuracy = accuracy_score(y_test, y_pred)
cm1 = confusion_matrix(y_test, y_pred)



In [14]:
accuracy_model1 = ((cm1[0][0]+cm1[1][1])*100)/(cm1[0][0]+cm1[1][1]+cm1[0][1]+cm1[1][0])
print (accuracy_model1, '% of testing data was classified correctly')
print("Confusion Matrix:")
print(cm1)

95.1 % of testing data was classified correctly
Confusion Matrix:
[[1579   28]
 [  70  323]]
