In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
dataset = pd.read_csv('Churn_Modelling.csv')  # Replace with your dataset file name
print(dataset.head())

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

In [3]:
X = dataset.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)
y = dataset['Exited']

In [4]:
# Encode categorical data (Gender, Geography)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [5]:
# Encode 'Gender'
labelencoder_gender = LabelEncoder()
X['Gender'] = labelencoder_gender.fit_transform(X['Gender'])  # Female = 0, Male = 1

In [6]:
# One-hot encode 'Geography'
columnTransformer = ColumnTransformer(
    [('encoder', OneHotEncoder(), ['Geography'])],
    remainder='passthrough'
)
X = np.array(columnTransformer.fit_transform(X))
X = X[:, 1:]

In [7]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


In [8]:
# Step 3: Normalize the train and test data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Step 4: Initialize and build the model
model = Sequential()
model.add(Dense(units=16, activation='relu', input_dim=X_train.shape[1]))  # Input layer
model.add(Dense(units=8, activation='relu'))  # Hidden layer
model.add(Dense(units=1, activation='sigmoid'))  # Output layer

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [11]:
# Fit the model
model.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7663 - loss: 0.5390
Epoch 2/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8164 - loss: 0.4270
Epoch 3/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8278 - loss: 0.4103
Epoch 4/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8468 - loss: 0.3719
Epoch 5/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8475 - loss: 0.3690
Epoch 6/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8534 - loss: 0.3466
Epoch 7/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8569 - loss: 0.3439
Epoch 8/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8565 - loss: 0.3474
Epoch 9/100
[1m800/800[0m [32

<keras.src.callbacks.history.History at 0x79d7b5292140>

In [12]:
# Step 5: Print the accuracy score and confusion matrix
# Predicting the test set results
y_pred = (model.predict(X_test) > 0.5).astype(int)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [13]:
# Accuracy and Confusion Matrix
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

In [14]:
print(f"Accuracy Score: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy Score: 85.35%
Confusion Matrix:
[[1499   96]
 [ 197  208]]
