# Importing Libraries

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.metrics import confusion_matrix,classification_report
from tensorflow import keras

# Loading the Dataset

In [10]:
# Read the churn-modelling dataset from the local CSV file into a DataFrame.
df = pd.read_csv('./data/Churn_Modelling.csv')

In [12]:
# Preview the first three rows to check which columns were loaded.
df.head(3)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1


# Cleaning

### Dropping unnecessary columns

In [13]:
# Remove the identifier columns that this notebook treats as unnecessary for
# modelling. Reassigning (instead of inplace=True) together with
# errors='ignore' keeps the cell safe to re-run: a second execution is a
# no-op rather than a KeyError on the already-removed columns.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [14]:
# Count missing values per column to see whether any imputation is needed.
df.isna().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

No null values

# Separating the Features and the Labels

In [31]:
# Every column except the last one is a feature; the last column ('Exited')
# is the label the model has to predict.
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()
X.shape, y.shape

((10000, 10), (10000,))

# Encoding Categorical Data

In [16]:
# Show the feature matrix before encoding; 'Geography' and 'Gender' are still strings.
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [17]:
# Encoder that maps each country name to an integer code.
country_encoder = LabelEncoder()  # Label Encoder for country

# Fit on the original 'Geography' column of df rather than on X[:, 1].
# X[:, 1] is overwritten by this very statement, so fitting on it would make a
# re-run of the cell learn the integer codes instead of the country names,
# which would silently break country_encoder.classes_ / inverse_transform.
X[:, 1] = country_encoder.fit_transform(df['Geography'])
# NOTE(review): the codes follow the sorted label order and have no real
# magnitude, yet the network will treat them as ordered numbers. One-hot
# encoding may suit this nominal feature better, but it would change the
# input width of the model, so it is left as a follow-up.

This code uses a LabelEncoder to convert categorical data in the second column (country) of the X dataset into numerical values.

In [18]:
# Encoder that maps each gender label to an integer code.
gender_encoder = LabelEncoder()  # Label Encoder for gender

# Fit on the original 'Gender' column of df rather than on X[:, 2].
# X[:, 2] is overwritten by this very statement, so fitting on it would make a
# re-run of the cell learn the integer codes instead of the original labels,
# which would silently break gender_encoder.classes_ / inverse_transform.
X[:, 2] = gender_encoder.fit_transform(df['Gender'])

This code uses a LabelEncoder to convert categorical data in the third column (gender) of the X dataset into numerical values.

In [19]:
# Show the feature matrix again: columns 1 and 2 now hold integer codes.
X

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

X after encoding 'country' and 'gender'

# Splitting the Dataset

In [20]:
# Hold out 20% of the rows as a test set; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Normalize the train and test data

In [21]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so that no
# statistics from the test data leak into preprocessing.
sc = StandardScaler()
sc.fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

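This code uses scikit-learn's StandardScaler to standardize the features (zero mean, unit variance). The scaler is fitted on the training set only, and the same training statistics are then applied to the test set, so no information from the test data leaks into preprocessing.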

# Initialize & Build the Model

In [23]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable when the notebook is restarted and run again.
keras.utils.set_random_seed(0)

# Take the input width from the data instead of hard-coding it, so the cell
# keeps working if the set of feature columns changes.
n_features = X_train.shape[1]

# Small fully connected network for binary classification.
classifier = keras.Sequential([
    keras.layers.Dense(32, input_shape=(n_features,), activation='relu'),  # Hidden Layer 1
    keras.layers.Dense(13, activation='relu'),  # Hidden Layer 2
    keras.layers.Dense(1, activation='sigmoid')  # Output Layer
])

classifier.compile(optimizer='adam',
              loss='binary_crossentropy',  # Generally used for binary classification
              metrics=['accuracy'])

# Train for 10 passes over the training split (Keras default batch size).
classifier.fit(X_train, y_train, epochs=10)

2023-11-19 23:39:17.605266: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f9f56121a20>

This code defines a neural network model for binary classification using Keras:

The model takes 10 input features and has three dense layers: <br>
two hidden layers (32 and 13 units) with ReLU activation functions, and <br>
an output layer with a single unit and a sigmoid activation function.

It's compiled with the 'adam' optimizer, 'binary_crossentropy' loss function (common for binary classification), and 'accuracy' as the evaluation metric.

In [24]:
# Run the trained network on the held-out features. The sigmoid output layer
# yields one value between 0 and 1 per row.
y_pred = classifier.predict(X_test)
y_pred  # Predicted probabilities, one row per test sample

 1/63 [..............................] - ETA: 8s



array([[0.36845535],
       [0.29739445],
       [0.19011238],
       ...,
       [0.14949389],
       [0.19996722],
       [0.18568693]], dtype=float32)

In [25]:
# Turn the sigmoid outputs into hard class predictions with a 0.5 cut-off:
# True means the model predicts Exited == 1 (the customer leaves).
y_pred = (y_pred > 0.5)
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

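If the predicted probability is greater than 0.5, the customer is predicted to leave (Exited = 1); otherwise the customer is predicted to stay.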

In [26]:
# Evaluate on the training split; with metrics=['accuracy'] the result is
# the pair [loss, accuracy].
score = classifier.evaluate(X_train, y_train)
train_loss, train_accuracy = score

print("Train Loss:", train_loss)
print("Train Accuracy:", train_accuracy)

 40/250 [===>..........................] - ETA: 0s - loss: 0.3376 - accuracy: 0.8562 

Train Loss: 0.33576127886772156
Train Accuracy: 0.8586249947547913


In [27]:
# Evaluate on the held-out test split; with metrics=['accuracy'] the result
# is the pair [loss, accuracy].
score = classifier.evaluate(X_test, y_test)
test_loss, test_accuracy = score

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 0.345601350069046
Test Accuracy: 0.8604999780654907


# Print the Confusion Matrix and Classification Report

In [28]:
# Confusion matrix on the test set. scikit-learn puts the true classes on the
# rows and the predicted classes on the columns.
cm = confusion_matrix(y_test, y_pred)  # Confusion Matrix
cm

array([[1513,   82],
       [ 197,  208]])

In [29]:
# Per-class precision, recall and F1 on the test set. The report is returned
# as a formatted string, so it is printed rather than displayed.
cr = classification_report(y_test, y_pred)  # Classification Report
print(cr)

              precision    recall  f1-score   support

           0       0.88      0.95      0.92      1595
           1       0.72      0.51      0.60       405

    accuracy                           0.86      2000
   macro avg       0.80      0.73      0.76      2000
weighted avg       0.85      0.86      0.85      2000

