In [None]:
# data preprocessing

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping



In [None]:
# Environment check: show which TensorFlow build this kernel imported, so the
# outputs recorded below can be tied to a specific version.
print(f"tensorflow version: {tf.__version__}")

tensorflow version: 2.19.0


In [None]:
# Load the churn dataset from the notebook's working directory.
#
# A missing file is re-raised as an exception instead of calling exit():
# exit() inside a notebook asks the kernel to shut down (losing all state) and
# hides the traceback, whereas a raised error stops "Run All" at this cell and
# keeps the kernel alive for debugging.
DATA_PATH = 'Churn_Modelling.xlsx'

try:
    dataset = pd.read_excel(DATA_PATH)
except FileNotFoundError as err:
    raise FileNotFoundError(f"error: '{DATA_PATH}' not found.") from err

In [None]:
# Feature matrix: every column from position 3 up to (not including) the last.
# The first three columns are skipped (presumably identifiers -- confirm
# against the spreadsheet). The last column is the target.
features_df = dataset.iloc[:, 3:-1]
target_series = dataset.iloc[:, -1]

X = features_df.values
y = target_series.values

In [None]:
# Sanity check: report the dimensions of the raw feature matrix and target.
print(
    "data loading and initial shapes",
    f"X shape before encoding: {X.shape}",
    f"y shape: {y.shape}",
    sep="\n",
)

# Peek at the first three samples of each array.
print("First few rows of x (before encoding!):\n", X[:3])
print("First few rows of y:\n", y[:3])

data loading and initial shapes
X shape before encoding: (10000, 10)
y shape: (10000,)
First few rows of x (before encoding!):
 [[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]]
First few rows of y:
 [1 0 1]


In [None]:
# One-hot encode the two categorical columns of X (positions 1 and 2) and pass
# every other column through untouched. Encoded columns come first in the
# output, followed by the passthrough columns in their original order.
geo_step = ('ohe_geo', OneHotEncoder(handle_unknown='ignore'), [1])
gender_step = ('ohe_gender', OneHotEncoder(handle_unknown='ignore'), [2])

ct = ColumnTransformer(
    transformers=[geo_step, gender_step],
    remainder='passthrough',
)

# Fit the encoders and replace X with the encoded matrix.
# NOTE: X is overwritten, so this cell is not safe to re-run on its own --
# re-run from the cell that builds X instead.
X = ct.fit_transform(X)

In [None]:
# The transformer may hand back a sparse matrix; convert it to a dense array
# when it exposes .toarray(), otherwise keep X as it is.
X = X.toarray() if hasattr(X, 'toarray') else X

In [None]:
# Report the feature matrix dimensions once the categorical columns are expanded.
print(
    "\nencoding categorical features",
    f"X shape after encoding: {X.shape}",
    sep="\n",
)

# Show one encoded sample so the column layout can be checked by eye.
print("First row of X post encoding:\n", X[0])



encoding categorical features
X shape after encoding: (10000, 13)
First row of X post encoding:
 [1.0 0.0 0.0 1.0 0.0 619 42 2 0.0 1 1 1 101348.88]


In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler learns its statistics from the training
# split only, and the same fitted scaler is then applied to both splits so no
# test-set information leaks into the scaling.
sc = StandardScaler().fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Confirm the split sizes after scaling.
print(
    "train/test split and scaling is complete! yay!",
    f"X_train shape: {X_train.shape}",
    f"X_test shape: {X_test.shape}",
    sep="\n",
)

# Show one scaled training sample.
print("\nfirst in row of X_train:\n", X_train[0])

train/test split and scaling is complete! yay!
X_train shape: (8000, 13)
X_test shape: (2000, 13)

first in row of X_train:
 [ 1.00150113 -0.57946723 -0.57638802 -0.91324755  0.91324755  0.35649971
 -0.6557859   0.34567966 -1.21847056  0.80843615  0.64920267  0.97481699
  1.36766974]


In [None]:
# model building time!

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling repeat on a fresh "Restart & Run All".
# (Bit-exact results on GPU may additionally need
# tf.config.experimental.enable_op_determinism() -- not enabled here.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# initialize ANN
classifier = Sequential()

# number of features determines our input dimension
input_dim = X_train.shape[1]

# width of the two hidden layers
num_units_1 = 8
num_units_2 = 8

In [None]:
# Declare the input shape with an explicit Input object. Passing `input_shape`
# to the first Dense layer is discouraged by Keras and emits a UserWarning.
classifier.add(tf.keras.Input(shape=(input_dim,)))

# first hidden layer w relu activation function
classifier.add(Dense(units=num_units_1, activation='relu'))

# second hidden layer w relu activation function
classifier.add(Dense(units=num_units_2, activation='relu'))

# output layer: binary classification uses 1 unit and sigmoid activation
classifier.add(Dense(units=1, activation='sigmoid'))

# summary() prints the table itself and returns None, so it is called
# directly -- wrapping it in print() only adds a stray "None" to the output.
classifier.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None


In [None]:
# training time

# optimizer: adam (adaptive stochastic gradient descent)
# loss: 'binary_crossentropy', matching the single sigmoid output unit
# metrics: accuracy
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping (optional, but used here) to limit overfitting and expedite
# training: stop once val_loss has not improved for 20 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the final epoch, i.e.
# `patience` epochs after the best one.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=20,
    restore_best_weights=True,
)

# fit the model to the training set for up to 101 epochs
history = classifier.fit(
    X_train, y_train,
    batch_size=32,
    epochs=101, # requirement: train for over 100 epochs
    validation_split=0.1, # 10% of training data for val/early stopping
    callbacks=[es]
)

print("\ntraining complete!")

Epoch 1/101
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7626 - loss: 0.5451 - val_accuracy: 0.8037 - val_loss: 0.4516
Epoch 2/101
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7912 - loss: 0.4656 - val_accuracy: 0.8037 - val_loss: 0.4296
Epoch 3/101
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7912 - loss: 0.4529 - val_accuracy: 0.8037 - val_loss: 0.4212
Epoch 4/101
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7849 - loss: 0.4565 - val_accuracy: 0.8037 - val_loss: 0.4173
Epoch 5/101
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7842 - loss: 0.4487 - val_accuracy: 0.8112 - val_loss: 0.4144
Epoch 6/101
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8091 - loss: 0.4293 - val_accuracy: 0.8188 - val_loss: 0.4117
Epoch 7/101
[1m225/22

In [None]:
# Score the held-out test set: the network outputs one churn probability per row.
y_pred_proba = classifier.predict(X_test)

# Probabilities strictly above 0.5 become class 1, everything else class 0.
y_pred = np.greater(y_pred_proba, 0.5).astype(int)

# Compare the hard predictions with the true labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [None]:
# Assemble the whole report as a list of lines and emit it with a single
# print; the rendered text is the same as printing each line separately.
divider = "=" * 50

report_lines = [
    "\n" + divider,
    "             model performance report",
    divider,
    "model architecture summary:",
    f"input layer (features): {input_dim}",
    f"hidden layer 1 units: {num_units_1} (activation: ReLU)",
    f"hidden layer 2 units: {num_units_2} (activation: ReLU)",
    "output layer units: 1 (activation: Sigmoid)",
    "optimizer: adam, loss function: binary crossentropy",
    "\ntest set results:",
    f"accuracy score: {accuracy:.4f}",
    "confusion matrix:",
    f"{cm}",
    "\nconfusion matrix interpretation:",
    # rows of cm are the true class, columns the predicted class
    f"[[{cm[0, 0]} (true negs)  {cm[0, 1]} (false positives)]",
    f" [{cm[1, 0]} (false negs) {cm[1, 1]} (true positives)]]",
    divider + "\n",
]

print("\n".join(report_lines))


             model performance report
model architecture summary:
input layer (features): 13
hidden layer 1 units: 8 (activation: ReLU)
hidden layer 2 units: 8 (activation: ReLU)
output layer units: 1 (activation: Sigmoid)
optimizer: adam, loss function: binary crossentropy

test set results:
accuracy score: 0.8630
confusion matrix:
[[1531   76]
 [ 198  195]]

confusion matrix interpretation:
[[1531 (true negs)  76 (false positives)]
 [198 (false negs) 195 (true positives)]]



In [None]:
# indiv customer

# Describe the customer with RAW feature values, in the same column order as
# the matrix the ColumnTransformer was fitted on:
# CreditScore, Geography, Gender, Age, Tenure, Balance, NumOfProducts,
# HasCrCard, IsActiveMember, EstimatedSalary
new_customer_raw = np.array(
    [[600, 'France', 'Male', 40, 3, 60000.0, 2, 1, 1, 50000.0]],
    dtype=object,
)

# Run the row through the fitted encoder instead of hand-typing the one-hot
# columns, so the category order and column layout always match training.
# NOTE: the encoders use handle_unknown='ignore', so a misspelled category is
# encoded as all zeros rather than raising -- double-check the spelling.
new_customer = ct.transform(new_customer_raw)
if hasattr(new_customer, 'toarray'):
    new_customer = new_customer.toarray()

# apply the same scaling that was fitted on the training split
new_customer_scaled = sc.transform(new_customer)

# prediction time
new_prediction_proba = classifier.predict(new_customer_scaled)
new_prediction = (new_prediction_proba > 0.5).astype(int)

# print indiv prediction result
print("\nindiv customer prediction")
print("customer info: Geography: France, Credit Score: 600, Gender: Male, Age: 40, Tenure: 3, Balance: $60000, NumOfProducts: 2, HasCrCard: Yes, IsActiveMember: Yes, Estimated Salary: $50000")
print(f"predicted churn probability: {new_prediction_proba[0][0]:.4f}")
print(f"predicted churn (1=Yes, 0=No, Threshold=0.5): {new_prediction[0][0]}")

if new_prediction[0][0] == 1:
    print("\nconclusion: The model predicts that we SHOULD say goodbye to this customer (Predicted Churn: YES). Bye!!!")
else:
    print("\nConclusion: The model predicts that we SHOULD NOTTTT say goodbye to this customer (Predicted Churn: NO).")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step

indiv customer prediction
customer info: Geography: France, Credit Score: 600, Gender: Male, Age: 40, Tenure: 3, Balance: $60000, NumOfProducts: 2, HasCrCard: Yes, IsActiveMember: Yes, Estimated Salary: $50000
predicted churn probability: 0.0331
predicted churn (1=Yes, 0=No, Threshold=0.5): 0

Conclusion: The model predicts that we SHOULD NOTTTT say goodbye to this customer (Predicted Churn: NO).
