In [15]:
from math import sqrt

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
from sklearn.metrics import recall_score, f1_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Load the churn data and drop the row/customer identifiers and surname in one
# non-mutating chain, so re-running the cell always starts from the file on disk.
df = (
    pd.read_csv("Churn_Modelling.csv")
    .drop(columns=['RowNumber', 'CustomerId', 'Surname'])
)

# Encode categorical variables
# Gender becomes integer codes; Geography is one-hot encoded with the first
# level dropped (drop_first=True) to avoid a redundant dummy column.
df['Gender'] = LabelEncoder().fit_transform(df['Gender'])
df = pd.get_dummies(df, columns=['Geography'], drop_first=True)

# Split data
# 'Exited' is the target; every remaining column is used as a feature.
X = df.drop(columns=['Exited'])
y = df['Exited']
# Stratify on y so the train and test sets keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then apply the same transform to
# the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [16]:
# Both estimators are stochastic, so a fixed random_state keeps the fitted
# models (and the metrics derived from them) repeatable across
# Restart Kernel -> Run All.

# Decision Tree
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)

# ANN
# One hidden layer of 100 units; max_iter is set high so training is not cut
# off by the iteration cap.
ann = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=10000,
    learning_rate_init=0.001,
    random_state=42,
)
ann.fit(X_train, y_train)

MLPClassifier(max_iter=10000)

In [17]:
def _evaluate_predictions(y_true, y_pred):
    """Compute the metrics reported for one classifier.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(cm, accuracy, precision, recall, f1, mse, rmse, mae)``.
        Precision, recall and F1 are macro-averaged (``average='macro'``).
    """
    cm = confusion_matrix(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    # Error metrics are regression measures. On 0/1 class labels every error
    # is exactly 1, so MSE and MAE both equal the misclassification rate
    # (1 - accuracy); they are reported for completeness only.
    mse = mean_squared_error(y_true, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    return cm, accuracy, precision, recall, f1, mse, rmse, mae


def _print_metrics(title, metrics):
    """Print one model's metrics under ``title`` in a fixed order.

    ``metrics`` is the tuple returned by ``_evaluate_predictions``.
    """
    cm, accuracy, precision, recall, f1, mse, rmse, mae = metrics
    print(title)
    print("Confusion Matrix:\n", cm)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("MSE:", mse)
    print("RMSE:", rmse)
    print("MAE:", mae)


# Predictions
y_pred_tree = dtree.predict(X_test)
y_pred_ann = ann.predict(X_test)

# Evaluation Metrics
# Confusion Matrix, Accuracy, Precision, Recall, F1 Score, plus error metrics.
# The individual names are kept so they remain available after this cell.
metrics_tree = _evaluate_predictions(y_test, y_pred_tree)
(cm_tree, accuracy_tree, precision_tree, recall_tree, f1_tree,
 mse_tree, rmse_tree, mae_tree) = metrics_tree

metrics_ann = _evaluate_predictions(y_test, y_pred_ann)
(cm_ann, accuracy_ann, precision_ann, recall_ann, f1_ann,
 mse_ann, rmse_ann, mae_ann) = metrics_ann

# Printing metrics
_print_metrics("Decision Tree Metrics:", metrics_tree)
_print_metrics("\nANN Metrics:", metrics_ann)

Decision Tree Metrics:
Confusion Matrix:
 [[1356  237]
 [ 203  204]]
Accuracy: 0.78
Precision: 0.6661866799317546
Recall: 0.6762263033449474
F1 Score: 0.6707690834211283
MSE: 0.22
RMSE: 0.469041575982343
MAE: 0.22

ANN Metrics:
Confusion Matrix:
 [[1493  100]
 [ 193  214]]
Accuracy: 0.8535
Precision: 0.7835282695257308
Recall: 0.7315119433763502
F1 Score: 0.7521317305646533
MSE: 0.1465
RMSE: 0.38275318418009274
MAE: 0.1465
