In [1]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.combine import SMOTEENN

In [2]:
# Function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title, ax, labels=None):
    """Draw an annotated confusion-matrix heatmap on an existing axes.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    title : str
        Title placed above the heatmap.
    ax : matplotlib.axes.Axes
        Axes to draw into. The function creates no figure and returns nothing.
    labels : sequence, optional
        Classes to include, in the order they should appear on both axes.
        Passing it keeps the matrix shape and ordering fixed across models
        (e.g. ``labels=[0, 1]``) and makes the ticks show the real class
        values. When omitted, behaviour is unchanged: scikit-learn infers the
        classes from the data and seaborn uses its default tick labels.
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    # 'auto' is seaborn's own default, so the labels=None path is identical
    # to calling heatmap without tick-label arguments.
    tick_labels = 'auto' if labels is None else list(labels)
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',  # counts are integers
        cmap='Blues',
        ax=ax,
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    ax.set_title(title)
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')

In [3]:
# Load the churn table and discard the 'Unnamed: 0' column in one step.
df = (
    pd.read_csv("tel_churn_.csv")
    .drop(columns=['Unnamed: 0'])
)

In [None]:
# Target is the 'Churn' column; every other column is a feature.
y = df['Churn']
x = df.drop(columns=['Churn'])

In [None]:
# 80/20 hold-out split, stratified on the target and seeded for repeatability.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=100,
)

In [None]:
# 2x2 grid of axes: one panel per confusion matrix (four in total).
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
# Extra spacing so titles and axis labels of neighbouring panels do not collide.
fig.subplots_adjust(hspace=0.4, wspace=0.3)

In [None]:
# MODEL 1: DECISION TREE
# Baseline tree with hand-picked hyper-parameters, fitted on the raw training split.
print("1. Decision Tree Model")
model_dt = DecisionTreeClassifier(
    criterion='gini',
    max_depth=6,
    min_samples_leaf=8,
    random_state=100,
)
model_dt.fit(x_train, y_train)

In [None]:
# Score the baseline tree on the hold-out split.
y_pred_dt = model_dt.predict(x_test)
print(
    classification_report(y_true=y_test, y_pred=y_pred_dt, labels=[0, 1])
)

In [None]:
# Top-left panel of the 2x2 grid.
plot_confusion_matrix(
    y_true=y_test,
    y_pred=y_pred_dt,
    title="1. Decision Tree",
    ax=axes[0, 0],
)

In [None]:
# MODEL 2: DECISION TREE + TUNING + SMOTEENN
# Same classifier family as Model 1, but with grid-searched hyper-parameters
# and SMOTEENN resampling applied to the data before fitting.
print("\n2. Decision Tree (Tuned + SMOTEENN)")

In [None]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x, y)

In [None]:
xr_train, xr_test, yr_train, yr_test = train_test_split(x_resampled, y_resampled, test_size=0.2, random_state=100)

In [None]:
# Search space for the decision tree: 2 * 5 * 4 * 3 = 120 combinations.
param_grid_dt = dict(
    criterion=['gini', 'entropy'],
    max_depth=[4, 6, 8, 10, 12],
    min_samples_leaf=[2, 4, 8, 10],
    min_samples_split=[2, 5, 10],
)

In [None]:
# Exhaustive 5-fold search over param_grid_dt, parallelised across all cores.
grid_search_dt = GridSearchCV(
    DecisionTreeClassifier(random_state=100),
    param_grid_dt,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search_dt.fit(xr_train, yr_train)

In [None]:
# best_estimator_ is the tree GridSearchCV kept for the winning parameter
# combination (refit is left at its default in the search above).
best_dt = grid_search_dt.best_estimator_
# Predictions for Model 2's evaluation split.
y_pred_dt_tuned = best_dt.predict(xr_test)

In [None]:
# Report the selected hyper-parameters, then per-class metrics for Model 2.
print("Best Parameters (DT):", grid_search_dt.best_params_)
print(
    classification_report(y_true=yr_test, y_pred=y_pred_dt_tuned, labels=[0, 1])
)

In [None]:
# Top-right panel of the 2x2 grid.
plot_confusion_matrix(
    y_true=yr_test,
    y_pred=y_pred_dt_tuned,
    title="2. DT + Tuned + SMOTEENN",
    ax=axes[0, 1],
)
