In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from xgboost import XGBClassifier

In [None]:
# Load the raw data and discard, in a single pass, the columns that are not
# used as model inputs further down.
# NOTE(review): the *_charge columns are presumably derived from the matching
# *_minutes columns -- confirm that this is why they are dropped.
df = pd.read_csv('dataset.csv').drop(
    columns=[
        'area_code',
        'state',
        'total_day_charge',
        'total_eve_charge',
        'total_night_charge',
        'total_intl_charge',
    ]
)
df.head(10)

In [None]:
# Encode the yes/no columns (two plan flags and the churn target) as 0/1 integers.
yes_no_to_int = {'no': 0, 'yes': 1}
for col in ('international_plan', 'voice_mail_plan', 'churn'):
    # Re-running this cell on an already-encoded column would map every 0/1 to
    # NaN (neither value is a key of the mapping), so leave such columns alone.
    if df[col].isin([0, 1]).all():
        continue
    encoded = df[col].map(yes_no_to_int)
    # Series.map turns any label missing from the mapping into NaN; surface
    # that here instead of letting NaNs flow into the split/resampling cells.
    if encoded.isna().any():
        unexpected = df.loc[encoded.isna(), col].unique().tolist()
        raise ValueError(
            f"Column {col!r} contains values other than 'yes'/'no': {unexpected}"
        )
    df[col] = encoded.astype('int64')
df.head(10)

In [None]:
# Separate the target column ('churn') from the feature matrix (every other
# column that is still present in df).
y = df['churn']
X = df.drop('churn', axis=1)

In [None]:
# Hold out 10% of the rows as the test set. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

In [None]:
# Report how many rows ended up in each split.
print(
    f"Train size: {len(X_train)} samples",
    f"Test size: {len(X_test)} samples",
    sep="\n",
)

In [None]:
# Size and per-class counts of the training split before any resampling.
print(
    'Before SMOTE',
    f'Total Samples: {len(X_train)}',
    f'Class Distribution: \n {y_train.value_counts()}',
    sep='\n',
)


In [None]:
# Resample the training split only; X_test / y_test are not passed to SMOTE,
# so the evaluation data stays as it was drawn. The seed fixes the synthetic
# samples that get generated.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X=X_train, y=y_train)

In [None]:
# Size and per-class counts of the training split after resampling.
print(
    'After SMOTE',
    f'Total Samples: {len(X_train_smote)}',
    f'Class Distribution: \n {y_train_smote.value_counts()}',
    sep='\n',
)

In [None]:
# Baseline model: logistic regression trained on the SMOTE-resampled training
# data and scored on the untouched test split.
baseline = LogisticRegression(random_state=42, max_iter=1000).fit(
    X_train_smote, y_train_smote
)

# Column 1 of predict_proba is the probability of the positive class (label 1);
# ROC-AUC is computed from these scores, the confusion matrix from hard labels.
y_proba_baseline = baseline.predict_proba(X_test)[:, 1]
y_pred_baseline = baseline.predict(X_test)

print(f"ROC-AUC: {roc_auc_score(y_test, y_proba_baseline):.4f}")
print(
    "\nConfusion Matrix:",
    confusion_matrix(y_test, y_pred_baseline),
    sep="\n",
)

In [None]:
# Second model: gradient-boosted trees trained on the same SMOTE-resampled
# training data and scored on the same test split as the baseline.
improved = XGBClassifier(
    learning_rate=0.05,
    max_depth=6,
    n_estimators=200,
    eval_metric='auc',
    random_state=42,
).fit(X_train_smote, y_train_smote)

# Column 1 of predict_proba is the probability of the positive class (label 1);
# ROC-AUC is computed from these scores, the confusion matrix from hard labels.
y_proba_improved = improved.predict_proba(X_test)[:, 1]
y_pred_improved = improved.predict(X_test)

print(f"ROC-AUC: {roc_auc_score(y_test, y_proba_improved):.4f}")
print(
    "\nConfusion Matrix:",
    confusion_matrix(y_test, y_pred_improved),
    sep="\n",
)