# Libraries

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load Data

In [None]:
# Read the Telco churn CSV from the notebook's working directory and
# preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="Telco-Customer-Churn.csv")
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes




# Convert Target (Churn → Numeric)

In [None]:
# Encode the target in place: 'Yes' -> 1, 'No' -> 0.
# The extra 1 -> 1 / 0 -> 0 entries make this cell idempotent: with only the
# string keys, running the cell a second time would map the already-numeric
# column to all-NaN and silently break every cell below.
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0, 1: 1, 0: 0})

# Any label outside the mapping becomes NaN; fail here rather than downstream.
assert df['Churn'].notna().all(), "Unexpected Churn label(s): expected only 'Yes'/'No'"

# Correlation with Churn

In [None]:
# Pearson correlation of every numeric column with the encoded target,
# ordered from the most positive to the most negative coefficient.
# Non-numeric columns are skipped by numeric_only=True.
correlation = (
    df.corr(numeric_only=True)
      .loc[:, 'Churn']
      .sort_values(ascending=False)
)
correlation

Unnamed: 0,Churn
Churn,1.0
MonthlyCharges,0.193356
SeniorCitizen,0.150889
tenure,-0.352229


# Drop Low-Correlation Features (|r| < 0.20)

In [None]:
# Names of the numeric columns whose absolute correlation with Churn is
# below 0.20. A NaN coefficient never satisfies the comparison, so such a
# column is not selected.
# NOTE(review): the coefficients come from the full dataset, i.e. before the
# train/test split - confirm that is acceptable for this analysis.
low_corr_cols = [
    column for column, coefficient in correlation.items()
    if abs(coefficient) < 0.20
]
low_corr_cols

['MonthlyCharges', 'SeniorCitizen']

In [None]:
# New frame without the weakly correlated columns; `df` itself is left untouched.
df_filtered = df.drop(labels=low_corr_cols, axis='columns')

# Separate Features and Target

In [None]:
# Predictors: everything except the target and the row identifier.
# customerID is a unique per-customer string (see the data preview); leaving
# it in makes the one-hot encoding step create one dummy column per customer,
# which carries no generalisable signal.
X = (
    df_filtered
    .drop(columns='Churn')
    .drop(columns='customerID', errors='ignore')
)

# TotalCharges does not appear in the numeric_only correlation output, so it
# was parsed as text; left as-is it would be one-hot encoded value by value.
# Convert it to a real numeric feature instead.
# NOTE(review): unparseable entries are presumably blanks for customers with
# no billing history yet, hence the 0.0 fill - confirm against the raw file.
if 'TotalCharges' in X.columns:
    X = X.assign(
        TotalCharges=pd.to_numeric(X['TotalCharges'], errors='coerce').fillna(0.0)
    )

# Target: the 0/1 encoded churn flag.
y = df_filtered['Churn']

# Encode Categorical Columns

In [None]:
# One-hot encode the non-numeric columns of X. drop_first=True omits the
# first level of each encoded column, so k categories yield k-1 indicators.
X_encoded = pd.get_dummies(data=X, drop_first=True)

# Train-Test Split

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the churn
# ratio the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Baseline Random Forest Model

In [None]:
# Reference model: a random forest with library-default hyperparameters,
# class-weighted to offset the churn / no-churn imbalance and seeded for
# repeatability. fit() returns the estimator, so it can be chained.
rf_baseline = RandomForestClassifier(
    class_weight='balanced',
    random_state=42,
).fit(X_train, y_train)

# Score the held-out test set.
y_pred_base = rf_baseline.predict(X_test)
baseline_accuracy, baseline_f1 = (
    accuracy_score(y_test, y_pred_base),
    f1_score(y_test, y_pred_base),
)

print(
    f"Baseline Accuracy: {baseline_accuracy*100:.2f}%",
    f"Baseline F1-Score: {baseline_f1:.4f}",
    sep="\n",
)


Baseline Accuracy: 79.28%
Baseline F1-Score: 0.5731


# Hyperparameter Tuning (RandomizedSearchCV)

In [None]:
# Candidate values the randomized search samples from.
param_dist = {
    'n_estimators': [200, 300, 500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Use the same class weighting as the baseline model. Without it the tuned
# forest is trained on the raw, imbalanced labels, so the baseline-vs-tuned
# comparison would measure the weighting difference rather than the effect
# of the hyperparameter search (and the forest can collapse to predicting
# the majority class only, i.e. F1 = 0).
rf = RandomForestClassifier(random_state=42, class_weight='balanced')

# Evaluate 10 sampled parameter combinations with 3-fold cross-validation on
# the training split only, ranked by F1 on the positive (churn) class.
# n_jobs=-1 runs the fits on all available cores.
random_search = RandomizedSearchCV(
    rf,
    param_distributions=param_dist,
    n_iter=10,
    scoring='f1',
    cv=3,
    random_state=42,
    n_jobs=-1
)

random_search.fit(X_train, y_train)

# Evaluate Model

In [None]:
# The search refits its best parameter combination on the full training
# split; take that estimator and score it on the held-out test set.
best_rf = random_search.best_estimator_
y_pred_tuned = best_rf.predict(X_test)

tuned_accuracy, tuned_f1 = (
    accuracy_score(y_test, y_pred_tuned),
    f1_score(y_test, y_pred_tuned),
)

print("Best Parameters:", random_search.best_params_)
print(
    f"Tuned Accuracy: {tuned_accuracy*100:.2f}%",
    f"Tuned F1-Score: {tuned_f1:.4f}",
    sep="\n",
)


Best Parameters: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 30}
Tuned Accuracy: 73.46%
Tuned F1-Score: 0.0000


# Confusion Matrix

In [None]:
# Confusion matrix of the tuned model on the test set.
# labels=[0, 1] pins the row/column order (0 = 'No', 1 = 'Yes' per the target
# encoding), so the display labels below cannot end up attached to the wrong class.
cm = confusion_matrix(y_test, y_pred_tuned, labels=[0, 1])

# Name the classes and title the figure so the plot is readable on its own.
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['No churn', 'Churn'],
)
disp.plot(cmap='Blues')
disp.ax_.set_title('Tuned Random Forest - Test Set Confusion Matrix')
plt.show()

# Final Comparison

In [None]:
# Side-by-side summary of the test-set metrics computed in the cells above,
# emitted as one print call with one metric per line.
print(
    "===== FINAL COMPARISON =====",
    f"Baseline Accuracy: {baseline_accuracy*100:.2f}%",
    f"Tuned Accuracy: {tuned_accuracy*100:.2f}%",
    f"Baseline F1-Score: {baseline_f1:.4f}",
    f"Tuned F1-Score: {tuned_f1:.4f}",
    sep="\n",
)

===== FINAL COMPARISON =====
Baseline Accuracy: 79.42%
Tuned Accuracy: 80.13%
Baseline F1-Score: 0.5246
Tuned F1-Score: 0.5425
