In [1]:
# 📌 Task 4: Churn Prediction Model

# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Step 2: Build a small synthetic churn dataset (20 customers)
data = dict(
    CustomerID=range(1, 21),
    Age=[
        25, 45, 52, 36, 23, 40, 31, 60, 50, 33,
        29, 47, 38, 41, 27, 59, 48, 34, 28, 39,
    ],
    Tenure=[
        1, 12, 24, 5, 2, 20, 10, 30, 25, 8,
        3, 15, 18, 22, 6, 28, 16, 7, 4, 21,
    ],
    MonthlyCharges=[
        70, 90, 65, 100, 80, 60, 85, 95, 55, 75,
        68, 88, 92, 58, 77, 99, 73, 83, 79, 67,
    ],
    # Target label: 1 = Churn, 0 = No Churn
    Churn=[
        1, 0, 0, 1, 1, 0, 0, 0, 1, 1,
        0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
    ],
)

df = pd.DataFrame(data)

# Step 3: Separate the predictors (X) from the label (y).
# CustomerID is an identifier only, so it is left out of the features.
X = df.loc[:, ["Age", "Tenure", "MonthlyCharges"]]
y = df.Churn

# Step 4: Hold out 30% of the rows for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Step 5: Standardise features; the scaler learns its statistics from the
# training rows only and is then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Fit the logistic-regression baseline and predict on the test rows
log_model = LogisticRegression().fit(X_train, y_train)
y_pred_log = log_model.predict(X_test)

# Step 7: Fit an unconstrained decision tree and predict on the test rows
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

# Step 8: Evaluation function
def evaluate_model(name, y_test, y_pred):
    """Print accuracy, precision, recall and F1-score for one model.

    Args:
        name: Label shown in the report header.
        y_test: True labels of the evaluation rows.
        y_pred: Predicted labels, in the same order as ``y_test``.

    Returns:
        None. The report is written to stdout only.

    Precision, recall and F1 are computed with ``zero_division=0``: when a
    model predicts no positives (or the split contains none) the metric is
    reported as 0 without scikit-learn emitting ``UndefinedMetricWarning``
    into the middle of the report.
    """
    print(f"📊 {name} Performance")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred, zero_division=0))
    print("Recall:", recall_score(y_test, y_pred, zero_division=0))
    print("F1-Score:", f1_score(y_test, y_pred, zero_division=0))
    print("-"*40)

# Step 9: Report test-set metrics for the two baseline models
for model_label, model_predictions in (
    ("Logistic Regression", y_pred_log),
    ("Decision Tree", y_pred_tree),
):
    evaluate_model(model_label, y_test, model_predictions)

# Step 10: Hyperparameter tuning (Decision Tree example)
# Every depth / split-size combination is scored by 3-fold CV on F1.
param_grid = dict(
    max_depth=[2, 3, 4, 5],
    min_samples_split=[2, 3, 4],
)
grid = GridSearchCV(
    estimator=tree_model,
    param_grid=param_grid,
    scoring="f1",
    cv=3,
).fit(X_train, y_train)

print("✅ Best Parameters:", grid.best_params_)

# Score the winning tree from the search on the held-out test rows
best_tree = grid.best_estimator_
y_pred_best = best_tree.predict(X_test)

evaluate_model("Tuned Decision Tree", y_test, y_pred_best)

📊 Logistic Regression Performance
Accuracy: 0.8333333333333334
Precision: 1.0
Recall: 0.6666666666666666
F1-Score: 0.8
----------------------------------------
📊 Decision Tree Performance
Accuracy: 0.6666666666666666
Precision: 0.6666666666666666
Recall: 0.6666666666666666
F1-Score: 0.6666666666666666
----------------------------------------
✅ Best Parameters: {'max_depth': 2, 'min_samples_split': 2}
📊 Tuned Decision Tree Performance
Accuracy: 0.6666666666666666
Precision: 0.6666666666666666
Recall: 0.6666666666666666
F1-Score: 0.6666666666666666
----------------------------------------
