<a href="https://colab.research.google.com/github/roywang01-boop/AAI2026/blob/main/Part2_Customer_Churn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Train and evaluate a logistic-regression churn classifier.

# Load dataset (path is relative to the current working directory)
df = pd.read_csv("churn.csv")

# Column groups are declared once so that feature selection and the
# preprocessing step below can never disagree about which columns exist.
NUMERIC_FEATURES = [
    "age",
    "monthly_usage_hours",
    "purchase_amount",
    "customer_service_calls",
]
CATEGORICAL_FEATURES = ["region"]

# Features and target
X = df[NUMERIC_FEATURES + CATEGORICAL_FEATURES]
y = df["churn"]

# Preprocessing: standardize the numeric columns and one-hot encode the
# categorical ones. handle_unknown="ignore" keeps prediction from failing
# on a category that was not present when the encoder was fitted.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), NUMERIC_FEATURES),
        ("cat", OneHotEncoder(handle_unknown="ignore"), CATEGORICAL_FEATURES),
    ]
)

# Logistic Regression model; bundling preprocessing with the classifier
# means the scaler and encoder are fitted on the training rows only.
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=2000, random_state=42)),
])

# Train/test split: 20% of the rows are held out, with a fixed seed so the
# split is reproducible.
# NOTE(review): the split is not stratified; if the churn classes are
# imbalanced, consider passing stratify=y — confirm against the data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model.fit(X_train, y_train)

# Accuracy on the held-out rows
pred_test = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, pred_test))

# Score a single hand-written customer with the fitted pipeline.
# The record uses the same column names the model was trained on.
new_customer = pd.DataFrame([
    {
        "age": 35,
        "monthly_usage_hours": 20,
        "purchase_amount": 150,
        "customer_service_calls": 5,
        "region": "West",
    }
])

# Decision cutoff applied to the predicted probability.
threshold = 0.5

# Row 0 is the only customer; column 1 is the classifier's second class.
# NOTE(review): presumably that class is churn == 1 — confirm the labels.
prob = model.predict_proba(new_customer)[0, 1]

# Probabilities at or above the cutoff are labelled 1, everything else 0.
prediction = int(prob >= threshold)

print("\nChurn Probability:", round(prob, 2))
print("Churn Prediction (1=churn, 0=no churn):", prediction)

# Explanation
# The cutoff shown to the reader is read from `threshold` rather than typed
# into the text, so the message always matches the rule that was applied.
print("\nExplanation:")
print("- The churn probability represents the likelihood the customer will leave.")
print(f"- If probability >= {threshold}, we classify the customer as at risk of churning.")
print("- Businesses can use this information to offer discounts, improve service,")
print("  or provide loyalty rewards to reduce customer loss.")

Model Accuracy: 0.64

Churn Probability: 0.6
Churn Prediction (1=churn, 0=no churn): 1

Explanation:
- The churn probability represents the likelihood the customer will leave.
- If probability >= 0.5, we classify the customer as at risk of churning.
- Businesses can use this information to offer discounts, improve service,
  or provide loyalty rewards to reduce customer loss.
