# Churn Prediction & Explainability

## 1. Business Problem
Predict customer churn using behavioral and temporal usage patterns.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:

# Read the raw subscription usage export into a DataFrame and preview
# the first rows to sanity-check the load.
df = pd.read_csv(filepath_or_buffer='../data/subscription_usage_data.csv')
df.head()


## 2. Exploratory Data Analysis

In [None]:

# Bar chart of how many rows fall into each churn class, to see the
# class balance before modelling.
plt.figure()
df['churn'].value_counts().plot.bar()
plt.title('Churn Distribution')
plt.show()


## 3. Feature Engineering

In [None]:

# Derived feature: tenure in months multiplied by the monthly charge.
df['revenue_lifetime'] = df['tenure_months'].mul(df['monthly_charges'])

# Derived feature: fixed-weight blend of recent usage minutes (0.6) and
# weekly session frequency (0.4).
df['engagement_score'] = (
    df['usage_minutes_last_30d'].mul(0.6)
    .add(df['avg_sessions_per_week'].mul(0.4))
)
df.head()


## 4. Model Training

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Model inputs: raw usage/billing columns plus the two engineered features
# (revenue_lifetime, engagement_score).
features = [
    "tenure_months","monthly_charges","usage_minutes_last_30d",
    "avg_sessions_per_week","support_tickets_last_90d",
    "usage_trend_ratio","revenue_lifetime","engagement_score"
]

X = df[features]
y = df['churn']

# Hold out 20% for evaluation. Stratifying on the label keeps the churn rate
# the same in both splits, so the AUC is not distorted by an unlucky draw of
# the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Baseline: logistic regression. The features live on very different scales,
# and the default L2 penalty and solver are scale-sensitive, so standardise
# inside a pipeline. The scaler is fit on the training split only and is
# re-applied automatically whenever `lr` predicts.
# NOTE: `lr` is a Pipeline; the fitted estimator is `lr[-1]` if its
# coefficients are needed.
lr = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
lr.fit(X_train, y_train)
lr_auc = roc_auc_score(y_test, lr.predict_proba(X_test)[:, 1])

# Gradient-boosted trees. `use_label_encoder` is deprecated in recent
# XGBoost releases and only produces a warning there, so it is omitted.
xgb = XGBClassifier(eval_metric='logloss')
xgb.fit(X_train, y_train)
xgb_auc = roc_auc_score(y_test, xgb.predict_proba(X_test)[:, 1])

# Test-set ROC AUC for (logistic regression, XGBoost).
lr_auc, xgb_auc


## 5. SHAP Explainability

In [None]:

import shap
# Build a SHAP explainer around the fitted XGBoost model and compute
# SHAP values for the held-out test rows.
explainer = shap.Explainer(xgb)
shap_values = explainer(X_test)

# Beeswarm summary of the SHAP values. show=False presumably stops SHAP
# from rendering the plot itself so the explicit plt.show() below displays
# it — confirm against the SHAP plotting docs.
plt.figure()
shap.plots.beeswarm(shap_values, show=False)
plt.show()


## 6. Export for Power BI

In [None]:

from pathlib import Path

# Assemble the scoring table for the dashboard: the test-set features, the
# true churn label, and the XGBoost churn probability (positive-class column
# of predict_proba).
predictions = X_test.copy()
predictions['actual_churn'] = y_test.values
predictions['churn_probability'] = xgb.predict_proba(X_test)[:,1]

# to_csv does not create missing directories, so make sure the Power BI
# folder exists before writing (no-op when it is already there).
output_path = Path('../powerbi/churn_predictions_for_powerbi.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

predictions.to_csv(output_path, index=False)
predictions.head()
