<a href="https://colab.research.google.com/github/murphycollins/murphycollins.github.io/blob/main/churn-analysis/notebooks/churn_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install pandas scikit-learn matplotlib seaborn shap

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Load the sample churn dataset straight from GitHub.
# The CSV must exist at data/churn_sample.csv on the main branch of the
# repository named in the URL below.
# NOTE(review): the Colab badge at the top of this notebook points at the
# `murphycollins.github.io` repo, while this URL points at `churn-analysis`
# -- confirm the file is published where this URL expects it.
url = "https://raw.githubusercontent.com/murphycollins/churn-analysis/main/data/churn_sample.csv"
df = pd.read_csv(url)

# Fail fast, with a readable message, if the columns the modelling cell
# relies on are absent (otherwise the error only appears further down).
required_columns = ['tenure', 'monthly_charges', 'churn']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"churn_sample.csv is missing required column(s): {missing_columns}")

# Preview the first rows (rich display as the cell's last expression).
df.head()

In [None]:

# Model inputs: two feature columns and the churn label.
X = df[['tenure','monthly_charges']]
y = df['churn']

# Hold out 25% of the rows for evaluation with a fixed seed.
# `stratify=y` keeps the label proportions equal in both folds, so a small
# or skewed sample cannot produce a test fold that is missing a class
# (which would leave the AUC below undefined).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Baseline classifier with scikit-learn's default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

# Hard class predictions, plus the predicted probability taken from
# column 1 of predict_proba (the second entry of model.classes_).
preds = model.predict(X_test)
probs = model.predict_proba(X_test)[:,1]

# Per-class precision/recall/F1 and the ROC AUC on the held-out fold.
print(classification_report(y_test, preds))
print("AUC:", roc_auc_score(y_test, probs))

In [None]:

# Histogram of the predicted churn probabilities held in `probs`,
# drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(probs, bins=10, color="skyblue", edgecolor="black")
ax.set_title("Predicted Churn Probability (Test Set)")
ax.set_xlabel("Probability")
ax.set_ylabel("Count")
plt.show()

In [None]:

# Explain the fitted logistic regression with SHAP values.
import shap

# Linear-model explainer built from the fitted `model`; X_train is passed as
# the second argument (presumably the background data the attributions are
# measured against -- confirm against the installed shap version).
explainer = shap.LinearExplainer(model, X_train)
# SHAP values computed for the rows of X_test.
shap_values = explainer.shap_values(X_test)

# Summary plot of those SHAP values, with X_test supplying the feature values.
shap.summary_plot(shap_values, X_test)