# Customer Churn Analysis — Logistic Regression

This notebook demonstrates data cleaning, feature engineering, model training (logistic regression), evaluation, and interpretation.

In [ ]:
# Uncomment the next line to install the required packages into the active kernel's environment
# %pip install pandas scikit-learn matplotlib seaborn shap

In [ ]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

# Load data
df = pd.read_csv('data/churn_sample.csv')

# Data cleaning: coerce the numeric feature columns to float.
# pd.to_numeric(errors='coerce') converts blank or malformed entries to NaN
# instead of raising (as a bare astype(float) would), so incomplete rows can be
# removed explicitly below.
df['tenure'] = pd.to_numeric(df['tenure'], errors='coerce').astype(float)
df['monthly_charges'] = pd.to_numeric(df['monthly_charges'], errors='coerce').astype(float)

# LogisticRegression cannot be fitted on NaN values, so drop rows that are
# missing a feature or the target. The original index is kept so remaining rows
# can still be traced back to the CSV. Nothing is printed when the data is clean.
n_rows_loaded = len(df)
df = df.dropna(subset=['tenure', 'monthly_charges', 'churn'])
n_rows_dropped = n_rows_loaded - len(df)
if n_rows_dropped:
    print(f'Dropped {n_rows_dropped} of {n_rows_loaded} rows with missing or non-numeric values')

# Feature selection: two numeric predictors, with `churn` as the target.
X = df[['tenure','monthly_charges']]
y = df['churn']

# Hold out 25% of the rows for evaluation. stratify=y keeps the churn /
# non-churn proportions equal in the train and test splits; without it a random
# split can over- or under-represent a class and distort the metrics below
# (churn labels are typically imbalanced — verify with y.value_counts()).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Fit a logistic regression with scikit-learn's default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of model.classes_[1];
# predict() returns the hard class labels.
probs = model.predict_proba(X_test)[:,1]
preds = model.predict(X_test)

# Per-class precision / recall / F1, followed by ROC AUC computed from the
# probabilities rather than the hard labels.
print(classification_report(y_test, preds))
print('AUC:', roc_auc_score(y_test, probs))

# Histogram of the predicted churn probabilities for the held-out test rows,
# drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(probs, bins=10)
ax.set_title('Predicted Churn Probability (test set)')
ax.set_xlabel('Probability')
ax.set_ylabel('Count')
plt.show()

## Model interpretation
For a model-level explanation, install the optional `shap` package and generate a SHAP summary plot from the fitted model. Example:

```python
import shap
# Build a SHAP explainer for the fitted linear model. The training features are
# passed as the second argument (presumably the background data — confirm
# against the installed SHAP version).
explainer = shap.LinearExplainer(model, X_train)
# Compute SHAP values for the held-out test rows.
shap_values = explainer.shap_values(X_test)
# Draw the summary plot of those SHAP values against the test features.
shap.summary_plot(shap_values, X_test)
```