#  03_shap_analysis.ipynb

This notebook interprets the logistic regression model using SHAP values to explain key drivers of participation continuity.

In [None]:
#  Import necessary libraries
import pandas as pd
import shap
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


In [None]:
# Load the cleaned dataset and split it into the model inputs (X) and the
# binary outcome (y) used by the cells below.
feature_columns = [
    'access_score',
    'trust_score',
    'social_support',
    'age',
    'gender',
    'income_level',
]
target_column = 'participation_binary'

df = pd.read_csv('./data/chis_cleaned.csv')
X = df.loc[:, feature_columns]
y = df.loc[:, target_column]


In [None]:
# Train the logistic regression on the full feature matrix. max_iter is
# raised to 1000 to give the solver more iterations to converge.
# fit() returns the fitted estimator, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X, y)
model


In [None]:
# Build a SHAP explainer for the fitted model, using X as the masker
# (background data), then compute SHAP values for every row of X.
explainer = shap.Explainer(model, masker=X)
shap_values = explainer(X)


In [None]:
# SHAP summary plot: one row per feature, each point a sample's SHAP value.
# show=False hands rendering back to this cell so the figure can be titled
# and is displayed exactly once by the single plt.show() below. (Previously
# summary_plot(show=True) rendered the figure itself, which made the
# following plt.show() a redundant no-op.)
shap.summary_plot(shap_values, X, show=False)
plt.title('SHAP summary: drivers of predicted participation continuity')
plt.show()


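>  This SHAP analysis highlights which variables had the strongest influence on the model's predicted continuity. Because SHAP values describe the fitted model's predictions rather than causal effects, they should be read as associations. Results will be used in poster interpretation and policy discussion.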