#  02_modeling.ipynb

This notebook trains a logistic regression model on CHIS 2023 data to predict sustained participation in community health programs.

In [None]:
#  Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


In [None]:
# Load the preprocessed dataset produced upstream.
# The CSV is expected to be cleaned already; no cleaning happens in this notebook.
cleaned_csv_path = './data/chis_cleaned.csv'
df = pd.read_csv(cleaned_csv_path)


In [None]:
# Define target and features
# Target: participation continuity (1 = ≥2 times, 0 = once or none)
target_column = 'participation_binary'

# Predictor columns, kept in the order they are passed to the model.
# NOTE(review): 'gender' and 'income_level' are presumably already numerically
# encoded in the cleaned CSV — confirm, since the model is fit on them directly.
feature_columns = [
    'access_score',
    'trust_score',
    'social_support',
    'age',
    'gender',
    'income_level',
]

y = df[target_column]
X = df[feature_columns]


In [None]:
# Split data into train/test sets (30% held out for testing, fixed seed for reproducibility).
# Stratify on the target so the train and test sets keep the same class
# proportions; a purely random split can skew the class balance of the
# held-out set and distort the evaluation metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


In [None]:
# Fit a logistic regression classifier on the training split.
# max_iter=1000 lets the solver run up to 1000 iterations.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model


In [None]:
# Evaluate model performance on the held-out test set.
# Hard class labels feed the confusion matrix and the classification report.
y_pred = model.predict(X_test)

# ROC AUC measures how well the model ranks positives above negatives, so it
# needs continuous scores. Passing the hard 0/1 labels from predict() reduces
# the curve to a single threshold and misreports the AUC.
# Column 1 of predict_proba is the probability of the second entry in
# model.classes_, i.e. the positive class (1) for a 0/1 target.
y_score = model.predict_proba(X_test)[:, 1]

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_score))


> Modeling complete. Continue with `03_shap_analysis.ipynb`, which uses SHAP to interpret the model.