# EDA and Modeling for Chronic Disease Prediction

This notebook demonstrates:
- Exploratory data analysis (EDA)
- Training a Random Forest Classifier
- Saving the model using `joblib`
- Explaining the model using SHAP

In [None]:
import pandas as pd

# Read the sample CSV (path is relative to the notebook's working directory)
# into a DataFrame and preview its first five rows as the cell output.
csv_path = '../data/diabetes_sample_data.csv'
df = pd.read_csv(csv_path)
df.head(5)

In [None]:
from sklearn.model_selection import train_test_split

# The 'target' column is the label; every other column is used as a feature.
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from pathlib import Path

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Train model: a 100-tree random forest with a fixed seed so reruns are repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model. joblib.dump does not create missing parent directories, so make
# sure the output folder exists first; otherwise a fresh checkout without
# ../models fails on this step after training has already completed.
MODEL_PATH = '../models/chronic_model.pkl'
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_PATH)

In [None]:
# Score the fitted model on the test split: overall accuracy first,
# then the full classification report.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
import shap
import matplotlib.pyplot as plt

# Explain model predictions, passing X to the explainer as its background data
# and then computing SHAP values for every row of X.
explainer = shap.Explainer(model, X)
shap_values = explainer(X)

# For a classifier the explanation can carry one set of SHAP values per class
# (samples x features x classes). summary_plot expects a 2-D matrix that lines
# up with X, so keep a single class when that third axis is present.
# NOTE(review): index 1 assumes a binary target whose positive class is
# model.classes_[1] — confirm against the dataset.
if shap_values.values.ndim == 3:
    shap_values = shap_values[:, :, 1]

# Visualize SHAP values
shap.summary_plot(shap_values, X)