# Surgical Site Infection (SSI) Prediction

This is a starter notebook for building and testing baseline models to predict surgical site infection (SSI).

Use this as your foundation for exploratory data analysis, model training, and interpretability.

---

In [ ]:
# Install dependencies (if needed): uncomment the line below.
# %pip installs into the environment of the running kernel.
# %pip install pandas numpy scikit-learn matplotlib xgboost shap

In [ ]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Synthetic demo dataset: n simulated patients with four features and a
# binary `ssi` label derived from a noisy linear risk score.
np.random.seed(42)
n = 300
df = pd.DataFrame({
    'age': np.random.randint(20, 80, n),                      # integers in [20, 79]
    'bmi': np.random.normal(27, 5, n).round(1),
    'op_time': np.random.normal(120, 40, n).clip(20, 300),    # clipped to [20, 300]
    'diabetes': np.random.choice([0,1], size=n, p=[0.75, 0.25])
})

# Risk score = age term + bonus for bmi > 30 + bonus for diabetes + Gaussian noise.
# The age weight is deliberately small (0.002 per year, i.e. 0.04-0.158 over the
# sampled ages) so the score straddles the 0.15 threshold. A weight of 0.02 puts
# every score far above the threshold, labels all patients positive, and leaves
# the classifier with a single class that it cannot be fitted on.
risk_score = (
    0.002 * df['age']
    + 0.1 * (df['bmi'] > 30)
    + 0.05 * df['diabetes']
    + np.random.normal(0, 0.05, n)
)
df['ssi'] = (risk_score > 0.15).astype(int)

# Guard: downstream model fitting needs both outcome classes.
assert df['ssi'].nunique() == 2, "synthetic 'ssi' label must contain both classes"
df.head()

In [ ]:
# Hold out 20% of the rows, fit a baseline logistic regression on the rest,
# and report accuracy on the held-out rows.
feature_cols = ['age', 'bmi', 'op_time', 'diabetes']
X = df[feature_cols]
y = df['ssi']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LogisticRegression(max_iter=300).fit(X_train, y_train)

test_accuracy = model.score(X_test, y_test)  # mean accuracy on the held-out split
print('Training complete. Test accuracy:', test_accuracy)