# 📘 Project: Predicting Hospital Readmission Risk

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Load the raw dataset from the working directory into `df`; the trailing
# expression makes the notebook render a preview of the first rows.
csv_path = 'diabetic_data.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Remove identifier columns (silently skipped when absent) so the model
# cannot key on them.
id_columns = ['encounter_id', 'patient_nbr']
df = df.drop(columns=id_columns, errors='ignore')

# Binary target: 1 when the label is exactly '<30', 0 for every other value.
df['readmitted'] = (df['readmitted'] == '<30').astype('int64')

# Integer-encode every remaining object-typed column through its categorical
# codes. The column list is captured once, before any column is overwritten.
# NOTE(review): pandas assigns code -1 to missing values, and the codes impose
# an arbitrary ordering on the categories.
text_columns = df.select_dtypes(include='object').columns
for col in text_columns:
    as_category = df[col].astype('category')
    df[col] = as_category.cat.codes

In [None]:
# Separate the feature matrix from the binary `readmitted` target.
X = df.drop('readmitted', axis=1)
y = df['readmitted']

# Hold out 20% of the rows for evaluation. The target is a binary flag derived
# from a single label value, so the split is stratified on `y` to keep the
# positive-class proportion the same in the train and test partitions.
# random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Build a 100-tree random forest with a fixed seed and fit it on the training
# partition; `fit` returns the estimator itself, so the calls can be chained.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test)

In [None]:
# Print the confusion matrix first, then the classification report, both
# computed from the held-out labels and the model's predictions.
for metric_fn in (confusion_matrix, classification_report):
    print(metric_fn(y_test, y_pred))

In [None]:
# Pair each feature column with the importance score from the fitted forest
# and rank the pairs from most to least important.
features = X.columns
importances = model.feature_importances_
importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

# Horizontal bar chart of the ten highest-ranked features.
top_features = importance_df.head(10)
plt.figure(figsize=(10, 6))
sns.barplot(data=top_features, x='Importance', y='Feature')
plt.title('Top 10 Feature Importances')
plt.show()

## ✅ Final Summary
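A Random Forest classifier was trained on EHR data to predict whether a patient is readmitted within 30 days (the `<30` label). The notebook evaluates the model on a held-out 20% test split using a confusion matrix and a classification report, and plots the ten most important predictive features.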