# 04 - Overbooking Simulation
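This notebook simulates an overbooking strategy that targets patients at high risk of not showing up, with the goal of minimizing idle appointment slots. A random forest classifier predicts each patient's probability of showing up; patients whose predicted probability falls below a threshold are flagged as high risk, and pairs of them are counted as candidate double-booked slots.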

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load data
df = pd.read_csv('../data/healthcare_noshows_appt.csv')

# Rename and convert
df.rename(columns={
    'Hipertension': 'Hypertension',
    'Handcap': 'Handicap',
    'Showed_up': 'ShowedUp',
    'Date.diff': 'LeadTimeDays'
}, inplace=True)
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])
df['AppointmentWeekday'] = df['AppointmentDay'].dt.dayofweek
df['LeadTime'] = (df['AppointmentDay'] - df['ScheduledDay']).dt.days
df['HourScheduled'] = df['ScheduledDay'].dt.hour
df['Gender'] = df['Gender'].map({'F': 0, 'M': 1})
df['ShowedUp'] = df['ShowedUp'].astype(int)
df.drop(['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay', 'Neighbourhood'], axis=1, inplace=True, errors='ignore')

In [None]:
# Features and target
X = df.drop('ShowedUp', axis=1)
y = df['ShowedUp']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict probabilities
y_probs = model.predict_proba(X_test)[:, 1]  # Probability of showing up

## Simulate Overbooking Strategy

In [None]:
# Define overbooking rule
threshold = 0.5  # patients with < 0.5 prob are high risk for no-show

# Simulate overbooking: allow 2 patients in a slot if both < threshold
high_risk = y_probs < threshold
overbooked_slots = np.sum(high_risk) // 2

print(f"Total high-risk patients: {np.sum(high_risk)}")
print(f"Suggested number of overbooked slots: {overbooked_slots}")

## Evaluate Impact

In [None]:
import matplotlib.pyplot as plt

# Histogram of show-up probabilities
plt.hist(y_probs, bins=30, edgecolor='k')
plt.axvline(threshold, color='red', linestyle='--')
plt.title('Distribution of Show-Up Probabilities')
plt.xlabel('Predicted Show-Up Probability')
plt.ylabel('Number of Patients')
plt.show()