In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Load dataset
file_path = r"/home/tanmay08/Desktop/PC/vsmhackathon/mlbackend/data.csv"
df = pd.read_csv(file_path)

df['D.O.A'] = pd.to_datetime(df['D.O.A'], errors='coerce')

# Filter relevant columns
daily_counts = df.groupby(['D.O.A', 'TYPE OF ADMISSION-EMERGENCY/OPD']).size().unstack(fill_value=0)
daily_counts.columns = ['Emergency_Patients', 'OPD_Patients']
daily_counts = daily_counts.reset_index()

# Feature Engineering
daily_counts['Day'] = daily_counts['D.O.A'].dt.day
daily_counts['Month'] = daily_counts['D.O.A'].dt.month
daily_counts['Weekday'] = daily_counts['D.O.A'].dt.weekday
daily_counts['Prev_Emergency'] = daily_counts['Emergency_Patients'].shift(1).fillna(0)
daily_counts['Prev_OPD'] = daily_counts['OPD_Patients'].shift(1).fillna(0)

# Define features and target variables
X = daily_counts[['Day', 'Month', 'Weekday', 'Prev_Emergency', 'Prev_OPD']]
y_emergency = daily_counts['Emergency_Patients']
y_opd = daily_counts['OPD_Patients']

# Train-test split
X_train, X_test, y_train_emergency, y_test_emergency = train_test_split(X, y_emergency, test_size=0.2, random_state=42)
X_train, X_test, y_train_opd, y_test_opd = train_test_split(X, y_opd, test_size=0.2, random_state=42)

# Train models
rf_emergency = RandomForestRegressor(n_estimators=100, random_state=42)
rf_opd = RandomForestRegressor(n_estimators=100, random_state=42)
rf_emergency.fit(X_train, y_train_emergency)
rf_opd.fit(X_train, y_train_opd)

# Make predictions
preds_emergency = rf_emergency.predict(X_test)
preds_opd = rf_opd.predict(X_test)

# Evaluate model
mae_emergency = mean_absolute_error(y_test_emergency, preds_emergency)
mae_opd = mean_absolute_error(y_test_opd, preds_opd)
print(f"MAE for Emergency: {mae_emergency}, MAE for OPD: {mae_opd}")

# Function to predict future patient counts
def predict_patients(day, month, weekday, prev_emergency, prev_opd):
    input_data = pd.DataFrame([[day, month, weekday, prev_emergency, prev_opd]], 
                              columns=['Day', 'Month', 'Weekday', 'Prev_Emergency', 'Prev_OPD'])
    pred_emergency = rf_emergency.predict(input_data)[0]
    pred_opd = rf_opd.predict(input_data)[0]
    return round(pred_emergency), round(pred_opd)

# Example prediction
future_day, future_month, future_weekday = 15, 2, 3  # Example: 15th Feb, Wednesday
pred_emergency, pred_opd = predict_patients(future_day, future_month, future_weekday, 10, 15)
print(f"Predicted Emergency Patients: {pred_emergency}, Predicted OPD Patients: {pred_opd}")


MAE for Emergency: 3.2562328767123287, MAE for OPD: 2.6880136986301375
Predicted Emergency Patients: 12, Predicted OPD Patients: 6


In [3]:
import joblib

# Save the trained models
joblib.dump(rf_emergency, "rf_emergency_model.pkl")
joblib.dump(rf_opd, "rf_opd_model.pkl")

print("Models saved successfully!")


Models saved successfully!


In [4]:
import joblib

# Combine both models into a dictionary
models = {
    "rf_emergency": rf_emergency,
    "rf_opd": rf_opd
}

# Save the combined model
joblib.dump(models, "patient_prediction_model.pkl")

print("Combined model saved successfully!")


Combined model saved successfully!


In [5]:
import joblib

# Combine both models into a dictionary
models = {
    "rf_emergency": rf_emergency,
    "rf_opd": rf_opd
}

# Save the model in binary mode
with open("patient_prediction_model.pkl", "wb") as f:
    joblib.dump(models, f)

print("Combined model saved successfully!")


Combined model saved successfully!
