In [1]:
import pandas as pd
import numpy as np
import os

# --- 1. Initial Setup ---
# Seed NumPy's global RNG so every run reproduces the same dataset.
np.random.seed(42)

# Hospital identifiers and the daily timeline: 365 consecutive days starting
# 2024-01-01 (2024 is a leap year, so the last generated day is 2024-12-30).
hospitals = ['Hosp_A_General', 'Hosp_B_Oncology', 'Hosp_C_Community']
dates = pd.date_range(start='2024-01-01', periods=365)

# Drug catalogue, grouped by the category that drives each hospital's demand.
general_drugs = ['Paracetamol', 'Ibuprofen', 'Amoxicillin']
specialty_drugs = ['Metformin', 'Atorvastatin', 'Letrozole', 'Trastuzumab']
seasonal_drugs = ['Oseltamivir', 'Cough Syrup']
all_drugs = general_drugs + specialty_drugs + seasonal_drugs

# Months treated as winter for the community clinic's seasonal demand.
_WINTER_MONTHS = (12, 1, 2)


def _demand_bounds(hospital_id, drug_name, month):
    """Return the ``(low, high)`` arguments for the demand draw.

    ``high`` is exclusive, as in ``np.random.randint``. ``None`` is returned
    for a hospital without a profile, in which case demand stays at 0 and no
    random number is drawn.
    """
    if hospital_id == 'Hosp_A_General':
        # General hospital: high general, medium specialty, low seasonal demand.
        if drug_name in general_drugs:
            return (80, 150)
        if drug_name in specialty_drugs:
            return (20, 50)
        return (5, 20)

    if hospital_id == 'Hosp_B_Oncology':
        # Oncology center: very high specialty demand, negligible otherwise.
        return (100, 200) if drug_name in specialty_drugs else (0, 10)

    if hospital_id == 'Hosp_C_Community':
        # Community clinic: seasonal drugs spike in winter, general drugs are
        # low, and everything else (specialty drugs, and seasonal drugs
        # outside winter) is negligible.
        if drug_name in seasonal_drugs and month in _WINTER_MONTHS:
            return (40, 80)
        if drug_name in general_drugs:
            return (10, 30)
        return (0, 5)

    return None


def _simulate_rows(days, hospital_ids, drug_names):
    """Build one ``[date, hospital, drug, inventory, demand]`` row per combination.

    For every row the inventory is drawn first and the demand second; this
    order determines the values obtained from the seeded global RNG.
    """
    rows = []
    for day in days:
        for hospital_id in hospital_ids:
            for drug_name in drug_names:
                stock = np.random.randint(50, 300)  # inventory level
                bounds = _demand_bounds(hospital_id, drug_name, day.month)
                if bounds is None:
                    needed = 0  # default demand
                else:
                    needed = np.random.randint(*bounds)
                rows.append([day, hospital_id, drug_name, stock, needed])
    return rows


# --- 2. Simulate Data Based on Each Hospital's Profile ---
print("Generating heterogeneous data for each hospital...")

data = _simulate_rows(dates, hospitals, all_drugs)

# Create the final DataFrame
df = pd.DataFrame(
    data,
    columns=['date', 'hospital_id', 'drug_name', 'inventory', 'demand'],
)

print("Data generation complete.")

# --- 3. Save the CSV Files ---
# Write one CSV per hospital into `output_dir`, named inventory_<hospital>.csv.
output_dir = "data"
# exist_ok=True creates the directory when it is missing and does nothing when
# it already exists, replacing the race-prone "check, then create" sequence.
os.makedirs(output_dir, exist_ok=True)

for hospital in hospitals:
    # Keep only this hospital's rows; the DataFrame index is not written out.
    df_hosp = df[df['hospital_id'] == hospital]
    file_path = os.path.join(output_dir, f'inventory_{hospital}.csv')
    df_hosp.to_csv(file_path, index=False)
    print(f"Saved dataset for {hospital} at: {file_path}")

print("\nAll heterogeneous datasets have been created successfully.")
# Last expression of the notebook cell: displays the first rows of the full dataset.
df.head()

Generating heterogeneous data for each hospital...
Data generation complete.
Saved dataset for Hosp_A_General at: data/inventory_Hosp_A_General.csv
Saved dataset for Hosp_B_Oncology at: data/inventory_Hosp_B_Oncology.csv
Saved dataset for Hosp_C_Community at: data/inventory_Hosp_C_Community.csv

All heterogeneous datasets have been created successfully.


Unnamed: 0,date,hospital_id,drug_name,inventory,demand
0,2024-01-01,Hosp_A_General,Paracetamol,152,131
1,2024-01-01,Hosp_A_General,Ibuprofen,142,94
2,2024-01-01,Hosp_A_General,Amoxicillin,156,140
3,2024-01-01,Hosp_A_General,Metformin,70,26
4,2024-01-01,Hosp_A_General,Atorvastatin,171,38
