In [None]:
import pandas as pd
import numpy as np

# Location of the cleaned utilization CSV, relative to the notebook's working directory.
file_path = "../../data/processed/medical_equipment_utilization_synthetic_cleaned(in).csv"

# Read the file, then normalise headers: trim surrounding whitespace and
# replace inner spaces with underscores so columns are easy to reference.
raw_df = pd.read_csv(file_path)
raw_df.columns = raw_df.columns.map(lambda name: name.strip().replace(" ", "_"))

# Working copy without columns that contain no values at all.
df = raw_df.dropna(axis="columns", how="all").copy()

# Columns expected to hold numbers; values that cannot be parsed become NaN.
numeric_cols = [
    "Available_Hours",
    "Actual_Operating_Hours",
    "Utilization_Rate",
    "Number_of_Procedures",
]
df = df.assign(
    **{
        col: pd.to_numeric(df[col], errors="coerce")
        for col in numeric_cols
        if col in df.columns  # tolerate files that lack some of these columns
    }
)

# Parse dates when the column is present; unparseable entries become NaT.
if "Date" in df:
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

df.head()


In [None]:
# --- Utilization KPIs by equipment type ---

# Output column -> (source column, aggregation) for every per-type KPI.
# "count" tallies the non-null Equipment_ID entries in each group.
type_kpi_spec = {
    "n_days": ("Equipment_ID", "count"),
    "avg_available_hours": ("Available_Hours", "mean"),
    "avg_operating_hours": ("Actual_Operating_Hours", "mean"),
    "avg_utilization_rate": ("Utilization_Rate", "mean"),
    "avg_procedures_per_day": ("Number_of_Procedures", "mean"),
    "total_procedures": ("Number_of_Procedures", "sum"),
}

type_kpis = df.groupby("Equipment_Type").agg(**type_kpi_spec)

# Rank equipment types from the highest to the lowest mean utilization rate.
by_type = type_kpis.sort_values(by="avg_utilization_rate", ascending=False)

by_type.round(2)


In [None]:
# --- Utilization KPIs by department and over time ---

# Per-department summary: row count, mean utilization rate and procedure total,
# ranked from the most to the least utilized department.
dept_kpis = df.groupby("Department").agg(
    n_days=pd.NamedAgg(column="Equipment_ID", aggfunc="count"),
    avg_utilization_rate=pd.NamedAgg(column="Utilization_Rate", aggfunc="mean"),
    total_procedures=pd.NamedAgg(column="Number_of_Procedures", aggfunc="sum"),
)
by_dept = dept_kpis.sort_values(by="avg_utilization_rate", ascending=False)

print("=== Utilization by department ===")
print(by_dept.round(2))

# The monthly summary is only produced when the data carries a Month column.
has_month = "Month" in df.columns
if has_month:
    month_kpis = df.groupby("Month").agg(
        avg_utilization_rate=pd.NamedAgg(column="Utilization_Rate", aggfunc="mean"),
        total_procedures=pd.NamedAgg(column="Number_of_Procedures", aggfunc="sum"),
    )
    # Ranked by utilization (highest first), not by calendar order.
    by_month = month_kpis.sort_values(by="avg_utilization_rate", ascending=False)

    print("\n=== Utilization by month ===")
    print(by_month.round(2))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# set_theme is seaborn's preferred interface; sns.set is a legacy alias of it
# that the seaborn docs say may be removed in a future release.
sns.set_theme(style="whitegrid")

# --- Utilization by equipment type ---

# Move the Equipment_Type index into a regular column so seaborn can address
# it by name. reset_index already returns a new frame, so no copy is needed.
by_type_plot = by_type.reset_index()

# Chart 1: mean utilization rate per equipment type.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(data=by_type_plot, x="Equipment_Type", y="avg_utilization_rate", ax=ax)
ax.set_title("Average Utilization Rate by Equipment Type")
ax.set_xlabel("Equipment Type")
ax.set_ylabel("Utilization Rate")
# Slant the tick labels and right-align them so each label ends under its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

# Chart 2: mean number of procedures per row (day) for each equipment type.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(data=by_type_plot, x="Equipment_Type", y="avg_procedures_per_day", ax=ax)
ax.set_title("Average Procedures per Day by Equipment Type")
# Explicit labels so the raw column names do not leak onto the figure.
ax.set_xlabel("Equipment Type")
ax.set_ylabel("Procedures per Day")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()


In [None]:
# --- Utilization by department and by month ---

# Move the Department index into a regular column for seaborn.
by_dept_plot = by_dept.reset_index()

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=by_dept_plot, x="Department", y="avg_utilization_rate", ax=ax)
ax.set_title("Average Utilization Rate by Department")
ax.set_xlabel("Department")
ax.set_ylabel("Utilization Rate")
# Slant the tick labels and right-align them so each label ends under its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

if "Month" in df.columns:
    # by_month is already computed by the KPI cell earlier in the notebook
    # (under the same "Month" guard), so it is reused here instead of being
    # re-aggregated. That table is ranked by utilization, which would scramble
    # the time axis of this chart, so the rows are re-ordered before plotting.
    if "Date" in df.columns:
        # Order months by the earliest date observed in each one. This gives
        # calendar order whatever format the Month labels use (names, numbers,
        # "YYYY-MM", ...). Months without any valid date are placed last.
        first_seen = df.groupby("Month")["Date"].min()
        month_order = first_seen.sort_values(kind="mergesort").index
        by_month_plot = (
            by_month.reindex(month_order).rename_axis("Month").reset_index()
        )
    else:
        # No dates available: fall back to sorting the month keys themselves.
        # NOTE(review): this is chronological only for sortable labels such as
        # month numbers or "YYYY-MM" strings, not for month names.
        by_month_plot = by_month.sort_index().reset_index()

    fig, ax = plt.subplots(figsize=(8, 4))
    sns.barplot(data=by_month_plot, x="Month", y="avg_utilization_rate", ax=ax)
    ax.set_title("Average Utilization Rate by Month")
    ax.set_xlabel("Month")
    ax.set_ylabel("Utilization Rate")
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    fig.tight_layout()
    plt.show()
