In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style globally for the plots that follow.
sns.set(style="whitegrid")

# Relative path to the processed utilization CSV.
file_path = "../../data/processed/medical_equipment_utilization_synthetic_cleaned(in).csv"


def parse_currency(values: pd.Series) -> pd.Series:
    """Convert currency-formatted text to numbers.

    Removes the ``KES`` marker (any letter case), thousands-separator commas
    and spaces, then parses what is left as a number.

    Parameters
    ----------
    values : pd.Series
        Column to convert; already-numeric data round-trips unchanged.

    Returns
    -------
    pd.Series
        Numeric series aligned with ``values``. Entries that still cannot be
        parsed after stripping become NaN.
    """
    # A single explicit regex keeps the case-insensitive match independent of
    # how the installed pandas treats ``case=`` when ``regex=False``.
    stripped = values.astype(str).str.replace(r"KES|[, ]", "", case=False, regex=True)
    return pd.to_numeric(stripped, errors="coerce")


raw_df = pd.read_csv(file_path)
# Normalise headers: trim surrounding whitespace and use underscores for spaces.
raw_df.columns = [c.strip().replace(" ", "_") for c in raw_df.columns]

# Work on a copy without columns that contain no data at all.
df = raw_df.dropna(axis=1, how="all").copy()

# Monetary columns that must be numeric before they can be aggregated.
currency_cols = [
    "Cost_per_procedure",
    "Daily_Operating_Cost",
    "Procedure_Revenue",
    "Net-Profit_(daily)",
]
for col in currency_cols:
    if col not in df.columns:
        continue
    parsed = parse_currency(df[col])
    # Values that were present but failed to parse would silently drop out of
    # later sums, so surface how many were lost.
    n_unparsed = int((parsed.isna() & df[col].notna()).sum())
    if n_unparsed:
        print(f"Warning: {n_unparsed} value(s) in '{col}' could not be parsed and were set to NaN.")
    df[col] = parsed

if "Date" in df.columns:
    parsed_dates = pd.to_datetime(df["Date"], errors="coerce")
    n_bad_dates = int((parsed_dates.isna() & df["Date"].notna()).sum())
    if n_bad_dates:
        print(f"Warning: {n_bad_dates} value(s) in 'Date' could not be parsed and were set to NaT.")
    df["Date"] = parsed_dates

def summarize_profitability(frame: pd.DataFrame, group_col: str) -> pd.DataFrame:
    """Aggregate revenue, cost and profit per group and derive ratio metrics.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain ``group_col`` plus ``Procedure_Revenue``,
        ``Daily_Operating_Cost``, ``Net-Profit_(daily)`` and ``Equipment_ID``.
    group_col : str
        Column whose distinct values define the groups.

    Returns
    -------
    pd.DataFrame
        Indexed by ``group_col`` with columns ``total_revenue``,
        ``total_operating_cost``, ``total_net_profit``, ``n_days`` (number of
        rows with a non-null ``Equipment_ID``; presumably one row per
        equipment-day -- confirm against the data), ``roi_%``,
        ``profit_margin_%`` and ``avg_profit_per_day``.
        A ratio is NaN when its denominator is zero.
    """
    summary = frame.groupby(group_col).agg(
        total_revenue=("Procedure_Revenue", "sum"),
        total_operating_cost=("Daily_Operating_Cost", "sum"),
        total_net_profit=("Net-Profit_(daily)", "sum"),
        n_days=("Equipment_ID", "count"),
    )

    def nonzero(series: pd.Series) -> pd.Series:
        # pandas sums an all-missing group to 0; dividing by that would give
        # +/-inf, which distorts tables and charts. Mark it undefined instead.
        return series.where(series.ne(0))

    net_profit = summary["total_net_profit"]
    summary["roi_%"] = 100 * net_profit / nonzero(summary["total_operating_cost"])
    summary["profit_margin_%"] = 100 * net_profit / nonzero(summary["total_revenue"])
    summary["avg_profit_per_day"] = net_profit / nonzero(summary["n_days"])
    return summary


by_type = summarize_profitability(df, "Equipment_Type")
by_dept = summarize_profitability(df, "Department")

by_type.round(2), by_dept.round(2)


In [None]:
# --- ROI dashboard ---
# 2x2 grid: the top row summarises equipment types, the bottom row departments;
# the left column shows ROI and the right column average profit per day.

# reset_index() already returns a new frame with the group key as a column.
by_type_plot = by_type.reset_index()
by_dept_plot = by_dept.reset_index()

# One entry per panel in row-major order:
# (data, x column, y column, panel title, y-axis label)
panels = [
    (by_type_plot, "Equipment_Type", "roi_%",
     "ROI (%) by Equipment Type", "ROI (%)"),
    (by_type_plot, "Equipment_Type", "avg_profit_per_day",
     "Avg Profit per Day by Equipment Type (KES)", "Avg profit per day (KES)"),
    (by_dept_plot, "Department", "roi_%",
     "ROI (%) by Department", "ROI (%)"),
    (by_dept_plot, "Department", "avg_profit_per_day",
     "Avg Profit per Day by Department (KES)", "Avg profit per day (KES)"),
]

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

for ax, (data, x_col, y_col, title, y_label) in zip(axes.flat, panels):
    sns.barplot(data=data, x=x_col, y=y_col, ax=ax)
    ax.set_title(title)
    # Show readable axis labels instead of raw column identifiers.
    ax.set_xlabel(x_col.replace("_", " "))
    ax.set_ylabel(y_label)
    ax.tick_params(axis="x", rotation=45)
    # Anchor rotated labels at their right end so each one sits under its bar.
    plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor")

fig.suptitle("ROI Dashboard", fontsize=16)
fig.tight_layout()
plt.show()
