In [None]:
import pandas as pd
import numpy as np

# Cleaned utilization export, addressed relative to this notebook's folder.
file_path = "../../data/processed/medical_equipment_utilization_synthetic_cleaned(in).csv"

# Read the CSV, then tidy the headers: trim outer whitespace and turn the
# remaining spaces into underscores.
raw_df = pd.read_csv(file_path)
raw_df.columns = [header.strip().replace(" ", "_") for header in raw_df.columns]

# Working copy without the columns that hold no values at all; raw_df itself
# is left exactly as loaded.
df = raw_df.dropna(axis=1, how="all").copy()

# Money columns to turn into numbers (names as they read after the header
# tidy-up above).
currency_cols = [
    "Cost_per_procedure",
    "Daily_Operating_Cost",
    "Procedure_Revenue",
    "Net-Profit_(daily)",
]
for col in currency_cols:
    if col not in df.columns:
        # Column is absent from this frame; nothing to convert.
        continue

    # Strip the "KES" marker (any letter case), thousands separators and
    # spaces from the textual form of each value.
    amount_text = df[col].astype(str)
    amount_text = amount_text.str.replace("KES", "", case=False, regex=False)
    amount_text = amount_text.str.replace(",", "", regex=False)
    amount_text = amount_text.str.replace(" ", "", regex=False)

    # Whatever is still not a number after stripping becomes NaN.
    df[col] = pd.to_numeric(amount_text, errors="coerce")

# Parse the date column when present; unparseable entries become NaT.
has_date_column = "Date" in df.columns
if has_date_column:
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

df.head()


In [None]:
# --- ROI-style metrics by equipment type ---

# One summary row per Equipment_Type:
#   n_days                -> number of rows with a non-null Equipment_ID
#                            (assumes one row per equipment-day - TODO confirm)
#   total_*               -> sums of the corresponding daily columns
by_type = (
    df.groupby("Equipment_Type")
    .agg(
        n_days=("Equipment_ID", "count"),
        total_procedures=("Number_of_Procedures", "sum"),
        total_revenue=("Procedure_Revenue", "sum"),
        total_operating_cost=("Daily_Operating_Cost", "sum"),
        total_net_profit=("Net-Profit_(daily)", "sum"),
    )
)

# A group total can legitimately be 0 (e.g. no procedures, or a column whose
# values were all NaN and therefore sum to 0). Dividing by it would produce
# +/-inf, so zero denominators are masked to NaN and the affected ratio is
# reported as missing instead.
type_denoms = by_type[
    ["n_days", "total_procedures", "total_revenue", "total_operating_cost"]
]
type_denoms = type_denoms.where(type_denoms != 0)

# ROI and margin are expressed in percent; the averages are in the same unit
# as the net-profit column.
by_type["roi_%"] = 100 * by_type["total_net_profit"] / type_denoms["total_operating_cost"]
by_type["profit_margin_%"] = 100 * by_type["total_net_profit"] / type_denoms["total_revenue"]
by_type["avg_profit_per_day"] = by_type["total_net_profit"] / type_denoms["n_days"]
by_type["avg_profit_per_procedure"] = by_type["total_net_profit"] / type_denoms["total_procedures"]

by_type.round(2)


In [None]:
# --- ROI-style metrics by department ---

# One summary row per Department:
#   n_days                -> number of rows with a non-null Equipment_ID
#                            (assumes one row per equipment-day - TODO confirm)
#   total_*               -> sums of the corresponding daily columns
by_dept = (
    df.groupby("Department")
    .agg(
        n_days=("Equipment_ID", "count"),
        total_procedures=("Number_of_Procedures", "sum"),
        total_revenue=("Procedure_Revenue", "sum"),
        total_operating_cost=("Daily_Operating_Cost", "sum"),
        total_net_profit=("Net-Profit_(daily)", "sum"),
    )
)

# A group total can legitimately be 0 (e.g. no procedures, or a column whose
# values were all NaN and therefore sum to 0). Dividing by it would produce
# +/-inf, so zero denominators are masked to NaN and the affected ratio is
# reported as missing instead.
dept_denoms = by_dept[
    ["n_days", "total_procedures", "total_revenue", "total_operating_cost"]
]
dept_denoms = dept_denoms.where(dept_denoms != 0)

# ROI and margin are expressed in percent; the averages are in the same unit
# as the net-profit column.
by_dept["roi_%"] = 100 * by_dept["total_net_profit"] / dept_denoms["total_operating_cost"]
by_dept["profit_margin_%"] = 100 * by_dept["total_net_profit"] / dept_denoms["total_revenue"]
by_dept["avg_profit_per_day"] = by_dept["total_net_profit"] / dept_denoms["n_days"]
# Same per-procedure metric as the equipment-type summary; total_procedures
# was already aggregated above but previously went unused.
by_dept["avg_profit_per_procedure"] = by_dept["total_net_profit"] / dept_denoms["total_procedures"]

by_dept.round(2)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")

# --- Visualize ROI and profit metrics by equipment type ---

# Move Equipment_Type out of the index so it can be used as the x variable.
by_type_plot = by_type.reset_index().copy()

# One bar chart per (metric column, figure title) pair, drawn in this order.
type_charts = [
    ("roi_%", "ROI (%) by Equipment Type"),
    ("avg_profit_per_day", "Average Profit per Day by Equipment Type (KES)"),
]
for type_metric, type_title in type_charts:
    plt.figure(figsize=(8, 4))
    sns.barplot(data=by_type_plot, x="Equipment_Type", y=type_metric)
    plt.title(type_title)
    # Slanted, right-aligned tick labels for the category names.
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()


In [None]:
# --- Visualize ROI and profit metrics by department ---

# Move Department out of the index so it can be used as the x variable.
by_dept_plot = by_dept.reset_index().copy()

# One bar chart per (metric column, figure title) pair, drawn in this order.
dept_charts = [
    ("roi_%", "ROI (%) by Department"),
    ("avg_profit_per_day", "Average Profit per Day by Department (KES)"),
]
for dept_metric, dept_title in dept_charts:
    plt.figure(figsize=(6, 4))
    sns.barplot(data=by_dept_plot, x="Department", y=dept_metric)
    plt.title(dept_title)
    # Slanted, right-aligned tick labels for the category names.
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()
