In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot styling. seaborn documents `set` as an alias of
# `set_theme` and names `set_theme` as the preferred interface.
sns.set_theme(style="whitegrid")

file_path = "../../data/processed/medical_equipment_utilization_synthetic_cleaned(in).csv"

# Load the CSV and normalise the headers: trim surrounding whitespace and turn
# inner spaces into underscores so the column names used below match.
raw_df = pd.read_csv(file_path)
raw_df.columns = [c.strip().replace(" ", "_") for c in raw_df.columns]

# Work on a copy with entirely-empty columns removed; `raw_df` keeps them all.
# Because a column can disappear here, every conversion below first checks
# that its column is still present.
df = raw_df.dropna(axis=1, how="all").copy()

# Columns holding amounts formatted as text (cleaned before parsing).
currency_cols = [
    "Cost_per_procedure",
    "Daily_Operating_Cost",
    "Procedure_Revenue",
    "Net-Profit_(daily)",
]

# Columns parsed as numbers without any text clean-up.
numeric_cols = [
    "Available_Hours",
    "Actual_Operating_Hours",
    "Utilization_Rate",
    "Number_of_Procedures",
]


def _lost_values(before, after):
    """Count entries that are present in `before` but missing in `after`."""
    return int((before.notna() & after.isna()).sum())


# Column name -> number of values that failed to parse and became NaN/NaT.
unparsed_counts = {}

# Currency columns: drop the "KES" marker (any letter case), thousands commas
# and whitespace, then parse. `\s` also removes tabs and non-breaking spaces,
# which a literal " " replacement would leave in place.
for col in currency_cols:
    if col in df.columns:
        stripped = (
            df[col]
            .astype(str)
            .str.replace(r"kes|[,\s]", "", case=False, regex=True)
        )
        parsed = pd.to_numeric(stripped, errors="coerce")
        # Count before overwriting the column so the comparison uses the raw values.
        unparsed_counts[col] = _lost_values(df[col], parsed)
        df[col] = parsed

for col in numeric_cols:
    if col in df.columns:
        parsed = pd.to_numeric(df[col], errors="coerce")
        unparsed_counts[col] = _lost_values(df[col], parsed)
        df[col] = parsed

if "Date" in df.columns:
    # NOTE(review): no explicit format/dayfirst is passed, so pandas infers the
    # day/month order -- confirm it matches the file.
    parsed = pd.to_datetime(df["Date"], errors="coerce")
    unparsed_counts["Date"] = _lost_values(df["Date"], parsed)
    df["Date"] = parsed

# `errors="coerce"` never raises, so surface anything it discarded instead of
# letting unparseable values vanish silently.
unparsed_counts = {name: lost for name, lost in unparsed_counts.items() if lost}
if unparsed_counts:
    print(f"Unparseable values coerced to NaN/NaT: {unparsed_counts}")

print(f"Loaded {len(df)} rows")
df.head()


In [None]:
# --- EDA dashboard ---
# 2x2 overview of `df`: utilization histogram, available-vs-actual hours
# scatter, and record counts per equipment type and per department.
# A panel whose columns are absent from `df` is blanked rather than raising
# KeyError (all-empty columns are dropped when the data is loaded).

fig, axes = plt.subplots(2, 2, figsize=(12, 8))


def _count_by(frame, column):
    """Return a frame with each distinct value of `column` and its row count."""
    counts = frame[column].value_counts().reset_index()
    # Name both columns explicitly instead of relying on reset_index() labels.
    counts.columns = [column, "Count"]
    return counts


def _draw_count_bars(counts, column, title, ax):
    """Draw `counts` as a bar chart on `ax` with rotated category labels."""
    sns.barplot(data=counts, x=column, y="Count", ax=ax)
    ax.set_title(title)
    ax.tick_params(axis="x", rotation=45)


# Top-left: distribution of the utilization rate.
if "Utilization_Rate" in df.columns:
    sns.histplot(df["Utilization_Rate"], kde=True, ax=axes[0, 0])
    axes[0, 0].set_title("Utilization Rate Distribution")
else:
    axes[0, 0].axis("off")

# Top-right: available vs actual hours, coloured by equipment type when known.
if {"Available_Hours", "Actual_Operating_Hours"}.issubset(df.columns):
    sns.scatterplot(
        data=df,
        x="Available_Hours",
        y="Actual_Operating_Hours",
        hue="Equipment_Type" if "Equipment_Type" in df.columns else None,
        ax=axes[0, 1],
        legend=False,
    )
    axes[0, 1].set_title("Available vs Actual Operating Hours")
else:
    axes[0, 1].axis("off")

# Bottom-left: number of records per equipment type.
if "Equipment_Type" in df.columns:
    by_eq = _count_by(df, "Equipment_Type")
    _draw_count_bars(by_eq, "Equipment_Type", "Record Count by Equipment Type", axes[1, 0])
else:
    axes[1, 0].axis("off")

# Bottom-right: number of records per department.
if "Department" in df.columns:
    by_dept = _count_by(df, "Department")
    _draw_count_bars(by_dept, "Department", "Record Count by Department", axes[1, 1])
else:
    axes[1, 1].axis("off")

fig.suptitle("Exploratory Dashboard", fontsize=16)
fig.tight_layout()
plt.show()
