In [6]:
"""
NHS MCDA for patient pathway prioritization

This script implements a small, flexible Multi-Criteria Decision Analysis (MCDA)
for ranking patients within each Speciality (department).

Primary rule from user:
  1) Group (primary key): Speciality
  2) Within each Speciality, rank using criteria in this order (but implemented
     as a weighted MCDA so weights can be tuned):
       - Complexity (numerical) — larger => higher priority
       - Acuity (1..5, higher = worse) — larger => higher priority
       - VitalsTrend (categorical) — priority order: Deteriorating > Stable > Improving

Features:
  - Normalises numeric values (optionally within each Speciality)
  - Maps VitalsTrend to an ordinal score
  - Allows adjustable weights for each criterion
  - Handles missing data sensibly
  - Returns a ranked DataFrame per Speciality and an overall ordering (Speciality groups preserved)

Usage example included at bottom.
"""

from typing import Dict, Any
import pandas as pd
import numpy as np


# Ordinal score assigned to each vitals-trend category (1.0 = highest priority,
# 0.0 = lowest). compute_mcda_scores falls back to this mapping when the caller
# does not pass its own `vitals_map`.
DEFAULT_VITALS_ORDER = {
    'Deteriorating': 1.0,
    'Stable': 0.5,
    'Improving': 0.0
}


def normalize_series(s: pd.Series) -> pd.Series:
    """Min-max normalise a pandas Series to the range [0, 1].

    Parameters
    ----------
    s : pd.Series
        Values to rescale; may contain NaN.

    Returns
    -------
    pd.Series
        * empty or all-NaN input: ``s`` itself, unchanged;
        * constant input: a float Series holding 0.5 wherever ``s`` is known;
        * otherwise ``(s - min) / (max - min)``.

        NaN positions stay NaN in every case.
    """
    valid = s.dropna()
    if valid.empty:
        return s
    mn = valid.min()
    mx = valid.max()
    if mn == mx:
        # Constant series: every known value gets the neutral score 0.5.
        # Build a fresh float Series instead of writing 0.5 into a copy of `s`;
        # assigning a float into an integer-dtype Series is an incompatible-dtype
        # write that newer pandas versions deprecate.
        return pd.Series(np.where(s.notna(), 0.5, np.nan), index=s.index, name=s.name)
    return (s - mn) / (mx - mn)


def compute_mcda_scores(
    df: pd.DataFrame,
    weights: Dict[str, float] = None,
    vitals_map: Dict[str, float] = None,
    normalize_within_Speciality: bool = True,
    patient_id: str = 'pseudo_patient_id',
    age_col: str = 'age',
    gender_col: str = 'sex',
    complexity_col: str = 'Complexity',
    acuity_col: str = 'Acuity',
    primarydiagnosis_col: str = 'Primary Diagnosis Summary',
    department_col: str = 'Speciality',
    vitals_col: str = 'Vitals Trend',
    waiting_col: str = 'Waiting Time (days)',
    admission_col: str = 'Time since Admission (days)',
    nextAction_col: str = 'nextAction',
    blocker_col: str = 'blocker',
    dischargedependence_col: str = 'Discharge Dependence'
) -> pd.DataFrame:
    """Score patients with a weighted MCDA and rank them within each department.

    Parameters
    ----------
    df : pd.DataFrame
        Patient rows. The input is copied, never modified.
    weights : dict, optional
        Weights for the criteria 'complexity', 'acuity', 'vitals', 'waiting'
        and 'admission'. Missing keys count as 0.0 and the weights are rescaled
        to sum to 1. The caller's dict is not modified. When omitted, the
        defaults 0.25 / 0.25 / 0.25 / 0.125 / 0.125 are used.
    vitals_map : dict, optional
        Maps each value of ``vitals_col`` to a score; defaults to
        DEFAULT_VITALS_ORDER. Unmapped values receive the median of the mapped
        ones, or 0.5 when nothing could be mapped.
    normalize_within_Speciality : bool
        If True, min-max normalise the numeric criteria separately for each
        department; if False, normalise over the whole frame.
    department_col, complexity_col, acuity_col, vitals_col, waiting_col, admission_col : str
        Names of the columns read by the scoring. Any that are absent are
        created and filled with 0.
    patient_id, age_col, gender_col, primarydiagnosis_col, nextAction_col,
    blocker_col, dischargedependence_col : str
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the added columns 'MCDA_score',
        'mcda_rank_within_Speciality' (1 = highest score in its department)
        and 'tie_breaker' (a text label of acuity/complexity/vitals).

    Raises
    ------
    ValueError
        If the supplied weights sum to 0.
    """
    df = df.copy()

    # Columns the scoring reads; create any that are absent, filled with 0.
    required = [department_col, complexity_col, acuity_col, waiting_col, admission_col, vitals_col]
    for c in required:
        if c not in df.columns:
            df[c] = 0

    # Work on a private copy so the caller's dict is never mutated.
    if weights is None:
        weights = dict(complexity=0.25, acuity=0.25, vitals=0.25, waiting=0.125, admission=0.125)
    else:
        weights = dict(weights)
    # Every criterion used in the score below must have a weight.
    for k in ('complexity', 'acuity', 'vitals', 'waiting', 'admission'):
        weights.setdefault(k, 0.0)

    w_total = sum(weights.values())
    if w_total == 0:
        raise ValueError('Sum of weights must be > 0')
    weights = {k: v / w_total for k, v in weights.items()}

    if vitals_map is None:
        vitals_map = DEFAULT_VITALS_ORDER

    # Vitals category -> score; unknown categories get the median known score.
    vitals_score = df[vitals_col].map(vitals_map)
    if vitals_score.isna().any():
        known_median = vitals_score.median(skipna=True)
        if pd.isna(known_median):
            # Nothing could be mapped at all: fall back to the neutral midpoint.
            known_median = 0.5
        vitals_score = vitals_score.fillna(known_median)

    # Honour the normalisation flag: per department, or across the whole frame.
    if normalize_within_Speciality:
        # dropna=False keeps rows whose department is missing in a group of their own.
        grouped = df.groupby(department_col, dropna=False)
        norm_complexity = grouped[complexity_col].transform(normalize_series)
        norm_acuity = grouped[acuity_col].transform(normalize_series)
        norm_wait = grouped[waiting_col].transform(normalize_series)
        norm_admit = grouped[admission_col].transform(normalize_series)
    else:
        norm_complexity = normalize_series(df[complexity_col])
        norm_acuity = normalize_series(df[acuity_col])
        norm_wait = normalize_series(df[waiting_col])
        norm_admit = normalize_series(df[admission_col])

    # Values that could not be normalised (NaN) get the neutral score 0.5.
    df['MCDA_score'] = (
        weights['complexity'] * norm_complexity.fillna(0.5) +
        weights['acuity'] * norm_acuity.fillna(0.5) +
        weights['vitals'] * vitals_score +
        weights['waiting'] * norm_wait.fillna(0.5) +
        weights['admission'] * norm_admit.fillna(0.5)
    )

    # Rank 1 = highest score; method='first' breaks ties by row order.
    df['mcda_rank_within_Speciality'] = (
        df.groupby(department_col, dropna=False)['MCDA_score']
        .rank(method='first', ascending=False)
        .astype(int)
    )

    df['tie_breaker'] = df[[acuity_col, complexity_col, vitals_col]].apply(
        lambda row: f"Acuity={row[acuity_col]}|Complexity={row[complexity_col]}|Vitals={row[vitals_col]}", axis=1
    )

    return df


def rank_within_all_specialties(df_out: pd.DataFrame) -> pd.DataFrame:
    """Order scored patients by department, then by rank inside the department.

    Sorts on 'Speciality' (ascending, i.e. alphabetical for text) and then on
    'mcda_rank_within_Speciality'. The input frame is left untouched; the
    result carries a fresh 0..n-1 index.
    """
    sort_keys = ['Speciality', 'mcda_rank_within_Speciality']
    return df_out.sort_values(sort_keys).reset_index(drop=True)



In [7]:

# -------------------------------
# Example usage (run as script or import functions)
# -------------------------------
if __name__ == '__main__':
    # Load ScenarioA data
    from pathlib import Path
    import pandas as pd

    data_dir = Path('../data/scenarioA/')
    paths = {
        "current": data_dir / "ScenarioA_patients_current.csv",
        "coming": data_dir / "ScenarioA_patients_coming.csv",
        "historic": data_dir / "ScenarioA_patients_historic.csv"
    }

    # Read every CSV, then strip stray whitespace from the headers.
    dfs = {k: pd.read_csv(v) for k, v in paths.items()}
    for name, df in dfs.items():
        df.columns = [c.strip() for c in df.columns]
        dfs[name] = df

    current = dfs["current"].copy()
    coming = dfs["coming"].copy()
    historic = dfs["historic"].copy()

    # Choose which dataset to run: 'current', 'coming', or 'historic'
    which = 'current'
    input_df = dfs[which].copy()

    # Every selectable dataset needs a weight set; previously 'historic' left
    # `weights` undefined and the call below failed with a NameError.
    if which == 'current':
        weights = {
            'complexity': 0.20,
            'acuity': 0.30,
            'vitals': 0.30,
            'waiting': 0.10,
            'admission': 0.10
        }
    elif which == 'coming':
        weights = {
            'complexity': 0.30,
            'acuity': 0.50,
            'waiting': 0.20,
            'vitals': 0,
            'admission': 0,
        }
    else:  # 'historic'
        weights = {
            'complexity': 0.25,
            'acuity': 0.25,
            'vitals': 0.25,
            'waiting': 0.125,
            'admission': 0.125
        }

    ranked = compute_mcda_scores(input_df, weights=weights, normalize_within_Speciality=True)
    ordered = rank_within_all_specialties(ranked)

    save_name = f'mcda_ranked_patients_{which}.csv'
    ordered.to_csv(save_name, index=False)
    print(ordered.head())


  pseudo_patient_id  age sex  Complexity  Acuity Primary Diagnosis Summary  \
0            SP0814    4   F        1.43       4                   K50-K52   
1            SP0853   50   F        1.39       4                   K55-K64   
2            SP0884   37   F        1.47       5                   K50-K52   
3            SP0838   31   F        1.49       5                   K00-K14   
4            SP0888   34   F        1.08       5                   K50-K52   

          Speciality   Vitals Trend  Waiting Time (days)  \
0  Gastronenterology  Deteriorating                   33   
1  Gastronenterology  Deteriorating                    7   
2  Gastronenterology         Stable                   95   
3  Gastronenterology         Stable                   63   
4  Gastronenterology         Stable                    3   

   Time since Admission (days)  nextAction             blocker  \
0                            2   Discharge  Staff Availability   
1                            1      Re

In [8]:
from typing import Dict, Any
import pandas as pd
import numpy as np

# Ordinal score assigned to each vitals-trend category (1.0 = highest priority,
# 0.0 = lowest). compute_mcda_scores falls back to this mapping when the caller
# does not pass its own `vitals_map`.
DEFAULT_VITALS_ORDER = {
    'Deteriorating': 1.0,
    'Stable': 0.5,
    'Improving': 0.0
}

# ---------- Utilities ----------
def _keyize(name: str) -> str:
    """Lowercase, strip, collapse spaces/punct to create a matching key."""
    if name is None:
        return ""
    return "".join(ch for ch in name.strip().lower() if ch.isalnum())

def _apply_column_aliases(df: pd.DataFrame, alias_map: Dict[str, str]) -> pd.DataFrame:
    """Rename recognised column spellings to their canonical names.

    ``alias_map`` maps either one spelling or a list/tuple/set of spellings to
    a canonical column name. Spellings are compared through ``_keyize``. For a
    collection, the first listed spelling found among the columns wins. A
    rename is skipped when the canonical column is already present. Works on a
    copy; the input frame is not modified.
    """
    renamed = df.copy()
    # Snapshot of the incoming headers keyed by normalised spelling; taken once,
    # before any renaming happens.
    by_key = {_keyize(col): col for col in renamed.columns}

    for variants, canonical in alias_map.items():
        if isinstance(variants, (list, tuple, set)):
            # Lazily scan the spellings and stop at the first one that matches.
            match = next(
                (by_key[key] for key in map(_keyize, variants) if key in by_key),
                None,
            )
        else:
            match = by_key.get(_keyize(variants), None)

        if match is None or canonical in renamed.columns:
            continue
        renamed = renamed.rename(columns={match: canonical})

    return renamed

def normalize_series(s: pd.Series) -> pd.Series:
    """Min-max normalise a pandas Series to the range [0, 1].

    Returns ``s`` unchanged when it is empty or all-NaN, a float Series of 0.5
    (NaN preserved) when all known values are equal, and
    ``(s - min) / (max - min)`` otherwise. NaN positions always stay NaN.
    """
    valid = s.dropna()
    if valid.empty:
        return s
    mn = valid.min()
    mx = valid.max()
    if mn == mx:
        # Constant series -> neutral 0.5 for every known value. A new float
        # Series is built rather than assigning 0.5 into a copy of `s`, because
        # writing a float into an integer-dtype Series is an incompatible-dtype
        # assignment that newer pandas versions deprecate.
        return pd.Series(np.where(s.notna(), 0.5, np.nan), index=s.index, name=s.name)
    return (s - mn) / (mx - mn)

# ---------- Core MCDA ----------
def compute_mcda_scores(
    df: pd.DataFrame,
    weights: Dict[str, float] = None,
    vitals_map: Dict[str, float] = None,
    normalize_within_Speciality: bool = True,
    patient_id: str = 'pseudo_patient_id',
    age_col: str = 'age',
    gender_col: str = 'sex',
    complexity_col: str = 'Complexity',
    acuity_col: str = 'Acuity',
    primarydiagnosis_col: str = 'Primary Diagnosis Summary',
    department_col: str = 'Speciality',
    vitals_col: str = 'Vitals Trend',
    waiting_col: str = 'Waiting Time (days)',
    admission_col: str = 'Time since Admission (days)',
    nextAction_col: str = 'nextAction',
    blocker_col: str = 'blocker',
    dischargedependence_col: str = 'Discharge Dependence'
) -> pd.DataFrame:
    """Score patients with a weighted MCDA and rank them within each department.

    Parameters
    ----------
    df : pd.DataFrame
        Patient rows. Column spellings listed in the alias table below are
        renamed to the canonical ``*_col`` names first.
    weights : dict, optional
        Weights for 'complexity', 'acuity', 'vitals', 'waiting', 'admission'.
        Missing keys count as 0.0; weights are rescaled to sum to 1. If omitted
        or if they sum to 0, 0.25 / 0.25 / 0.25 / 0.125 / 0.125 is used. The
        caller's dict is not modified.
    vitals_map : dict, optional
        Maps values of ``vitals_col`` to a score; defaults to
        DEFAULT_VITALS_ORDER. Unmapped values get the median mapped score, or
        0.5 when nothing could be mapped.
    normalize_within_Speciality : bool
        True: min-max normalise numeric criteria per department; False: over
        the whole frame.
    patient_id, age_col, gender_col, complexity_col, acuity_col,
    primarydiagnosis_col, department_col, vitals_col, waiting_col,
    admission_col, nextAction_col, blocker_col, dischargedependence_col : str
        Canonical column names. Only the department, complexity, acuity,
        waiting, admission and vitals columns are read for scoring; those are
        created with defaults when absent.

    Returns
    -------
    pd.DataFrame
        A new frame with the added columns 'MCDA_score',
        'mcda_rank_within_Speciality' (1 = highest score in its department)
        and 'tie_breaker'.
    """
    # 1) Standardize incoming columns (handles spaces/US-UK spellings/etc.)
    alias_map = {
        # department
        ('speciality', 'specialty', 'department', 'dept'): department_col,
        # vitals trend
        ('vitalstrend', 'vitals trend', 'vitals', 'trend'): vitals_col,
        # waiting days
        ('waitingtime(days)', 'waitingtime', 'waitingdays', 'waitdays'): waiting_col,
        # admission days
        ('timesinceadmission(days)', 'timesinceadmission', 'admissiondays', 'admitdays'): admission_col,
        # complexity / acuity
        ('complexity',): complexity_col,
        ('acuity',): acuity_col,
        # others used elsewhere
        ('primarydiagnosissummary', 'primarydiagnosis', 'diagnosis', 'dx'): primarydiagnosis_col,
        ('dischargedependence', 'discharge dependence'): dischargedependence_col,
        ('nextaction',): nextAction_col,
        ('blocker', 'block'): blocker_col,
        ('sex', 'gender'): gender_col,
        ('age',): age_col,
        ('pseudopatientid', 'patientid', 'id'): patient_id,
    }
    df = _apply_column_aliases(df, alias_map)

    # 2) Create missing required columns with safe defaults
    df = df.copy()
    required = [department_col, complexity_col, acuity_col, waiting_col, admission_col, vitals_col]
    default_values: Dict[str, Any] = {
        department_col: "Unknown",
        complexity_col: 0.0,
        acuity_col: 0.0,
        waiting_col: 0.0,
        admission_col: 0.0,
        vitals_col: "Stable",   # Safe neutral-ish default
    }
    for c in required:
        if c not in df.columns:
            df[c] = default_values[c]

    # 3) Coerce numeric columns to numeric (unparseable values become NaN and
    #    are given the neutral score 0.5 after normalisation)
    for c in [complexity_col, acuity_col, waiting_col, admission_col]:
        df[c] = pd.to_numeric(df[c], errors='coerce')

    # 4) Weights & vitals map defaults. Copy first so that filling in missing
    #    keys never alters the dict owned by the caller.
    if weights is None:
        weights = dict(complexity=0.25, acuity=0.25, vitals=0.25, waiting=0.125, admission=0.125)
    else:
        weights = dict(weights)
    for k in ['complexity', 'acuity', 'vitals', 'waiting', 'admission']:
        weights.setdefault(k, 0.0)

    w_total = sum(weights.values())
    if w_total == 0:
        # All-zero weights: fall back to the default weight set.
        weights = dict(complexity=0.25, acuity=0.25, vitals=0.25, waiting=0.125, admission=0.125)
        w_total = 1.0
    weights = {k: v / w_total for k, v in weights.items()}

    if vitals_map is None:
        vitals_map = DEFAULT_VITALS_ORDER

    # 5) Vitals mapping -> numeric score, fill unknowns with median of known
    vitals_score = df[vitals_col].map(vitals_map)
    if vitals_score.isna().any():
        known_median = vitals_score.median(skipna=True)
        # If still NaN (e.g., no known), fall back to 0.5
        if pd.isna(known_median):
            known_median = 0.5
        vitals_score = vitals_score.fillna(known_median)

    # 6) Normalization (within department or global)
    if normalize_within_Speciality:
        group = df.groupby(department_col, dropna=False)
        norm_complexity = group[complexity_col].transform(normalize_series)
        norm_acuity     = group[acuity_col].transform(normalize_series)
        norm_wait       = group[waiting_col].transform(normalize_series)
        norm_admit      = group[admission_col].transform(normalize_series)
    else:
        norm_complexity = normalize_series(df[complexity_col])
        norm_acuity     = normalize_series(df[acuity_col])
        norm_wait       = normalize_series(df[waiting_col])
        norm_admit      = normalize_series(df[admission_col])

    # 7) MCDA score; NaNs left by normalisation get the neutral value 0.5
    df['MCDA_score'] = (
        weights['complexity'] * norm_complexity.fillna(0.5) +
        weights['acuity']     * norm_acuity.fillna(0.5) +
        weights['vitals']     * vitals_score +
        weights['waiting']    * norm_wait.fillna(0.5) +
        weights['admission']  * norm_admit.fillna(0.5)
    )

    # 8) Rank within department. dropna=False matches the grouping used for
    #    normalisation: rows with a missing department are ranked among
    #    themselves instead of receiving a NaN rank that breaks astype(int).
    df['mcda_rank_within_Speciality'] = (
        df.groupby(department_col, dropna=False)['MCDA_score']
          .rank(method='first', ascending=False)
          .astype(int)
    )

    # 9) Tie-breaker label (robust to missing)
    def _fmt_tie(row):
        a = row.get(acuity_col, np.nan)
        c = row.get(complexity_col, np.nan)
        v = row.get(vitals_col, "Unknown")
        return f"Acuity={a}|Complexity={c}|Vitals={v}"

    df['tie_breaker'] = df.apply(_fmt_tie, axis=1)

    return df

def rank_within_all_specialties(df_out: pd.DataFrame, department_col: str = 'Speciality') -> pd.DataFrame:
    """Order scored patients by department, then by rank inside the department.

    Sorts on ``department_col`` (ascending) and 'mcda_rank_within_Speciality'.
    If ``department_col`` is absent, the first column whose normalised name is
    'speciality', 'specialty' or 'department' is used instead; if there is
    none, a 'Speciality' column filled with "Unknown" is added. The input is
    not modified and the result has a fresh 0..n-1 index.
    """
    frame = df_out.copy()
    if department_col not in frame.columns:
        # Graceful fallback: look for a recognisable department column.
        accepted = ('speciality', 'specialty', 'department')
        department_col = next(
            (col for col in frame.columns if _keyize(col) in accepted),
            'Speciality',
        )
        if department_col not in frame.columns:
            frame[department_col] = "Unknown"
    ordered_frame = frame.sort_values([department_col, 'mcda_rank_within_Speciality'])
    return ordered_frame.reset_index(drop=True)

# -------------------------------
# Example usage
# -------------------------------
if __name__ == '__main__':
    from pathlib import Path

    data_dir = Path('../data/scenarioA/')
    paths = {
        "current":  data_dir / "ScenarioA_patients_current.csv",
        "coming":   data_dir / "ScenarioA_patients_coming.csv",
        "historic": data_dir / "ScenarioA_patients_historic.csv"
    }

    # Read all three CSVs, then trim whitespace in their headers.
    dfs = {k: pd.read_csv(v) for k, v in paths.items()}
    dfs = {
        k: df.rename(columns={c: c.strip() for c in df.columns})
        for k, df in dfs.items()
    }

    which = 'coming'  # 'current' | 'coming' | 'historic'
    input_df = dfs[which].copy()

    # Weight set per dataset; anything other than 'current'/'coming' uses the
    # 'historic' weights.
    weight_presets = {
        'current':  {'complexity': 0.20, 'acuity': 0.30, 'vitals': 0.30, 'waiting': 0.10, 'admission': 0.10},
        'coming':   {'complexity': 0.30, 'acuity': 0.50, 'waiting': 0.20, 'vitals': 0.0, 'admission': 0.0},
        'historic': {'complexity': 0.25, 'acuity': 0.25, 'vitals': 0.25, 'waiting': 0.125, 'admission': 0.125},
    }
    weights = weight_presets.get(which, weight_presets['historic'])

    ranked = compute_mcda_scores(input_df, weights=weights, normalize_within_Speciality=True)
    ordered = rank_within_all_specialties(ranked, department_col='Speciality')

    save_name = f'mcda_ranked_patients_{which}.csv'
    ordered.to_csv(save_name, index=False)
    print(ordered.head())


  pseudo_patient_id  age sex  Complexity  Acuity Primary Diagnosis Summary  \
0            SP0978   95   F        1.48       5                   K20-K31   
1            SP0914   52   F        1.37       5                   K55-K64   
2            SP0994   72   F        1.43       4                   K90-K93   
3            SP0921   50   F        1.25       4                   K50-K52   
4            SP0935   23   M        0.95       5                   K20-K31   

          Speciality  Waiting Time (days) Discharge Dependence  \
0  Gastronenterology                   29                  Low   
1  Gastronenterology                    3                  Low   
2  Gastronenterology                   10                 High   
3  Gastronenterology                   55                  Low   
4  Gastronenterology                   17                  Low   

   Time since Admission (days) Vitals Trend  MCDA_score  \
0                          0.0       Stable    0.826131   
1               

In [9]:
# # -------------------------------
# # Example usage (combine current + coming and rank)
# # -------------------------------
# if __name__ == '__main__':
#     from pathlib import Path
#     import pandas as pd

#     data_dir = Path('../data/scenarioA/')
#     paths = {
#         "current":  data_dir / "ScenarioA_patients_current.csv",
#         "coming":   data_dir / "ScenarioA_patients_coming.csv",
#     }

#     # Load & trim header whitespace
#     dfs = {}
#     for name, p in paths.items():
#         df = pd.read_csv(p)
#         df = df.rename(columns={c: c.strip() for c in df.columns})
#         df["Dataset"] = name  # tag source
#         dfs[name] = df

#     # Combine current + coming
#     input_df = pd.concat([dfs["current"], dfs["coming"]], ignore_index=True)

#     # Choose unified weights for the combined list
#     # (you can tweak these; using the more "current"-like set)
#     weights = {
#         'complexity': 0.20,
#         'acuity':     0.30,
#         'vitals':     0.30,  # rows without vitals will auto-fill neutrals
#         'waiting':    0.10,
#         'admission':  0.10
#     }

#     ranked = compute_mcda_scores(
#         input_df,
#         weights=weights,
#         normalize_within_Speciality=True,   # normalize per Speciality
#         department_col='Speciality',
#         vitals_col='Vitals Trend',
#         complexity_col='Complexity',
#         acuity_col='Acuity',
#         waiting_col='Waiting Time (days)',
#         admission_col='Time since Admission (days)'
#     )

#     # Order by Speciality then MCDA rank (combined across current+coming)
#     ordered = rank_within_all_specialties(ranked, department_col='Speciality')

#     # Also add an overall priority rank (ignoring Speciality) if you want
#     ordered['overall_rank'] = ordered['MCDA_score'].rank(method='first', ascending=False).astype(int)

#     # Save results
#     ordered.to_csv('mcda_ranked_patients_current_plus_coming.csv', index=False)
#     print(ordered.head(10))


In [10]:

from pathlib import Path
import pandas as pd

def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Trim spaces and fix common header variants."""
    df = df.rename(columns={c: c.strip() for c in df.columns})

if __name__ == '__main__':
    data_dir = Path('../data/scenarioA/')
    current_csv = data_dir / "ScenarioA_patients_current.csv"

    # Load the CURRENT cohort only, tidy its headers and tag the source.
    df = _standardize_columns(pd.read_csv(current_csv))
    df["Dataset"] = "current"

    # MCDA weights. The early-discharge ordering further down ranks the
    # resulting MCDA_score in ASCENDING order (lowest score first).
    weights = {'complexity': 0.25, 'acuity': 0.35, 'vitals': 0.25, 'waiting': 0.10, 'admission': 0.05}

    # Score the CURRENT cohort.
    ranked = compute_mcda_scores(
        df,
        weights=weights,
        normalize_within_Speciality=True,
        department_col='Speciality',
        vitals_col='Vitals Trend',
        complexity_col='Complexity',
        acuity_col='Acuity',
        waiting_col='Waiting Time (days)',
        admission_col='Time since Admission (days)'
    )

    def _norm_text(s):
        """Trim and case-fold a text column so comparisons tolerate case/spacing variants."""
        return s.astype(str).str.strip().str.casefold()

    # Keep only patients with blocker == "No Blocker" AND nextAction == "Discharge".
    blocker_ok = _norm_text(ranked["blocker"]) == "no blocker"
    action_ok = _norm_text(ranked["nextAction"]) == "discharge"

    # Columns shown in the preview.
    display_cols = [
        "pseudo_patient_id",
        "Speciality",
        "MCDA_score",
        "early_discharge_rank",
        "early_discharge_rank_within_Speciality",
        "nextAction",
        "blocker"
    ]

    # Flip the ranking: lower MCDA_score -> better (smaller) early-discharge
    # rank, both globally and within each Speciality, then sort by the global rank.
    candidates = (
        ranked.loc[blocker_ok & action_ok]
        .copy()
        .assign(
            early_discharge_rank=lambda d: (
                d["MCDA_score"].rank(method="first", ascending=True).astype(int)
            ),
            early_discharge_rank_within_Speciality=lambda d: (
                d.groupby("Speciality")["MCDA_score"]
                .rank(method="first", ascending=True)
                .astype(int)
            ),
        )
        .sort_values(["early_discharge_rank", "Speciality", "MCDA_score"])
    )

    # Save & preview.
    out_path = data_dir / "mcda_early_discharge_priority_current.csv"
    candidates.to_csv(out_path, index=False)

    print(f"âœ… Early discharge priority (CURRENT only) saved to: {out_path}\n")
    print("Preview:\n")
    print(candidates[display_cols].head(20).to_string(index=False))


TypeError: 'NoneType' object does not support item assignment

In [11]:
# -------------------------------
# Example usage (combine current + coming with clear labels)
# -------------------------------
if __name__ == '__main__':
    from pathlib import Path
    import pandas as pd

    data_dir = Path('../data/scenarioA/')
    paths = {
        "current":  data_dir / "ScenarioA_patients_current.csv",
        "coming":   data_dir / "ScenarioA_patients_coming.csv",
    }

    # Load each file, trim header whitespace and tag every row with its source.
    dfs = {}
    for name, p in paths.items():
        df = pd.read_csv(p)
        df = df.rename(columns={c: c.strip() for c in df.columns}).assign(Dataset=name)
        dfs[name] = df

    # Stack current and incoming patients into one frame.
    combined_df = pd.concat([dfs["current"], dfs["coming"]], ignore_index=True)

    # Human-friendly label derived from the technical source tag.
    dataset_labels = {
        "current": "ðŸŸ© Current patient (in hospital)",
        "coming":  "ðŸŸ¦ Incoming patient (awaiting admission)"
    }
    combined_df["Source Label"] = combined_df["Dataset"].map(dataset_labels)

    # One weight set for the combined list.
    weights = {'complexity': 0.25, 'acuity': 0.35, 'vitals': 0.25, 'waiting': 0.10, 'admission': 0.05}

    # Score the combined frame.
    ranked = compute_mcda_scores(
        combined_df,
        weights=weights,
        normalize_within_Speciality=True,
        department_col='Speciality',
        vitals_col='Vitals Trend',
        complexity_col='Complexity',
        acuity_col='Acuity',
        waiting_col='Waiting Time (days)',
        admission_col='Time since Admission (days)'
    )

    # Order by Speciality / within-Speciality rank, then add a global rank
    # (1 = highest MCDA_score overall).
    ordered = rank_within_all_specialties(ranked, department_col='Speciality')
    ordered["overall_rank"] = (
        ordered["MCDA_score"]
        .rank(method="first", ascending=False)
        .astype(int)
    )

    # Columns shown in the preview.
    display_cols = [
        "pseudo_patient_id",
        "Speciality",
        "MCDA_score",
        "mcda_rank_within_Speciality",
        "overall_rank",
        "Source Label",     # pretty label
        "Dataset"           # raw technical tag
    ]

    # Save the full table and print the first rows.
    save_path = data_dir / "mcda_ranked_patients_current_plus_coming.csv"
    ordered.to_csv(save_path, index=False)
    print(f"âœ… Combined ranking saved to {save_path}\n")
    print("Preview of ranked patients:\n")
    print(ordered[display_cols].head(15).to_string(index=False))


âœ… Combined ranking saved to ..\data\scenarioA\mcda_ranked_patients_current_plus_coming.csv

Preview of ranked patients:

pseudo_patient_id        Speciality  MCDA_score  mcda_rank_within_Speciality  overall_rank                            Source Label Dataset
           SP0814 Gastronenterology    0.773833                            1             3         ðŸŸ© Current patient (in hospital) current
           SP0884 Gastronenterology    0.769855                            2             5         ðŸŸ© Current patient (in hospital) current
           SP0838 Gastronenterology    0.760024                            3             7         ðŸŸ© Current patient (in hospital) current
           SP0853 Gastronenterology    0.744793                            4             8         ðŸŸ© Current patient (in hospital) current
           SP0978 Gastronenterology    0.635104                            5            20 ðŸŸ¦ Incoming patient (awaiting admission)  coming
           SP0888 Gastronent

In [12]:
# Early Discharge Priority List (CURRENT only, NO column renaming)
# ---------------------------------------------------------------
# Requires: compute_mcda_scores(df, ...)

from pathlib import Path
import pandas as pd

if __name__ == '__main__':
    data_dir = Path('../data/scenarioA/')
    current_csv = data_dir / "ScenarioA_patients_current.csv"

    # Load the CURRENT cohort exactly as stored (no renaming/stripping).
    df = pd.read_csv(current_csv)

    # MCDA weights (unchanged).
    weights = {'complexity': 0.25, 'acuity': 0.35, 'vitals': 0.25, 'waiting': 0.10, 'admission': 0.05}

    # Score the CURRENT cohort using the exact column names.
    ranked = compute_mcda_scores(
        df,
        weights=weights,
        normalize_within_Speciality=True,
        department_col='Speciality',
        vitals_col='Vitals Trend',
        complexity_col='Complexity',
        acuity_col='Acuity',
        waiting_col='Waiting Time (days)',
        admission_col='Time since Admission (days)'
    )

    # Columns shown in the preview.
    display_cols = [
        "pseudo_patient_id",
        "Speciality",
        "MCDA_score",
        "early_discharge_rank",
        "early_discharge_rank_within_Speciality",
        "nextAction",
        "blocker"
    ]

    # Keep exact matches of blocker == "No Blocker" AND nextAction == "Discharge",
    # flip the ranking (lower MCDA_score -> smaller rank) globally and within
    # each Speciality, then sort by the global rank.
    candidates = (
        ranked[ranked["blocker"].eq("No Blocker") & ranked["nextAction"].eq("Discharge")]
        .copy()
        .assign(
            early_discharge_rank=lambda d: (
                d["MCDA_score"].rank(method="first", ascending=True).astype(int)
            ),
            early_discharge_rank_within_Speciality=lambda d: (
                d.groupby("Speciality")["MCDA_score"]
                .rank(method="first", ascending=True)
                .astype(int)
            ),
        )
        .sort_values(["early_discharge_rank", "Speciality", "MCDA_score"])
    )

    # Save & preview.
    out_path = data_dir / "mcda_early_discharge_priority_current.csv"
    candidates.to_csv(out_path, index=False)

    print(f"âœ… Early discharge priority (CURRENT only) saved to: {out_path}\n")
    print("Preview:\n")
    print(candidates[display_cols].head(20).to_string(index=False))


âœ… Early discharge priority (CURRENT only) saved to: ..\data\scenarioA\mcda_early_discharge_priority_current.csv

Preview:

pseudo_patient_id                Speciality  MCDA_score  early_discharge_rank  early_discharge_rank_within_Speciality nextAction    blocker
           SP0734 General Internal Medicine    0.050395                     1                                       1  Discharge No Blocker
           SP0887         Gastronenterology    0.110967                     2                                       1  Discharge No Blocker
           SP0873 General Internal Medicine    0.120524                     3                                       2  Discharge No Blocker
           SP0839 General Internal Medicine    0.127744                     4                                       3  Discharge No Blocker
           SP0705 General Internal Medicine    0.160796                     5                                       4  Discharge No Blocker
           SP0849        Geriatric 

SECTION 3

In [13]:
# === Investment Priorities (Speciality / nextAction / blocker) ===

import numpy as np

def _priority_table(df: pd.DataFrame, group_col: str) -> pd.DataFrame:
    """Summarise ranking pressure for every distinct value of ``group_col``.

    Parameters
    ----------
    df : pd.DataFrame
        Ranked frame; must contain 'overall_rank', 'MCDA_score' and ``group_col``.
    group_col : str
        Column to group by. Missing values form their own group (``dropna=False``).

    Returns
    -------
    pd.DataFrame
        One row per group with the columns
        ``patients`` (non-null ranks), ``mean_overall_rank``, ``median_overall_rank``,
        ``mean_MCDA``, ``top10_count`` (ranks <= 10), ``top25p_count``
        (ranks <= ceil(25% of len(df)), cut-off at least 1), ``priority_score``
        (sum of 1/rank) and ``priority_per_patient`` (priority_score / patients).
        Rows are ordered by ``priority_score`` then ``patients``, both descending,
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If any of the required columns is absent.
    """
    if any(col not in df.columns for col in ("overall_rank", "MCDA_score")):
        raise ValueError("Expected columns 'overall_rank' and 'MCDA_score' in the ordered DataFrame.")
    if group_col not in df.columns:
        raise ValueError(f"Expected grouping column '{group_col}' in the ordered DataFrame.")

    # Rank cut-offs: a fixed top-10 and the top quarter of *all* rows in df.
    top10_limit = 10
    quartile_limit = max(1, int(np.ceil(0.25 * len(df))))

    aggregations = {
        "patients": ("overall_rank", "count"),
        "mean_overall_rank": ("overall_rank", "mean"),
        "median_overall_rank": ("overall_rank", "median"),
        "mean_MCDA": ("MCDA_score", "mean"),
        "top10_count": ("overall_rank", lambda ranks: (ranks <= top10_limit).sum()),
        "top25p_count": ("overall_rank", lambda ranks: (ranks <= quartile_limit).sum()),
        # Reciprocal-rank sum: rank 1 contributes 1.0, rank 2 contributes 0.5, ...
        "priority_score": ("overall_rank", lambda ranks: (1.0 / ranks).sum()),
    }
    summary = df.groupby(group_col, dropna=False).agg(**aggregations).reset_index()

    # A zero patient count becomes NaN so the ratio is NaN rather than +/-inf.
    denominators = summary["patients"].replace(0, np.nan)
    summary["priority_per_patient"] = summary["priority_score"] / denominators

    return summary.sort_values(
        by=["priority_score", "patients"],
        ascending=[False, False],
        ignore_index=True,
    )

# Build one investment-priority table per grouping column.
# `ordered` is not defined in this cell; _priority_table requires it to carry
# 'overall_rank', 'MCDA_score' and the grouping column, and raises ValueError otherwise.
priority_by_speciality = _priority_table(ordered, "Speciality")
priority_by_nextaction  = _priority_table(ordered, "nextAction")
priority_by_blocker     = _priority_table(ordered, "blocker")

# Save each table as CSV (row index omitted).
# `data_dir` is not defined in this cell; it must support the `/` operator with a
# str (presumably a pathlib.Path -- confirm against the cell that creates it).
inv_dir = data_dir  # reuse your folder
priority_by_speciality.to_csv(inv_dir / "invest_priority_by_speciality.csv", index=False)
priority_by_nextaction.to_csv(inv_dir / "invest_priority_by_nextAction.csv", index=False)
priority_by_blocker.to_csv(inv_dir / "invest_priority_by_blocker.csv", index=False)

# Show previews: first 15 rows of each table (already sorted by priority_score,
# descending, inside _priority_table), printed as plain text without the index.
print("\n=== INVESTMENT PRIORITY â€” BY SPECIALITY ===")
print(priority_by_speciality.head(15).to_string(index=False))

print("\n=== INVESTMENT PRIORITY â€” BY NEXTACTION ===")
print(priority_by_nextaction.head(15).to_string(index=False))

print("\n=== INVESTMENT PRIORITY â€” BY BLOCKER ===")
print(priority_by_blocker.head(15).to_string(index=False))



=== INVESTMENT PRIORITY â€” BY SPECIALITY ===
               Speciality  patients  mean_overall_rank  median_overall_rank  mean_MCDA  top10_count  top25p_count  priority_score  priority_per_patient
General Internal Medicine       165         152.000000                161.0   0.365348            4            37        2.729707              0.016544
            Ophthalmology        15         112.666667                114.0   0.462579            2             6        1.479654              0.098644
        Gastronenterology        47         149.361702                133.0   0.373579            4            12        1.233597              0.026247
  Trauma and Orthopaedics        38         140.710526                139.5   0.384202            0            12        0.544118              0.014319
       Geriatric Medicine        25         172.960000                167.0   0.316668            0             4        0.200271              0.008011
              Paediatrics        10      

In [14]:
# === Cross-tabs: nextAction ON Speciality, blocker ON Speciality ===
import numpy as np
import pandas as pd

def _priority_by_speciality(df: pd.DataFrame) -> pd.DataFrame:
    """Rank Specialities by their summed reciprocal ``overall_rank``.

    Groups ``df`` by 'Speciality' (missing values kept as their own group) and
    returns one row per Speciality with:

    * ``patients`` -- number of non-null ``overall_rank`` values,
    * ``priority_score`` -- sum of ``1 / overall_rank``,
    * ``speciality_priority_rank`` -- 1 for the highest ``priority_score``;
      ties are broken by order of appearance (``method="first"``).

    Rows come back sorted by ``speciality_priority_rank``; the index still holds
    each row's pre-sort position (it is not reset).
    """
    per_speciality = (
        df.groupby("Speciality", dropna=False)
        .agg(
            patients=("overall_rank", "count"),
            priority_score=("overall_rank", lambda ranks: (1.0 / ranks).sum()),
        )
        .reset_index()
    )

    # Highest pressure gets rank 1, giving a stable ordering key for the Specialities.
    pressure_rank = per_speciality["priority_score"].rank(method="first", ascending=False)
    per_speciality["speciality_priority_rank"] = pressure_rank.astype(int)

    return per_speciality.sort_values("speciality_priority_rank")

def _crosstab_on_speciality(df: pd.DataFrame, cat_col: str) -> pd.DataFrame:
    """Cross-tabulate ranking pressure for ``cat_col`` within each Speciality.

    Parameters
    ----------
    df : pd.DataFrame
        Ranked frame containing 'Speciality', ``cat_col``, 'overall_rank' and
        'MCDA_score'.
    cat_col : str
        Category column to break each Speciality down by.

    Returns
    -------
    pd.DataFrame
        One row per (Speciality, category) pair, followed by one row per category
        labelled 'ALL SPECIALITIES'. Columns: Speciality, ``cat_col``, patients,
        mean_overall_rank, mean_MCDA, top10_count (ranks <= 10), top25p_count
        (ranks <= ceil(25% of len(df)), cut-off at least 1), priority_score (sum of
        1/rank) and pct_within_speciality (share of the Speciality's ranked
        patients, or of all rows for the summary block, in percent, 2 dp).

        Per-Speciality rows are ordered by the Speciality's total priority_score
        (descending), then by the category's total priority_score across *all*
        Specialities (descending), then by the row's own priority_score
        (descending). Summary rows keep groupby order.

    Raises
    ------
    ValueError
        If a required column is missing.
    """
    if "Speciality" not in df.columns:
        raise ValueError("Expected column 'Speciality'.")
    if cat_col not in df.columns:
        raise ValueError(f"Expected column '{cat_col}'.")
    if any(col not in df.columns for col in ("overall_rank", "MCDA_score")):
        raise ValueError("Expected 'overall_rank' and 'MCDA_score' in DataFrame.")

    n_rows = len(df)
    top10_limit = 10
    quartile_limit = max(1, int(np.ceil(0.25 * n_rows)))

    # One aggregation spec shared by the per-Speciality and the overall tables.
    metrics = {
        "patients": ("overall_rank", "count"),
        "mean_overall_rank": ("overall_rank", "mean"),
        "mean_MCDA": ("MCDA_score", "mean"),
        "top10_count": ("overall_rank", lambda ranks: (ranks <= top10_limit).sum()),
        "top25p_count": ("overall_rank", lambda ranks: (ranks <= quartile_limit).sum()),
        "priority_score": ("overall_rank", lambda ranks: (1.0 / ranks).sum()),
    }

    # Denominator for the within-Speciality percentage.
    speciality_sizes = (
        df.groupby("Speciality", dropna=False)["overall_rank"].count().rename("spec_total")
    )

    detail = (
        df.groupby(["Speciality", cat_col], dropna=False)
        .agg(**metrics)
        .reset_index()
        .merge(speciality_sizes, on="Speciality", how="left")
    )
    detail["pct_within_speciality"] = (
        detail["patients"] / detail["spec_total"] * 100.0
    ).round(2)

    # Specialities listed from highest to lowest total priority pressure.
    speciality_sequence = (
        detail.groupby("Speciality", dropna=False)["priority_score"].sum()
        .sort_values(ascending=False)
        .index.tolist()
    )
    # Total pressure of each category summed over every Speciality.
    category_pressure = (
        detail.groupby([cat_col], dropna=False)["priority_score"].sum()
        .sort_values(ascending=False)
        .to_dict()
    )

    # Temporary sort keys: position of the Speciality in the pressure ordering,
    # and the negated category pressure (so ascending sort == descending pressure).
    detail["__spec_key"] = detail["Speciality"].apply(
        lambda name: speciality_sequence.index(name)
    )
    detail["__cat_key"] = detail[cat_col].apply(
        lambda label: -category_pressure.get(label, -0.0)
    )

    detail = detail.sort_values(
        by=["__spec_key", "__cat_key", "priority_score"],
        ascending=[True, True, False],
    ).drop(columns=["spec_total", "__spec_key", "__cat_key"])

    # --- ALL SPECIALITIES --- same metrics, grouped by category only.
    overall = df.groupby(cat_col, dropna=False).agg(**metrics).reset_index()
    overall.insert(0, "Speciality", "ALL SPECIALITIES")
    overall["pct_within_speciality"] = (overall["patients"] / n_rows * 100.0).round(2)

    return pd.concat([detail, overall], ignore_index=True)

# ---- Create tables ----
# One cross-tab per category column; each ends with the 'ALL SPECIALITIES' summary rows.
# `ordered` is not defined in this cell; _crosstab_on_speciality raises ValueError if it
# lacks 'Speciality', the category column, 'overall_rank' or 'MCDA_score'.
nextaction_on_speciality = _crosstab_on_speciality(ordered, "nextAction")
blocker_on_speciality    = _crosstab_on_speciality(ordered, "blocker")

# Save as CSV (row index omitted). `data_dir` is not defined in this cell; it must
# support `/` with a str (presumably a pathlib.Path -- confirm).
nextaction_on_speciality.to_csv(data_dir / "nextAction_on_speciality.csv", index=False)
blocker_on_speciality.to_csv(data_dir / "blocker_on_speciality.csv", index=False)

# Preview: first 25 rows only, so the trailing 'ALL SPECIALITIES' rows may not be shown.
print("\n=== NEXTACTION ON SPECIALITY ===")
print(nextaction_on_speciality.head(25).to_string(index=False))

print("\n=== BLOCKER ON SPECIALITY ===")
print(blocker_on_speciality.head(25).to_string(index=False))



=== NEXTACTION ON SPECIALITY ===
               Speciality                nextAction  patients  mean_overall_rank  mean_MCDA  top10_count  top25p_count  priority_score  pct_within_speciality
General Internal Medicine                 Treatment        35         150.914286   0.373250            2             9        0.990916                  21.21
General Internal Medicine                 Discharge        35         162.571429   0.350334            2             7        0.551190                  21.21
General Internal Medicine                Assessment         6         152.666667   0.367883            0             2        0.109111                   3.64
General Internal Medicine Diagnostics/Investigation        26         145.115385   0.378951            0             5        0.363721                  15.76
General Internal Medicine                    Review        16         113.750000   0.432345            0             5        0.255378                   9.70
General Internal M

In [15]:
# === WAITING TIME PRESSURE BY SPECIALITY ===

import pandas as pd
import numpy as np

def waiting_time_by_speciality(df: pd.DataFrame) -> pd.DataFrame:
    """
    Show waiting-time pressure by Speciality.

    Parameters
    ----------
    df : pd.DataFrame
        Ranked frame containing 'Speciality', 'Waiting Time (days)' and
        'overall_rank'. No other column is required.

    Returns
    -------
    pd.DataFrame
        One row per Speciality (missing Speciality kept as its own group) with:
          - patients: number of rows with a non-null overall_rank
          - mean_waiting_days / median_waiting_days / max_waiting_days
          - top25p_count: rows whose overall_rank is within the global top 25%
            (cut-off = ceil(25% of len(df)), at least 1)
          - priority_score: sum of 1 / overall_rank
          - pct_in_top_25%: top25p_count / patients * 100, rounded to 1 dp
        Sorted by priority_score descending (the same ordering key the investment
        tables use), with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If one of the three required columns is missing.
    """

    if "Speciality" not in df.columns:
        raise ValueError("Expected column 'Speciality'.")
    if "Waiting Time (days)" not in df.columns:
        raise ValueError("Expected column 'Waiting Time (days)' in dataset.")
    if "overall_rank" not in df.columns:
        raise ValueError("Expected column 'overall_rank' (priority ranking).")

    total_n = len(df)
    top25_cut = max(1, int(np.ceil(0.25 * total_n)))

    g = df.groupby("Speciality", dropna=False)

    out = g.agg(
        # Count ranked rows rather than 'pseudo_patient_id': that column is never
        # validated above (a frame without it died with a bare KeyError), and null
        # ids would shrink the denominator of pct_in_top_25% while the numerator is
        # rank-based. This also matches how the other priority tables count patients.
        patients=("overall_rank", "count"),
        mean_waiting_days=("Waiting Time (days)", "mean"),
        median_waiting_days=("Waiting Time (days)", "median"),
        max_waiting_days=("Waiting Time (days)", "max"),
        top25p_count=("overall_rank", lambda s: (s <= top25_cut).sum()),
        priority_score=("overall_rank", lambda s: (1.0 / s).sum())
    ).reset_index()

    # % of speciality in global top 25% priority
    out["pct_in_top_25%"] = (out["top25p_count"] / out["patients"] * 100).round(1)

    # use same speciality ordering as investment (priority_score desc)
    out = out.sort_values("priority_score", ascending=False).reset_index(drop=True)

    return out


# Build the per-Speciality waiting-time table from `ordered` (not defined in this cell).
waiting_by_speciality = waiting_time_by_speciality(ordered)

# Persist as CSV (row index omitted). `data_dir` is not defined in this cell; it must
# support `/` with a str (presumably a pathlib.Path -- confirm).
waiting_by_speciality.to_csv(
    data_dir / "waiting_time_by_speciality.csv", index=False
)

# Print the whole table (one row per Speciality) as plain text without the index.
print("\n=== WAITING TIME BY SPECIALITY (SORTED BY PRIORITY PRESSURE) ===")
print(waiting_by_speciality.to_string(index=False))



=== WAITING TIME BY SPECIALITY (SORTED BY PRIORITY PRESSURE) ===
               Speciality  patients  mean_waiting_days  median_waiting_days  max_waiting_days  top25p_count  priority_score  pct_in_top_25%
General Internal Medicine       165          46.927273                 19.0               377            37        2.729707            22.4
            Ophthalmology        15          67.266667                 34.0               297             6        1.479654            40.0
        Gastronenterology        47          39.404255                 23.0               214            12        1.233597            25.5
  Trauma and Orthopaedics        38          82.605263                 52.0               443            12        0.544118            31.6
       Geriatric Medicine        25          70.240000                 32.0               490             4        0.200271            16.0
              Paediatrics        10         137.000000                 50.5               890 