# Ambulance Dispatching Mathematical Modeling Formulation

<div class="alert alert-danger">
<b>Warning:</b> this formulation has not been checked in detail yet.
</div>

In [7]:
import pandas as pd
import numpy as np

import gurobipy as gp
from gurobipy import GRB, Model, quicksum

from collections import defaultdict

In [8]:
# gurobi_mdp_lp.py
def build_mdp_lp(
    S,                      # iterable of states
    CallType,               # iterable of (h, i) tuples
    X,                      # dict: X[(s,(h,i))] -> iterable of actions j
    u,                      # dict: u[(h,i,j)] -> reward u_ij^h (float)
    P                       # dict: P[((s,j,(h,i)), (s_prime,(h_prime,i_prime)))] -> prob
):
    """
    Build the occupancy-measure LP:
      max   sum_{s,(h,i),j in X(s,(h,i))} u_{ij}^h * y[s,j,(h,i)]
      s.t.  flow balance for each (s',(h',i')) and normalization
            y >= 0

    The model is only built here; it is returned without being optimized.
    Solver output is switched off (``OutputFlag = 0``).

    Parameters
    ----------
    S : iterable
        States (hashable). Any iterable is accepted, including one-shot
        generators; it is materialized into a list once.
    CallType : iterable
        Call types (h,i) pairs (hashable). Materialized into a list once,
        like ``S``.
    X : dict
        {(s,(h,i)) : iterable of j}. A missing key means "no feasible
        action" for that pair. Each action iterable is read exactly once.
    u : dict
        {(h,i,j) : reward}. A missing key is treated as reward 0.0.
    P : dict
        {((s,j,(h,i)), (s_prime,(h_prime,i_prime))) : probability}.
        Entries with probability 0.0 are dropped. Entries whose target
        (s_prime, (h_prime,i_prime)) is not in S x CallType do not appear
        in any constraint.

    Returns
    -------
    model, y
        model: Gurobi Model
        y: dict-like decision variables keyed by (s,j,(h,i))

    Raises
    ------
    KeyError
        If a nonzero transition in ``P`` into some (s', (h',i')) of
        S x CallType starts from a triple (s, j, (h,i)) that has no decision
        variable (s not in S, (h,i) not in CallType, or j not in X[(s,(h,i))]).
    """
    # S and CallType are each traversed twice (variables, then constraints)
    # and CallType is re-traversed for every state. Snapshot them so one-shot
    # iterables (generators, dict views being mutated, ...) behave like lists.
    states = list(S)
    call_types = list(CallType)

    m = Model("MDP_OccupancyLP")
    m.Params.OutputFlag = 0  # quiet by default; set to 1 for logs

    # Decision variables y[s,j,(h,i)] >= 0 only for feasible (s,(h,i),j).
    # The action list per (s, ct) is stored so the flow-balance step below
    # reuses exactly the same actions without reading X a second time.
    y = {}
    actions = {}
    for s in states:
        for ct in call_types:
            feasible = list(X.get((s, ct), []))
            actions[(s, ct)] = feasible
            for j in feasible:
                y[(s, j, ct)] = m.addVar(lb=0.0, name=f"y[{s},{j},{ct}]")
    m.update()

    # Objective: sum u_ij^h * y  (reward defaults to 0.0 when u has no entry)
    m.setObjective(
        quicksum(u.get((ct[0], ct[1], j), 0.0) * var
                 for (s, j, ct), var in y.items()),
        GRB.MAXIMIZE
    )

    # Flow-balance constraints: for each (s', (h',i'))
    # sum_{j' in X(s',(h',i'))} y(s',j',(h',i')) - sum_{s,h,i,j} p(...) y(s,j,(h,i)) = 0
    # Pre-aggregate inbound terms per target so each constraint only touches
    # the transitions that actually lead into it.
    inbound = defaultdict(list)  # (s_prime, ct_prime) -> list of ((s,j,ct), prob)
    for ((s, j, ct), (sp, ctp)), prob in P.items():
        if prob != 0.0:
            inbound[(sp, ctp)].append(((s, j, ct), prob))

    for sp in states:
        for ctp in call_types:
            outflow = quicksum(y[(sp, jp, ctp)] for jp in actions[(sp, ctp)])

            terms = inbound.get((sp, ctp), [])
            for src, _ in terms:
                if src not in y:
                    # Same exception type a plain y[src] lookup would raise,
                    # but with enough context to find the offending P entry.
                    raise KeyError(
                        f"P has a transition from {src!r} into {(sp, ctp)!r}, "
                        "but no decision variable y exists for that source "
                        "(check S, CallType and X)"
                    )
            inflow = quicksum(prob * y[src] for src, prob in terms)

            m.addConstr(outflow - inflow == 0.0, name=f"flow[{sp},{ctp}]")

    # Normalization: sum y = 1
    m.addConstr(quicksum(y.values()) == 1.0, name="normalization")

    m.update()
    return m, y

In [9]:
def extract_policy(y_vars, X):
    """
    Turn optimized occupancy variables into a stationary randomized policy.

    For every (s,(h,i)) pair that appears in ``y_vars``:
      q(s,(h,i))[j] = y(s,j,(h,i)) / sum_{j'} y(s,j',(h,i))
    where the values are read from each variable's ``.X`` attribute.

    Parameters
    ----------
    y_vars : dict
        {(s,j,(h,i)) : variable exposing ``.X``}
    X : dict
        {(s,(h,i)) : iterable of j}; a missing key yields an empty
        distribution for that pair.

    Returns
    -------
    dict
        policy[(s,(h,i))] -> {j: prob}. Pairs whose total mass is not
        positive get probability 0.0 for every listed action. Actions
        listed in ``X`` but absent from ``y_vars`` are omitted when the
        total mass is positive.
    """
    # Total occupancy mass per (state, call type).
    mass = defaultdict(float)
    for (state, _action, call), variable in y_vars.items():
        mass[(state, call)] += variable.X

    policy = {}
    for pair, total in mass.items():
        state, call = pair
        actions = X.get(pair, [])
        if total > 0:
            # Normalize each action's mass by the pair's total.
            policy[pair] = {
                action: y_vars[(state, action, call)].X / total
                for action in actions
                if (state, action, call) in y_vars
            }
        else:
            # unreachable (s,ct): put zero probs
            policy[pair] = {action: 0.0 for action in actions}
    return policy