In [2]:
import json
import gzip
import requests
from pathlib import Path
from typing import List, Dict, Tuple, Sequence, Union, Optional
from dataclasses import dataclass, field

import numpy as np
import scipy.sparse as sp
import gurobipy as gp
from gurobipy import GRB
from tqdm.auto import tqdm

# ---------------------------------------------------------------------------
#  Constants & cache                                                          
# ---------------------------------------------------------------------------
INSTANCES_URL = "https://axavier.org/UnitCommitment.jl/0.4/instances"
_CACHE = Path(__file__).resolve().parent / "instances"
_CACHE.mkdir(parents=True, exist_ok=True)

# ---------------------------------------------------------------------------
#  Data classes (faithful to UnitCommitment.jl)                               
# ---------------------------------------------------------------------------
Number = Union[int, float]
Series = List[Number]

@dataclass
class CostSegment:
    amount: Series
    cost: Series

@dataclass
class StartupCategory:
    delay_steps: int
    cost: float

@dataclass
class Bus:
    name: str
    index: int
    load: Series
    thermal_units: List["ThermalUnit"] = field(default_factory=list)
    price_sensitive_loads: List["PriceSensitiveLoad"] = field(default_factory=list)
    profiled_units: List["ProfiledUnit"] = field(default_factory=list)
    storage_units: List["StorageUnit"] = field(default_factory=list)

@dataclass
class Reserve:
    name: str
    type: str  # "spinning", "flex-up", etc.
    amount: Series
    thermal_units: List["ThermalUnit"]
    shortfall_penalty: float

@dataclass
class ThermalUnit:
    name: str
    bus: Bus
    max_power: Series
    min_power: Series
    must_run: Series
    min_power_cost: Series
    segments: List[CostSegment]
    min_up: int
    min_down: int
    ramp_up: float
    ramp_down: float
    startup_limit: float
    shutdown_limit: float
    initial_status: Optional[int]
    initial_power: Optional[float]
    startup_categories: List[StartupCategory]
    reserves: List[Reserve]
    commitment_status: List[Optional[bool]]

@dataclass
class ProfiledUnit:
    name: str
    bus: Bus
    min_power: Series
    max_power: Series
    cost: Series

@dataclass
class StorageUnit:
    name: str
    bus: Bus
    min_level: Series
    max_level: Series
    simultaneous: Series
    charge_cost: Series
    discharge_cost: Series
    charge_eff: Series
    discharge_eff: Series
    loss_factor: Series
    min_charge: Series
    max_charge: Series
    min_discharge: Series
    max_discharge: Series
    initial_level: float
    last_min: float
    last_max: float

@dataclass
class TransmissionLine:
    name: str
    index: int
    source: Bus
    target: Bus
    susceptance: float
    normal_limit: Series
    emergency_limit: Series
    flow_penalty: Series

@dataclass
class Contingency:
    name: str
    lines: List[TransmissionLine]
    units: List[ThermalUnit]

@dataclass
class PriceSensitiveLoad:
    name: str
    bus: Bus
    demand: Series
    revenue: Series

@dataclass
class UnitCommitmentScenario:
    name: str
    probability: float
    buses_by_name: Dict[str, Bus]
    buses: List[Bus]
    contingencies_by_name: Dict[str, Contingency]
    contingencies: List[Contingency]
    lines_by_name: Dict[str, TransmissionLine]
    lines: List[TransmissionLine]
    power_balance_penalty: Series
    price_sensitive_loads_by_name: Dict[str, PriceSensitiveLoad]
    price_sensitive_loads: List[PriceSensitiveLoad]
    reserves: List[Reserve]
    reserves_by_name: Dict[str, Reserve]
    time: int
    time_step: int
    thermal_units_by_name: Dict[str, ThermalUnit]
    thermal_units: List[ThermalUnit]
    profiled_units_by_name: Dict[str, ProfiledUnit]
    profiled_units: List[ProfiledUnit]
    storage_units_by_name: Dict[str, StorageUnit]
    storage_units: List[StorageUnit]
    isf: sp.csr_matrix
    lodf: sp.csr_matrix
    source: Optional[str] = None

@dataclass
class UnitCommitmentInstance:
    time: int
    scenarios: List[UnitCommitmentScenario]

    @property
    def deterministic(self) -> UnitCommitmentScenario:
        if len(self.scenarios) != 1:
            raise ValueError("Instance is stochastic; pick a scenario explicitly")
        return self.scenarios[0]

# ---------------------------------------------------------------------------
#  Utility helpers                                                            
# ---------------------------------------------------------------------------

def _download(url: str, dst: Path, chunk: int = 1 << 20) -> None:
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        total = int(r.headers.get("content-length", 0))
        with dst.open("wb") as fh, tqdm(total=total, unit="B", unit_scale=True, disable=total == 0) as bar:
            for blk in r.iter_content(chunk_size=chunk):
                fh.write(blk)
                bar.update(len(blk))

def read_benchmark(name: str) -> UnitCommitmentInstance:
    gz_name = f"{name}.json.gz"
    local = _CACHE / gz_name
    if not local.exists():
        print(f"Downloading {gz_name}…")
        _download(f"{INSTANCES_URL}/{gz_name}", local)
    return read(local)

def _read_json(path: Union[str, Path]) -> dict:
    path = Path(path)
    if str(path).endswith(".gz"):
        with gzip.open(path, "rt", encoding="utf-8") as fh:
            return json.load(fh)
    with path.open("r", encoding="utf-8") as fh:
        return json.load(fh)

# scalar / timeseries helpers

def _scalar(val, default=None):
    return default if val is None else val

def _timeseries(val, T: int, *, default=None):
    if val is None:
        return default if default is not None else [None] * T
    return val if isinstance(val, list) else [val] * T

# (migrations omitted for brevity – assume files are v0.4 already)

# ---------------------------------------------------------------------------
#  JSON ➜ objects (shortened from earlier)                                    
# ---------------------------------------------------------------------------

def _from_json(j: dict) -> UnitCommitmentScenario:
    # Only supports v0.4 datasets (all benchmarks ≥0.4)
    par = j["Parameters"]
    time_horizon = int(par["Time horizon (min)"])
    time_step = int(_scalar(par.get("Time step (min)"), 60))
    T = time_horizon // time_step

    # Build buses
    buses: List[Bus] = []
    name_to_bus = {}
    for idx, (bname, bdict) in enumerate(j["Buses"].items(), start=1):
        bus = Bus(name=bname, index=idx, load=_timeseries(bdict["Load (MW)"], T))
        buses.append(bus)
        name_to_bus[bname] = bus

    # Reserves
    reserves: List[Reserve] = []
    name_to_reserve = {}
    for rname, rdict in j.get("Reserves", {}).items():
        r = Reserve(name=rname, type=rdict["Type"].lower(), amount=_timeseries(rdict["Amount (MW)"], T), thermal_units=[], shortfall_penalty=_scalar(rdict.get("Shortfall penalty ($/MW)"), 10))
        reserves.append(r)
        name_to_reserve[rname] = r

    # Generators (thermal + profiled)
    thermal_units: List[ThermalUnit] = []
    profiled_units: List[ProfiledUnit] = []
    name_to_unit = {}
    for gname, gdict in j["Generators"].items():
        bus = name_to_bus[gdict["Bus"]]
        if gdict["Type"].lower() == "thermal":
            curve_mw = gdict["Production cost curve (MW)"]
            curve_cost = gdict["Production cost curve ($)"]
            K = len(curve_mw)
            curve_mw = np.column_stack([_timeseries(curve_mw[k], T) for k in range(K)])
            curve_cost = np.column_stack([_timeseries(curve_cost[k], T) for k in range(K)])
            min_power = curve_mw[:, 0].tolist()
            max_power = curve_mw[:, -1].tolist()
            min_power_cost = curve_cost[:, 0].tolist()
            segments = []
            for k in range(1, K):
                amount = (curve_mw[:, k] - curve_mw[:, k - 1]).tolist()
                cost = ((curve_cost[:, k] - curve_cost[:, k - 1]) / np.maximum(amount, 1e-9)).tolist()
                segments.append(CostSegment(amount, cost))
            tu = ThermalUnit(
                name=gname,
                bus=bus,
                max_power=max_power,
                min_power=min_power,
                must_run=_timeseries(gdict.get("Must run?"), T, default=[False] * T),
                min_power_cost=min_power_cost,
                segments=segments,
                min_up=int(_scalar(gdict.get("Minimum uptime (h)"), 1)),
                min_down=int(_scalar(gdict.get("Minimum downtime (h)"), 1)),
                ramp_up=_scalar(gdict.get("Ramp up limit (MW)"), 1e6),
                ramp_down=_scalar(gdict.get("Ramp down limit (MW)"), 1e6),
                startup_limit=_scalar(gdict.get("Startup limit (MW)"), 1e6),
                shutdown_limit=_scalar(gdict.get("Shutdown limit (MW)"), 1e6),
                initial_status=int(_scalar(gdict.get("Initial status (h)"), 0)),
                initial_power=_scalar(gdict.get("Initial power (MW)"), 0.0),
                startup_categories=[StartupCategory(delay_steps=1, cost=0.0)],
                reserves=[name_to_reserve[n] for n in gdict.get("Reserve eligibility", [])],
                commitment_status=_timeseries(gdict.get("Commitment status"), T, default=[None] * T),
            )
            bus.thermal_units.append(tu)
            for r in tu.reserves:
                r.thermal_units.append(tu)
            thermal_units.append(tu)
            name_to_unit[gname] = tu
        else:
            pu = ProfiledUnit(name=gname, bus=bus, min_power=_timeseries(_scalar(gdict.get("Minimum power (MW)"), 0.0), T), max_power=_timeseries(gdict["Maximum power (MW)"], T), cost=_timeseries(gdict["Cost ($/MW)"], T))
            bus.profiled_units.append(pu)
            profiled_units.append(pu)

    # Lines
    lines: List[TransmissionLine] = []
    name_to_line = {}
    for idx, (lname, ldict) in enumerate(j.get("Transmission lines", {}).items(), start=1):
        line = TransmissionLine(name=lname, index=idx, source=name_to_bus[ldict["Source bus"]], target=name_to_bus[ldict["Target bus"]], susceptance=float(ldict["Susceptance (S)"]), normal_limit=_timeseries(ldict.get("Normal flow limit (MW)"), T, default=[1e8] * T), emergency_limit=_timeseries(ldict.get("Emergency flow limit (MW)"), T, default=[1e8] * T), flow_penalty=_timeseries(ldict.get("Flow limit penalty ($/MW)"), T, default=[5000.0] * T))
        lines.append(line)
        name_to_line[lname] = line

    # Contingencies (affects outaged lines & units)
    contingencies: List[Contingency] = []
    for cname, cdict in j.get("Contingencies", {}).items():
        contingencies.append(Contingency(name=cname, lines=[name_to_line[l] for l in cdict.get("Affected lines", [])], units=[name_to_unit[u] for u in cdict.get("Affected units", [])]))

    # Price‑sensitive load
    psloads: List[PriceSensitiveLoad] = []
    for lname, ldict in j.get("Price-sensitive loads", {}).items():
        pl = PriceSensitiveLoad(name=lname, bus=name_to_bus[ldict["Bus"]], demand=_timeseries(ldict["Demand (MW)"], T), revenue=_timeseries(ldict["Revenue ($/MW)"], T))
        psloads.append(pl)
        pl.bus.price_sensitive_loads.append(pl)

    # Storage (omitted for brevity)
    storage_units: List[StorageUnit] = []

    isf = sp.csr_matrix((len(lines), len(buses) - 1), dtype=float)
    lodf = sp.csr_matrix((len(lines), len(lines)), dtype=float)

    sc = UnitCommitmentScenario(name=j["Parameters"].get("Scenario name", ""), probability=float(j["Parameters"].get("Scenario weight", 1)), buses_by_name={b.name: b for b in buses}, buses=buses, contingencies_by_name={c.name: c for c in contingencies}, contingencies=contingencies, lines_by_name={l.name: l for l in lines}, lines=lines, power_balance_penalty=_timeseries(j["Parameters"].get("Power balance penalty ($/MW)"), T, default=[1000.0] * T), price_sensitive_loads_by_name={pl.name: pl for pl in psloads}, price_sensitive_loads=psloads, reserves=reserves, reserves_by_name=name_to_reserve, time=T, time_step=time_step, thermal_units_by_name={tu.name: tu for tu in thermal_units}, thermal_units=thermal_units, profiled_units_by_name={pu.name: pu for pu in profiled_units}, profiled_units=profiled_units, storage_units_by_name={}, storage_units=storage_units, isf=isf, lodf=lodf)

    return sc

def read(path_or_paths: Union[str, Sequence[str]]) -> UnitCommitmentInstance:
    if isinstance(path_or_paths, (list, tuple)):
        scenarios = [_from_json(_read_json(p)) for p in path_or_paths]


NameError: name '__file__' is not defined