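This notebook shows how to load a by-district aggregates file (JSONL) and extract the aggregates into a dictionary keyed by record name. TODO: expand this introduction.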

In [6]:
import json
import os

from typing import Any, Dict, List, Optional

# For each aggregate type, the dataset keys to read under a record's "by-district" entry.
datasets_by_type: Dict[str, List[str]] = {
    "general": ["census"],
    "partisan": ["election"],
    "minority": ["vap", "cvap"],
    "compactness": ["shapes"],
    "splitting": ["census"],
}
# Aggregate type extracted by this cell.
agg_type: str = "compactness"

# Input file in JSONL form: one JSON record per line.
filename: str = "~/local/beta-ensembles/sample/NC_congress/NC_congress_T0.01_S0.0_R0_Vcut-edges-rmst/NC_congress_T0.01_S0.0_R0_Vcut-edges-rmst_compactness_bydistrict.jsonl"
# Expand the leading "~" to the user's home directory, then parse every line.
with open(os.path.expanduser(filename), mode="r", encoding="utf-8") as f:
    data: List[Dict[str, Any]] = list(map(json.loads, f))

def extract_aggregates(
    data,
    agg_type: str,
    dataset_lookup: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, Dict[str, Any]]:
    """Extract the by-district aggregates from the raw data.

    Dataset types and dataset names are ignored: for every dataset key listed
    for `agg_type`, the nested aggregate dictionaries are merged into a single
    flat dictionary per record. Assumes one dataset per type; if the same
    aggregate name occurs more than once, the last one seen wins.

    Args:
        data: Iterable of parsed records (dicts). Every record must have a
            "_tag_" key; only records tagged "by-district" are used, and those
            must also provide "name" and "by-district" keys.
        agg_type: Key selecting which dataset keys to read from the lookup.
        dataset_lookup: Optional mapping from aggregate type to dataset keys.
            When omitted, the module-level `datasets_by_type` is used, which
            is what the original two-argument call did.

    Returns:
        Mapping from record name to its merged {aggregate name: values} dict.

    Raises:
        AssertionError: If a record has no "_tag_" key.
        KeyError: If `agg_type`, "name", "by-district" or a dataset key is
            missing where it is looked up.
    """
    if dataset_lookup is None:
        # Backward-compatible default: fall back to the notebook-level table.
        dataset_lookup = datasets_by_type

    aggregates: Dict[str, Dict[str, Any]] = dict()

    for record in data:
        assert "_tag_" in record, "Record does not contain '_tag_' key"
        if record["_tag_"] != "by-district":
            continue

        name: str = record["name"]

        collection: Dict[str, Any] = dict()
        for dataset in dataset_lookup[agg_type]:
            # Skip over the dataset type and dataset name: only the innermost
            # {aggregate name: values} dictionaries are kept, merged in order.
            for aggs in record["by-district"][dataset].values():
                collection.update(aggs)

        aggregates[name] = collection

    return aggregates

# Build the {record name: {aggregate name: values}} mapping for the selected aggregate type.
aggregates: Dict[str, Dict[str, Any]] = extract_aggregates(data, agg_type)
# Bare last expression so the notebook shows the result as the cell output.
aggregates

{'000002500': {'area': [13.852160251829458,
   1.2300019711899999,
   0.5702962220939997,
   1.6100590872435017,
   0.5947049196679999,
   0.34879153740750013,
   3.6773471019679986,
   0.6852026138114994,
   1.2659340099415004,
   0.6080692354295006,
   1.1057451566415015,
   1.41513884959,
   0.13017338315299976,
   0.5314924928894995,
   0.07920367080200016],
  'diameter': [0.0,
   1.9179494308330975,
   1.4184228373048051,
   2.7835023883704477,
   1.4926055457127574,
   1.1583531040593733,
   2.9920933798135496,
   1.5140868930527074,
   2.1147311298678124,
   1.3297552860012598,
   1.9604368367765452,
   2.7330768365360765,
   0.5679222247977375,
   1.7147254838748969,
   0.4624805503640095],
  'perimeter': [0.0,
   9.807370418122854,
   7.46523326970865,
   9.891541862697546,
   6.311719259375535,
   5.21260996692771,
   12.97717134825357,
   7.899018218968315,
   10.216615815370789,
   6.605961078821914,
   8.92134326977276,
   9.3069350469125,
   2.898078083647163,
   6.327851