In [None]:
from csv import DictReader
from collections import Counter

# Load the raw project list for every budget from the CSV export.
# Each row contributes its "budget" column as the key and its "projects"
# column (still an unparsed string at this point) as the value; if a budget
# appears in more than one row, the last row wins.
# newline="" is what the csv module asks for when it is handed a file object,
# so that line endings inside quoted fields reach the reader untouched.
with open("/code/data/optimal_project_lists.csv", newline="") as f:
    reader = DictReader(f)
    projects = {row["budget"]: row["projects"] for row in reader}

# number of budgets each project id appears in
counts = Counter()

# list of project ids (whitespace-stripped strings) keyed by budget
proj_by_budget = {}

for budget, raw_projects in projects.items():
    # The "projects" value is treated as a bracketed, comma-separated list
    # (presumably something like "[3, 17, 42]" -- confirm against the CSV).
    tokens = raw_projects.replace("[", "").replace("]", "").split(",")
    # Strip each token so "17" and " 17" count as the same id, and drop empty
    # tokens so an empty list ("[]") parses to [] instead of [""].
    project_list = [token.strip() for token in tokens if token.strip()]
    counts.update(project_list)
    assert len(set(project_list)) == len(project_list), f"{budget} has duplicate projects"
    proj_by_budget[budget] = project_list

# some projects are included in many budgets, which is expected
print(counts.most_common(10))

In [None]:
import pickle
from typing import List
from collections import defaultdict, Counter

# counts of arterial ids in the proj2art mapping
# if each arterial belongs to only one project, we'd expect every value to have a count of 1
count = Counter()

# mapping of arterial ids to the project ids that contain them
# if each arterial belongs to only one project, we'd expect every value to have a length of 1
art2proj = defaultdict(list)

# mapping of project ids to arterials
proj2art = defaultdict(list)

# mapping of budgets to arterials
art_by_budg = {}

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only run this against a file from a trusted source.
with open("/code/data/proj2artid.pkl", "rb") as f:
    mapping: List[List[int]] = pickle.load(f)

# A project's id is its position in the pickled list.
for project_id, arterial_ids in enumerate(mapping):
    count.update(arterial_ids)
    proj2art[project_id] = arterial_ids
    for arterial_id in arterial_ids:
        # Append in place rather than rebuilding the list for every arterial.
        art2proj[arterial_id].append(project_id)

# For every budget, concatenate the arterials of each of its projects, in
# project order. Duplicates are kept on purpose so they remain detectable.
for budget, project_ids in proj_by_budget.items():
    arterials = []
    for proj in project_ids:
        proj_id = int(proj)
        # proj2art is a defaultdict: indexing an unknown id would quietly
        # return [] (and insert a new key), hiding a mismatch between the
        # budget lists and the project->arterial mapping. Fail loudly instead.
        assert proj_id in proj2art, f"budget {budget} references unknown project {proj_id}"
        arterials.extend(proj2art[proj_id])
    art_by_budg[budget] = arterials

# Check: within each budget, does any arterial appear more than once across
# the projects selected for that budget?
for budget_id, arterial_list in art_by_budg.items():
    distinct_arterials = set(arterial_list)
    assert len(arterial_list) == len(distinct_arterials), f"budget {budget_id} has duplicate arterials!"

# No duplicates when the assertions pass, which means that
# project-arterial-budget is unique, even if project-arterial is not

# are some arterial ids included in multiple projects?
print(count.most_common(5))

# Yes. For example, arterial 5691 is in projects 51, 53, 310.
example_arterial = 5691
example_projects = (51, 53, 310)
print(art2proj[example_arterial])

# assert that the previous holds in the original mapping,
# that is, that the example arterial is in each of the listed projects
for project_idx in example_projects:
    assert example_arterial in mapping[project_idx]

# display the arterials of the projects that contain the example arterial
# and note that both 310 and 53 contain 51 (not sure it matters at all)
for project_idx in example_projects:
    print(sorted(mapping[project_idx]))