In [7]:
"""
IPython cell – population statistics **plus full family listing** ✔️
(Extends previous version with role-normalisation & apartment counts.)

New feature
-----------
• After the main report a section “— Families —” lists every family
  (connected component from `other_family_members`) on its own line.

You may adjust `DATA_FILE` if your JSON is elsewhere.
"""

from __future__ import annotations
import json, warnings
from pathlib import Path
from collections import Counter, defaultdict, deque
from typing import Dict, List, Set, Any

# --------------------------------------------------------------------- #
#  Files                                                                #
# --------------------------------------------------------------------- #
# Default input path: the JSON file that load_population() reads.
DATA_FILE   = Path("population_info_complete.json")   # change if needed
# Default output path: where save_adult_population() writes the profiles
# whose "age" is 18 or more.
ADULTS_FILE = Path("population_info.json")

# --------------------------------------------------------------------- #
#  I/O helpers                                                          #
# --------------------------------------------------------------------- #
def load_population(path: Path = DATA_FILE) -> Dict[str, Dict[str, Any]]:
    """Read the population JSON at *path* and return the decoded object.

    The file is read as UTF-8 text in one go and parsed with ``json.loads``;
    any I/O or JSON decoding error propagates to the caller.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)

def save_adult_population(pop: Dict[str, Dict[str, Any]],
                          out_path: Path = ADULTS_FILE) -> None:
    """Write every profile aged 18 or over to *out_path* as pretty JSON.

    A profile without an ``"age"`` entry is treated as age 0 and is therefore
    left out. The file is written as UTF-8 with non-ASCII characters kept
    verbatim, and a one-line confirmation is printed afterwards.
    """
    adults: Dict[str, Dict[str, Any]] = {}
    for name, profile in pop.items():
        if profile.get("age", 0) >= 18:
            adults[name] = profile

    serialised = json.dumps(adults, indent=2, ensure_ascii=False)
    out_path.write_text(serialised, encoding="utf-8")
    print(f"✅  Saved {len(adults)} adult profiles → {out_path}")

# --------------------------------------------------------------------- #
#  Role normalisation (fixes stray 'hun', 'hus ', etc.)                 #
# --------------------------------------------------------------------- #
# Maps every accepted (lower-cased, stripped) spelling to its canonical role.
_CANON = {
    "single": "single",
    # 'husband' plus the misspellings / abbreviations seen in the data
    "husband": "husband",
    "hus": "husband",
    "husb": "husband",
    "hun": "husband",
    "hub": "husband",
    "wife": "wife",
    "son": "son",
    "daughter": "daughter",
}


def normalise_role(raw: str) -> str:
    """Return the canonical family role for *raw*, or ``"unknown"``.

    The lookup ignores case and surrounding whitespace. A non-string value
    yields ``"unknown"`` silently; a string that is not in ``_CANON`` yields
    ``"unknown"`` and emits a warning naming the offending value.
    """
    if isinstance(raw, str):
        canonical = _CANON.get(raw.strip().lower())
        if canonical is not None:
            return canonical
        # Only unrecognised *strings* are reported; other types fall through.
        warnings.warn(f"Unrecognised family_role '{raw}' → counted as 'unknown'")
    return "unknown"

# --------------------------------------------------------------------- #
#  Build families (connected components)                                #
# --------------------------------------------------------------------- #
def _extract_names(obj: Any) -> List[str]:
    """Flatten *obj* into the list of strings it contains.

    A string yields itself. Lists, tuples and sets are walked element by
    element; dicts are walked through their values only (keys are ignored).
    Nesting is followed recursively. Any other value (``None``, numbers, ...)
    contributes nothing.
    """
    if isinstance(obj, str):
        return [obj]

    # Pick what to recurse into; everything that is not a container is dropped.
    if isinstance(obj, dict):
        children = obj.values()
    elif isinstance(obj, (list, tuple, set)):
        children = obj
    else:
        return []

    return [name for child in children for name in _extract_names(child)]

def build_families(pop: Dict[str, Dict[str, Any]]) -> List[Set[str]]:
    """Group the people in *pop* into families.

    Two people are linked when either one names the other in their
    ``other_family_members`` entry; names that are not keys of *pop* are
    ignored. Each returned set is one connected component of those links, so
    a person with no links forms a family of one. Families appear in the
    order in which their first member occurs in *pop*.
    """
    # Undirected adjacency: a mention in either direction links both people.
    links: Dict[str, Set[str]] = {}
    for name, person in pop.items():
        for relative in _extract_names(person.get("other_family_members", [])):
            if relative not in pop:
                continue
            links.setdefault(name, set()).add(relative)
            links.setdefault(relative, set()).add(name)

    seen: Set[str] = set()
    families: List[Set[str]] = []
    for root in pop:
        if root in seen:
            continue
        # Depth-first walk collecting everyone reachable from `root`.
        members: Set[str] = set()
        stack = [root]
        while stack:
            current = stack.pop()
            if current in seen:
                continue
            seen.add(current)
            members.add(current)
            stack.extend(links.get(current, ()))
        families.append(members)
    return families

# --------------------------------------------------------------------- #
#  Statistics core                                                      #
# --------------------------------------------------------------------- #
def compute_statistics(pop: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate the population into the counters used by the report.

    Returns a dict with these keys:
      total_people      number of entries in *pop*
      gender            Counter of ``gender`` values
      ages              Counter of age groups: "children" (< 18),
                        "young" (< 60), "elderly" (otherwise)
      roles             Counter of normalised ``family_role`` values
      families_total    number of families found by ``build_families``
      family_sizes      Counter of family size -> number of families
      incomes           Counter of household income values
      licensed_drivers  number of people with a truthy ``licensed_driver``
      vehicle_dist      Counter of vehicles per family (0, 1, 2 or ">2")
      work_facilities   Counter of ``work_facility`` values
      apartment_counts  Counter of ``home_facility`` values
      raw_roles         set of the role values exactly as stored
      families          the list of family member sets

    Missing fields count as "unknown", except a missing ``age`` which is
    treated as 0 and so lands in "children".
    """
    families = build_families(pop)

    # ---- per-family tallies ------------------------------------------- #
    size_hist = Counter()
    vehicle_hist = Counter()
    for members in families:
        size_hist[len(members)] += 1
        # A family's vehicle count is the largest value any member reports.
        most_vehicles = max(
            pop[name].get("number_of_vehicles_in_family", 0) for name in members
        )
        bucket = most_vehicles if most_vehicles in (0, 1, 2) else ">2"
        vehicle_hist[bucket] += 1

    # ---- per-person tallies ------------------------------------------- #
    gender_tally = Counter()
    age_tally = Counter()
    role_tally = Counter()
    income_tally = Counter()
    workplace_tally = Counter()
    home_tally = Counter()
    roles_as_recorded = set()
    driver_total = 0

    for profile in pop.values():
        gender_tally[profile.get("gender", "unknown")] += 1

        years = profile.get("age", 0)
        if years < 18:
            age_group = "children"
        elif years < 60:
            age_group = "young"
        else:
            age_group = "elderly"
        age_tally[age_group] += 1

        recorded_role = profile.get("family_role", "unknown")
        roles_as_recorded.add(recorded_role)
        role_tally[normalise_role(recorded_role)] += 1

        # The income field is spelled with either an underscore or a space.
        if "household_income" in profile:
            income = profile["household_income"]
        else:
            income = profile.get("household income", "unknown")
        income_tally[income] += 1

        if profile.get("licensed_driver"):
            driver_total += 1

        workplace_tally[profile.get("work_facility", "unknown")] += 1
        home_tally[profile.get("home_facility", "unknown")] += 1

    return {
        "total_people": len(pop),
        "gender": gender_tally,
        "ages": age_tally,
        "roles": role_tally,
        "families_total": len(families),
        "family_sizes": size_hist,
        "incomes": income_tally,
        "licensed_drivers": driver_total,
        "vehicle_dist": vehicle_hist,
        "work_facilities": workplace_tally,
        "apartment_counts": home_tally,
        "raw_roles": roles_as_recorded,
        "families": families,                   # full list for printing
    }

# --------------------------------------------------------------------- #
#  Pretty report + family listing                                       #
# --------------------------------------------------------------------- #
def print_report(s: Dict[str, Any]) -> None:
    """Print the population summary followed by the full family listing.

    *s* is the dict produced by ``compute_statistics``; the keys read here are
    ``total_people``, ``gender``, ``ages``, ``roles``, ``families_total``,
    ``family_sizes``, ``incomes``, ``licensed_drivers``, ``vehicle_dist``,
    ``work_facilities``, ``apartment_counts`` and ``families``.

    Vehicle buckets are printed in ascending order (numeric buckets first,
    then the ">2" bucket) and use the singular "vehicle" for a count of 1,
    matching how the family-size lines are sorted and pluralised.
    """

    def _print_counts(title: str, counts: Dict[Any, int]) -> None:
        # One "label: count" line per entry, followed by a blank line.
        print(title)
        for label, count in counts.items():
            print(f"  {label}: {count}")
        print()

    def _vehicle_order(bucket: Any):
        # Numeric buckets sort numerically; the ">2" label sorts after them.
        if isinstance(bucket, int):
            return (0, bucket, "")
        return (1, 0, str(bucket))

    print("\n=== Population Summary ===")
    print(f"Total people: {s['total_people']}\n")

    _print_counts("— Gender —", s['gender'])
    _print_counts("— Age groups —", s['ages'])
    _print_counts("— Family roles —", s['roles'])

    print("— Family statistics —")
    print(f"  Total families: {s['families_total']}")
    for sz in sorted(s['family_sizes']):
        print(f"  families with {sz} member{'' if sz == 1 else 's'}: {s['family_sizes'][sz]}")
    print()

    _print_counts("— Household incomes —", s['incomes'])

    print(f"Licensed drivers: {s['licensed_drivers']}\n")

    print("— Vehicles per family —")
    for v in sorted(s['vehicle_dist'], key=_vehicle_order):
        if isinstance(v, int):
            lab = f"{v} vehicle{'' if v == 1 else 's'}"
        else:
            lab = str(v)
        print(f"  {lab}: {s['vehicle_dist'][v]}")
    print()

    _print_counts("— Workers per facility —", s['work_facilities'])

    print("— Residents per apartment —")
    up  = s['apartment_counts'].get("Uptown apartment", 0)
    mid = s['apartment_counts'].get("Midtown apartment", 0)
    print(f"  Uptown apartment residents : {up}")
    print(f"  Midtown apartment residents: {mid}")
    print("  (all apartments)")
    for apt, cnt in s['apartment_counts'].items():
        print(f"    {apt}: {cnt}")

    # One line per family, ordered by each family's alphabetically first
    # member; members within a family are listed alphabetically as well.
    print("\n— Families —")
    for fam in sorted(s['families'], key=lambda f: min(f, default="")):
        print(", ".join(sorted(fam)))

    # s['raw_roles'] (the role values as stored, before normalisation) is
    # deliberately not printed; it is available for ad-hoc inspection.

# --------------------------------------------------------------------- #
#  Execute once                                                         #
# --------------------------------------------------------------------- #
# Runs every time the cell is executed, in this order:
#   1. load the population from the default DATA_FILE,
#   2. compute the statistics,
#   3. print the report (including the family listing),
#   4. write the adult-only profiles to the default ADULTS_FILE.
# `pop` and `stats` stay bound at module level after the cell has run.
pop = load_population()
stats = compute_statistics(pop)
print_report(stats)
save_adult_population(pop)


=== Population Summary ===
Total people: 70

— Gender —
  female: 36
  male: 34

— Age groups —
  young: 60
  children: 10

— Family roles —
  single: 26
  husband: 17
  wife: 17
  son: 5
  daughter: 5

— Family statistics —
  Total families: 43
  families with 1 member: 26
  families with 2 members: 8
  families with 3 members: 8
  families with 4 members: 1

— Household incomes —
  low: 10
  middle: 43
  high: 17

Licensed drivers: 56

— Vehicles per family —
  1 vehicles: 25
  0 vehicles: 4
  2 vehicles: 14

— Workers per facility —
  Coffee shop: 3
  Factory: 20
  Hospital: 3
  Gym: 3
  Office: 17
  Food court: 3
  Amusement park: 2
  Museum: 2
  Cinema: 2
  Supermarket: 3
  School: 12

— Residents per apartment —
  Uptown apartment residents : 36
  Midtown apartment residents: 34
  (all apartments)
    Uptown apartment: 36
    Midtown apartment: 34

— Families —
Ahmed Hassan, Fatima Hassan, Zara Hassan
Alex Kim, Jennifer Kim, Thomas Kim
Amanda Turner, Timothy Turner
Andrew Miller