In [None]:
"""Synthetic population descriptive statistics for CHAMP trip summaries for TIMMA

Parses output from CHAMP version: CHAMP 5.2.0-Toll_Quintile
"""
# Load IPython's autoreload extension; the %autoreload magic used in the next
# cell is only available once this has run.
%load_ext autoreload

In [None]:
# Ask autoreload to refresh already-imported modules before the imports below
# are (re)executed, so re-running this cell picks up edits to local code.
# NOTE(review): bare %autoreload is a one-shot reload, not the automatic
# "%autoreload 2" mode -- confirm this is the intended behaviour.
%autoreload
import polars as pl

from timma import load_population, crosstab_income_residency

In [None]:
# Run directory handed to load_population.
# Named POPULATION_DIR instead of `dir` so the built-in dir() is not shadowed
# for the rest of the kernel session.
POPULATION_DIR = r"X:\Projects\TIMMA\Round7\Round7_2040_weekday_ubi_currentandlowincomeresidents"

# Keep only the household/person identifiers plus the two attributes that the
# later cells check and cross-tabulate.
population = load_population(POPULATION_DIR).select(
    ["hhid", "persid", "income_quintile", "residency"]
)

In [None]:
# income_quintile is turned into a plain small integer by going through its
# string form; this sidesteps having to set up an ordered Categorical.
quintile_as_int = pl.col("income_quintile").cast(pl.Utf8).cast(pl.Int8)
population = population.with_columns(quintile_as_int)

In [None]:
# Sanity check: income_quintile and residency should not vary within a household.
# Step 1: per hhid, count the distinct values found in every other column.
#         (In this intermediate frame the persid column is the number of distinct
#         person ids in the household.)
# Step 2: keep only the two attribute columns and de-duplicate the rows. If every
#         household is consistent, the displayed result is a single row of (1, 1).
distinct_per_household = population.group_by("hhid").n_unique()
distinct_per_household.select("income_quintile", "residency").unique()

In [None]:
# Person-level table: every row of population, without the household id
# (presumably one row per person -- confirm against load_population).
persons = population.select("persid", "income_quintile", "residency")

# Household-level table: collapse to one row per hhid by taking the first
# income_quintile / residency value seen in each household.
households = (
    population
    .select("hhid", "income_quintile", "residency")
    .group_by("hhid")
    .first()
)

In [None]:
# Display the timma crosstab helper's result for the household-level table
# (presumably income quintile by residency, counted per household -- confirm in timma).
crosstab_income_residency(households)

In [None]:
# Display the timma crosstab helper's result for the person-level table
# (presumably income quintile by residency, counted per person -- confirm in timma).
crosstab_income_residency(persons)