In [1]:
from pathlib import Path

import polars as pl

In [2]:
# Directory holding the raw survey CSV files; hh.csv and person.csv are read from it.
# NOTE(review): absolute Windows drive path -- the notebook only runs on a machine
# where this exact location is reachable.
raw_dir = Path(r"Q:\Data\Surveys\HouseholdSurveys\MTC-SFCTA2022\Deliverable_20240809")

In [3]:
# Household table: only the id and the two household-level weight columns.
hh = pl.read_csv(
    raw_dir.joinpath("hh.csv"),
    columns=["hh_id", "hh_weight", "hh_weight_rmove_only"],
)

# Person table: id columns, the two person-level weights and the completion code.
person = pl.read_csv(
    raw_dir.joinpath("person.csv"),
    columns=[
        "hh_id",
        "person_num",
        "person_weight",
        "person_weight_rmove_only",
        "is_complete",
    ],
)
# Attach the household weights to every person row (inner join on hh_id).
person = person.join(hh, on="hh_id", how="inner")

In [4]:
# Number of households whose hh_weight is exactly 0 (0 means every household
# carries a non-zero weight).
hh.filter(pl.col("hh_weight").eq(0)).height


0

In [5]:
# Claim checked in the next two cells, one direction each:
# a person has is_complete == 995 if and only if their person_weight == 0.

In [6]:
# Distinct is_complete codes found among persons whose person_weight is 0.
(
    person
    .filter(pl.col("person_weight").eq(0))
    .select(pl.col("is_complete").unique())
)

is_complete
i64
995


In [7]:
# Distinct person_weight values found among persons with is_complete == 995.
(
    person
    .filter(pl.col("is_complete").eq(995))
    .select(pl.col("person_weight").unique())
)

person_weight
f64
0.0


In [8]:
# Compare each person's weight with the weight of the household they were joined to.
print(
    "person_weight and hh_weight all equal?",
    person.select(pl.col("person_weight").eq(pl.col("hh_weight")).all()).item(),
)
print("total number of persons:", person.height)
# Row counts where the person-level and household-level weights disagree,
# first for the regular weights and then for the rmove-only weights.
print(
    "number of persons with different person and hh weights:",
    person.filter(pl.col("person_weight").ne(pl.col("hh_weight"))).height,
)
print(
    "number of persons with different rmove-only person and hh weights:",
    person.filter(
        pl.col("person_weight_rmove_only").ne(pl.col("hh_weight_rmove_only"))
    ).height,
)

person_weight and hh_weight all equal? False
total number of persons: 15985
number of persons with different person and hh weights: 775
number of persons with different rmove-only person and hh weights: 710


In [9]:
# Restrict to persons whose weight differs from their household weight and
# display a single boolean: True when every such person_weight is exactly 0.
print(
    "of persons with different person and hh weights, "
    "all of them have a person weight of 0:"
)
(
    person
    .filter(pl.col("person_weight").ne(pl.col("hh_weight")))
    .select(pl.col("person_weight").eq(0).all())
)

of persons with different person and hh weights, all of them have a person weight of 0:


person_weight
bool
True


In [10]:
# Same check for the rmove-only weights: among persons whose rmove-only weight
# differs from the household's, display True when every one of them is exactly 0.
print(
    "of persons with different rmove-only person and hh weights, "
    "all of them have a rmove-only person weight of 0:"
)
(
    person
    .filter(pl.col("person_weight_rmove_only").ne(pl.col("hh_weight_rmove_only")))
    .select(pl.col("person_weight_rmove_only").eq(0).all())
)

of persons with different rmove-only person and hh weights, all of them have a rmove-only person weight of 0:


person_weight_rmove_only
bool
True
