In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import polars as pl
import seaborn as sns

In [None]:
# Root folder the trip summary is read from (absolute path on the X: drive).
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept here because other cells may refer to it.
dir = r"X:\Projects\TIMMA\Round7\Round7_2040_weekday_ubi_currentandlowincomeresidents"
trips_joined_filename = "TRIPMC1-simplified.parquet"
# The parquet file lives in the "summaries" subfolder of that root.
trips_joined_filepath = Path(dir) / "summaries" / trips_joined_filename

In [None]:
# Load the trip table and normalise the dtypes of the two columns used below.
trips = pl.read_parquet(trips_joined_filepath).with_columns(
    [
        # income_quintile: Categorical -> string -> small integer, which avoids
        # having to define an ordering on the Categorical.
        pl.col("income_quintile").cast(pl.Utf8).cast(pl.Int8),
        # trip_mode: Categorical -> string so that is_in() can be applied to it.
        pl.col("trip_mode").cast(pl.Utf8),
    ]
)

In [None]:
def filter_group_count(trips, filter, by, title=True):
    """Count the rows of ``trips`` matching ``filter`` per ``by`` group and bar-plot the counts.

    Parameters
    ----------
    trips : polars.DataFrame
        Table to summarise; must contain the columns referenced by ``filter``
        and ``by``.
    filter : polars.Expr
        Boolean expression selecting the rows to count. (The name shadows the
        builtin inside this function; it is kept so keyword callers still work.)
    by : str
        Column to group by; also used as the x variable of the bar plot.
    title : bool, default True
        When true, the plot is titled with the filter expression and ``by``.

    Returns
    -------
    tuple
        ``(summary_df, ax)`` where ``summary_df`` holds one row per group with
        a "count" column, sorted by ``by``, and ``ax`` is the object returned
        by ``sns.barplot`` (the Axes the bars were drawn on), regardless of
        ``title``.
    """
    # group_by() does not guarantee any group order, so sort to make both the
    # table and the bar order reproducible between runs.
    # NOTE(review): newer polars releases deprecate GroupBy.count() in favour of
    # len(), which names its column "len" - confirm the installed version
    # before switching.
    summary_df = trips.filter(filter).group_by(by).count().sort(by)
    ax = sns.barplot(
        data=summary_df.to_pandas(),
        x=by,
        y="count",
    )
    if title:
        # set_title() returns the title Text object, not the Axes, so its
        # result must not be rebound to `ax`.
        ax.set_title(f"filter: {filter}\ngroupby: {by}")
    return (summary_df, ax)

In [None]:
# Trips whose origin is "TI", counted per income quintile.
# (Named descriptively instead of `filter` so the builtin is not shadowed.)
origin_ti_filter = pl.col("origin") == "TI"
summary_df, plot = filter_group_count(trips, origin_ti_filter, "income_quintile")
display(summary_df)
plt.show()

In [None]:
# Trips that end at "TI" but start somewhere else, counted per income quintile.
# (Named descriptively instead of `filter` so the builtin is not shadowed.)
inbound_ti_filter = (pl.col("origin") != "TI") & (pl.col("destination") == "TI")
summary_df, plot = filter_group_count(trips, inbound_ti_filter, "income_quintile")
display(summary_df)
plt.show()

In [None]:
# Building blocks: does the trip start / end at "TI"?
origin_is_ti = pl.col("origin") == "TI"
destination_is_ti = pl.col("destination") == "TI"

# Either end at "TI" (trips with both ends at "TI" are included).
ti_trips_inc_intra_ti_filter = origin_is_ti | destination_is_ti
# Exactly one end at "TI" (XOR drops trips with both ends at "TI").
ti_trips_exc_intra_ti_filter = origin_is_ti ^ destination_is_ti

In [None]:
# Trips with either end at "TI" (both-ends-at-"TI" trips included), counted per purpose.
summary_df, plot = filter_group_count(trips, ti_trips_inc_intra_ti_filter, "purpose")
display(summary_df)
plt.show()

In [None]:
# Trips with either end at "TI" (both-ends-at-"TI" trips included), counted per trip mode.
summary_df, plot = filter_group_count(
    trips,
    ti_trips_inc_intra_ti_filter,
    "trip_mode",
)
display(summary_df)
plt.show()

In [None]:
# Trips with exactly one end at "TI", counted per trip mode.
summary_df, plot = filter_group_count(
    trips,
    ti_trips_exc_intra_ti_filter,
    "trip_mode",
)
display(summary_df)
plt.show()

In [None]:
# Trips with exactly one end at "TI" made by one of the listed auto modes,
# counted per purpose.
# (Named descriptively instead of `filter` so the builtin is not shadowed.)
auto_trips_filter = ti_trips_exc_intra_ti_filter & (
    pl.col("trip_mode").is_in(["auto-DA", "auto-SR2", "auto-SR3"])
)
summary_df, plot = filter_group_count(trips, auto_trips_filter, "purpose")
display(summary_df)
plt.show()

In [None]:
# Transit trips with exactly one end at "TI", counted per residency.
# (Named descriptively instead of `filter` so the builtin is not shadowed.)
transit_trips_filter = ti_trips_exc_intra_ti_filter & (pl.col("trip_mode") == "transit")
summary_df, plot = filter_group_count(trips, transit_trips_filter, "residency")
display(summary_df)
plt.show()

In [None]:
# Heatmap of trip counts by trip mode (rows) and income quintile (columns),
# restricted to trips with exactly one end at "TI".
mode_income_counts = (
    trips.filter(ti_trips_exc_intra_ti_filter)
    .group_by(["trip_mode", "income_quintile"])
    .count()
)
# Reshape the long (mode, quintile, count) table into a mode x quintile matrix.
mode_income_matrix = mode_income_counts.to_pandas().pivot(
    index="trip_mode", columns="income_quintile", values="count"
)
sns.heatmap(mode_income_matrix)
plt.show()