In [None]:
import pandas as pd
import polars as pl
from util_func import *
from util_func import (
    out_dir,
    reformat_dir,
    survey_processed_dir,
    tour_extract_wkday_dir,
)
from xlsxwriter.utility import xl_rowcol_to_cell

In [None]:
# Input: weekday trip extract CSV (read and filtered by prep_df below).
trip_tour_extract_wkday_filepath = tour_extract_wkday_dir / "trip-assign_day.csv"
# Output: Excel workbook that receives the access/egress cross-tabulations.
out_filepath = out_dir / "06_TransitTrips_AccessEgress.xlsx"

In [None]:
# Load the two record sources that prep_df left-joins onto the trip extract.

# Raw survey trips: only the ID columns and the reported transit access/egress
# responses are read. The IDs are renamed to hhno/pno/tsvid
# (presumably the key names used in the trip extract -- confirm).
trip_raw = (
    pl.read_csv(
        survey_processed_dir / "trip.csv",
        columns=[
            "hh_id",
            "person_num",
            "trip_num",
            "transit_access",
            "transit_egress",
        ],
    )
    .to_pandas()
    .rename(columns={"hh_id": "hhno", "person_num": "pno", "trip_num": "tsvid"})
)

# Reformatted access/egress modes per trip; tripno is renamed to tsvid so it
# shares a column name with trip_raw.
accegr_df = (
    pd.read_csv(reformat_dir / "accegr_week.csv")
    .loc[:, ["hhno", "pno", "tripno", "acc_mode", "egr_mode"]]
    .rename(columns={"tripno": "tsvid"})
)
# Numeric mode code -> short label used in the output tables.
# Codes are the 1-based positions in the tuple below.
mode_type_dict = dict(
    enumerate(
        (
            "Walk",  # 1
            "Bike",  # 2
            "Car",  # 3
            "Taxi",  # 4
            "Transit",  # 5
            "SchBus",  # 6
            "Other",  # 7
            "ShutVan",  # 8
            "TNC",  # 9
            "Carshr",  # 10
            "Bikeshr",  # 11
            "Scooshr",  # 12
            "Lngdist",  # 13
        ),
        start=1,
    )
)


def map_access(df, col):
    """Fill in the access/egress label from the survey response where needed.

    ``col`` must be "access" or "egress". Rows whose ``acc_mode``/``egr_mode``
    is 5 or missing are flagged (``rpl_flag`` = 1), and for those rows the label
    in ``col`` is replaced according to the ``transit_access``/``transit_egress``
    code. Afterwards any remaining "Transit" label becomes "Walk" and any
    missing label becomes "Missing".

    The frame is modified in place (including the helper column ``rpl_flag``,
    which is reset on every call) and is also returned.
    """
    mode_col = col[:3] + "_mode"  # "acc_mode" / "egr_mode"
    survey_col = "transit_" + col  # "transit_access" / "transit_egress"

    # survey response code -> output label
    survey_labels = {
        1: "Walk",
        2: "Bike",
        3: "Bikeshr",
        4: "Scooshr",
        5: "TNC",
        6: "Car",
        7: "Carshr",
    }

    # this function only acts on rows where acc/egr_mode == 5 or isnan
    df["rpl_flag"] = 0
    needs_survey_value = (df[mode_col] == 5) | df[mode_col].isna()
    df.loc[needs_survey_value, "rpl_flag"] = 1

    flagged = df["rpl_flag"] == 1
    for code, label in survey_labels.items():
        df.loc[flagged & (df[survey_col] == code), col] = label

    # fallbacks for rows that were not resolved by a survey code
    df.loc[df[col] == "Transit", col] = "Walk"
    df.loc[df[col].isna(), col] = "Missing"
    return df


def prep_df(trip_tour_extract_wkday_filepath, trip_raw, accegr_df):
    """Build the trip table with labelled access and egress modes.

    Reads the trip extract CSV, passes it through ``link_dt``, and keeps rows
    with ``mode`` 6 or 7 (presumably the transit mode codes -- confirm), a
    positive ``trexpfac`` and positive ``otaz``/``dtaz``. ``trip_raw`` and
    ``accegr_df`` are then left-joined; no explicit keys are passed to
    ``merge``, so the join uses whatever columns the frames have in common.

    Returns the resulting DataFrame with the added columns "access", "egress",
    "rpl_flag" (helper column left by ``map_access``) and "count" (constant 1).
    """
    linked = link_dt(pd.read_csv(trip_tour_extract_wkday_filepath))

    keep = (
        linked["mode"].isin([6, 7])
        & (linked["trexpfac"] > 0)
        & (linked["otaz"] > 0)
        & (linked["dtaz"] > 0)
    )
    trip = (
        linked.loc[keep]
        .merge(trip_raw, how="left")
        .merge(accegr_df, how="left")
    )

    # acc/egr_mode -> access/egress (via mode_type_dict)
    # (except when acc/egr_mode == 5 or isnan, handled by map_access next)
    for side, mode_col in (("access", "acc_mode"), ("egress", "egr_mode")):
        trip[side] = trip[mode_col].map(mode_type_dict)

    # when acc/egr_mode == 5 or isnan: transit_access/egress -> access/egress
    for side in ("access", "egress"):
        trip = map_access(trip, side)
    # TODO simplify the above 2 steps into one step with polars when/then

    # unit weight, used for the unweighted tabulations
    trip["count"] = 1
    return trip


# Build the analysis table used by every tabulation in the cells below.
trip = prep_df(
    trip_tour_extract_wkday_filepath=trip_tour_extract_wkday_filepath,
    trip_raw=trip_raw,
    accegr_df=accegr_df,
)

In [None]:
# print out the results directly with dataframe operations
# (vs the whole unwieldy Excel print-out process below)
def calculate_margin_sums(df, index_col):
    """Append a row-total column and a column-total row to a polars table.

    A "sum" column is added holding the horizontal sum of every column except
    ``index_col``. A final row with the column-wise sums is then appended;
    cells of that row that come back null (expected: the non-numeric index
    column) are filled with the literal "sum".
    """
    with_row_totals = df.with_columns(sum=pl.sum_horizontal(pl.exclude(index_col)))
    totals_row = with_row_totals.sum().fill_null("sum")
    return pl.concat([with_row_totals, totals_row])


# Cross-tabulate trip counts: one row per access label, one column per egress label.
access_vs_egress = pl.from_pandas(trip).pivot(
    values="access",
    index="access",
    columns="egress",
    aggregate_function="len",
)
# TODO sort the access and egress entries so they're ordered the same
with pl.Config(
    tbl_cols=-1,  # print all columns
    tbl_rows=-1,  # print all rows
    tbl_width_chars=120,
):
    print(calculate_margin_sums(access_vs_egress, "access"))


In [None]:
# Access/egress labels to report, in the order they should appear in the output.
# NOTE(review): "TNC" and "Carshr" can be produced by mode_type_dict / map_access
# but are not listed here -- confirm whether prep_data_2d drops such trips.
access_egress_output_vals = [
    "Walk",
    "Bike",
    "Car",
    "Taxi",
    "Bikeshr",
    "Scooshr",
    "ShutVan",
    "SchBus",
    "Lngdist",
    "Other",
    "Missing",
]
# The output is sorted by label, so each label is prefixed with its zero-padded
# position ("00-Walk", "01-Bike", ...) to preserve the order above.
access_egress_output_labels = [
    f"{position:02}-{label}"
    for position, label in enumerate(access_egress_output_vals)
]
# Make sure to cross-check this list against the distinct values actually
# present in the data, e.g. trip["access"].unique() and trip["egress"].unique().
# (The pivoted access_vs_egress table has no "egress" column to select.)

In [None]:
# Per-dimension settings for the 2-D tabulations:
#   desc   - human-readable name used in the table titles
#   col    - column of the trip table to tabulate
#   vals   - values to report
#   labels - output labels, parallel to vals
col_dict = {
    "access": {
        "desc": "Access",  # was misspelled "Acess", which leaked into the titles
        "col": "access",
        "vals": access_egress_output_vals,
        "labels": access_egress_output_labels,
    },
    "egress": {
        "desc": "Egress",
        "col": "egress",
        "vals": access_egress_output_vals,
        "labels": access_egress_output_labels,
    },
}

In [None]:
# Open the output workbook with the xlsxwriter engine. The writer stays open
# across the following cells and is finalized by writer.close() in the last cell.
writer = pd.ExcelWriter(out_filepath, engine="xlsxwriter")
workbook = writer.book
# Cell format: thousands separator, one decimal place.
format1 = workbook.add_format({"num_format": "#,##0.0"})

In [None]:
# Write the access-by-egress tables to the "Weekday" sheet. For each weighting
# three tables are written one after another: the cross-tab itself, its column
# shares, and the 95% CI of those shares.
wt_cols = ["count", "trexpfac"]
wt_desc = ["(Unweighted)", "(Weighted)"]

d1_dict = col_dict["access"]
d2_dict = col_dict["egress"]

row = 0  # next row to write to; advanced by each write_to_excel call
sname = "Weekday"

# NOTE: "count" must come first in wt_cols -- the unweighted table seeds tab2,
# which the weighted pass then updates.
for wc, wd in zip(wt_cols, wt_desc):
    title_suffix = f"by {d2_dict['desc']} and {d1_dict['desc']} {wd}"

    tab = prep_data_2d(
        trip,
        d1_dict["col"],
        d1_dict["vals"],
        d1_dict["labels"],
        d2_dict["col"],
        d2_dict["vals"],
        d2_dict["labels"],
        wc,
    )
    title = f"Transit Trips {title_suffix}"
    row = write_to_excel(writer, tab.astype("float64"), sname, title, row)

    if wc == "count":
        tab2 = tab.copy()
    else:
        # Overwrite everything except the last row and last column with the
        # weighted values, keeping the unweighted last row/column (presumably
        # the margin totals used as sample sizes for the CI -- confirm).
        tab2.iloc[:-1, :-1] = tab.iloc[:-1, :-1]

    title = f"Column Shares {title_suffix}"
    row = write_to_excel(writer, getSharesIdx(tab.copy()), sname, title, row)

    title = f"Column Shares 95% CI {title_suffix}"
    row = write_to_excel(writer, getSharesIdxCI95(tab2.copy()), sname, title, row)

# Apply width 11 and the numeric format to the data columns, addressed by
# zero-indexed column numbers 1..tab.shape[1] (column 0 is left untouched).
_ = writer.sheets[sname].set_column(1, tab.shape[1], 11, format1)

In [None]:
# Close the writer, which saves the workbook to out_filepath.
writer.close()