In [None]:
import pandas as pd
from util_func import *
from util_func import (
    out_dir,
    survey_processed_dir,
    tour_extract_allwk_dir,
    tour_extract_wkday_dir,
)
from xlsxwriter.utility import xl_rowcol_to_cell

In [None]:
# Path of the Excel workbook that the ExcelWriter created further down writes to.
out_filepath = out_dir / "03_TNCTrips_Purpose.xlsx"

In [None]:
## Process person records
# Load the processed survey person file and keep only the identifiers, the
# demographic attributes and the seven day-of-week adult weights.
tmp_df = pd.read_csv(survey_processed_dir / "person.csv").loc[
    :,
    [
        "hh_id",
        "person_id",
        "person_num",
        "raceeth_new_imputed",
        "income_imputed",
        "gender",
        "age",
        *(
            f"wt_alladult_{dow}"
            for dow in ("mon", "tue", "wed", "thu", "fri", "sat", "sun")
        ),
    ],
]
# person_id is rounded before the cast so it ends up as an exact int64.
tmp_df["person_id"] = tmp_df["person_id"].round().astype("int64")
# Shorten the imputed attribute names and switch the keys to hhno / pno.
tmp_df = tmp_df.rename(
    columns={
        "raceeth_new_imputed": "raceeth",
        "income_imputed": "hinc",
        "hh_id": "hhno",
        "person_num": "pno",
    }
)

# Attach the survey attributes to the weekday person extract. No `on=` is
# given, so pandas joins on every column name the two frames have in common.
# NOTE(review): confirm the shared columns are exactly the intended keys.
per_df = pd.read_csv(tour_extract_wkday_dir / "person-assign_day.csv").merge(
    tmp_df, how="left"
)

## Process trip records
# NOTE(review): `raw_trips` is read here before any assignment that is visible
# in this notebook. Presumably it is provided by `from util_func import *` --
# confirm; otherwise this cell raises NameError on a fresh kernel.
# NOTE(review): the cell overwrites `raw_trips` with renamed columns, so running
# it a second time would fail on the "hh_id" / "person_num" / "trip_num" lookup.
# Keep only the trip identifiers and the Uber/Lyft/mode-type detail columns.
raw_trips = raw_trips[
    ["hh_id", "person_num", "trip_num", "mode_uber", "mode_lyft", "mode_type_imputed"]
]
# Rename the identifiers to hhno / pno / tsvid; prep_df later merges this frame
# onto the trip extract without `on=`, i.e. on whatever column names they share.
raw_trips = raw_trips.rename(
    columns={"hh_id": "hhno", "person_num": "pno", "trip_num": "tsvid"}
)

# Weekday trip extract; it is passed through prep_df at the end of this cell.
trip_df = pd.read_csv(tour_extract_wkday_dir / "trip-assign_day.csv")

# Person-level columns that prep_df pulls from per_df onto every trip record:
# the hhno / pno merge keys, the demographic attributes, and one adult weight
# column per day of the week.
req_percols = [
    "hhno",
    "pno",
    "raceeth",
    "hinc",
    "gender",
    "age",
    *(
        f"wt_alladult_{dow}"
        for dow in ("mon", "tue", "wed", "thu", "fri", "sat", "sun")
    ),
]


def prep_df(df):
    """Reduce a trip extract to its non-taxi mode-9 trips and enrich them.

    Relies on the notebook-level names ``link_dt``, ``raw_trips``, ``per_df``
    and ``req_percols``.

    Processing order:
      1. Pass ``df`` through ``link_dt``.
      2. Keep rows with ``mode == 9`` and left-merge ``raw_trips`` (no ``on=``,
         so the join uses the column names both frames share).
      3. Drop rows whose ``mode_type_imputed`` equals 4 (taxi).
      4. Add ``tnc_type``: 1 pooled, 2 regular, 3 premium/other.
      5. Left-merge ``per_df[req_percols]`` on ``hhno`` / ``pno`` and recode
         the special ``gender`` codes.
      6. Add ``dephr`` and ``count``; keep rows with positive ``trexpfac``,
         ``mode``, ``otaz`` and ``dtaz``.

    Args:
        df: Trip DataFrame. The frame returned by ``link_dt`` must provide the
            ``mode``, ``deptm``, ``trexpfac``, ``otaz``, ``dtaz``, ``hhno`` and
            ``pno`` columns.

    Returns:
        The filtered trip DataFrame with the added ``raw_trips`` columns,
        person columns, ``tnc_type``, ``dephr`` and ``count``.
    """
    linked = link_dt(df)

    tnc = linked.loc[linked["mode"] == 9,].merge(raw_trips, how="left")
    tnc = tnc[tnc["mode_type_imputed"] != 4]  # remove taxi trips

    # Classify by the Uber/Lyft service codes. "Regular" is written last, so it
    # wins when a row matches both the pooled and the regular condition.
    is_pooled = (tnc["mode_uber"] == 1) | (tnc["mode_lyft"] == 1)
    is_regular = (tnc["mode_uber"] == 2) | (tnc["mode_lyft"] == 2)
    tnc["tnc_type"] = 3  # prem/other
    tnc.loc[is_pooled, "tnc_type"] = 1  # pooled
    tnc.loc[is_regular, "tnc_type"] = 2  # regular

    tnc = tnc.merge(per_df[req_percols], how="left", on=["hhno", "pno"])
    # 997 -> 5 (Other), 999 -> 6 (NoAnswer); -9998 and 995 -> 6 (Missing).
    for raw_code, recoded in ((997, 5), (999, 6)):
        tnc.loc[tnc["gender"] == raw_code, "gender"] = recoded
    tnc.loc[tnc["gender"].isin([-9998, 995]), "gender"] = 6

    # deptm / 100 truncated to an integer gives the departure hour
    # (assumes deptm is encoded as HHMM -- TODO confirm).
    tnc["dephr"] = (tnc["deptm"] / 100).astype(int)
    tnc["count"] = 1  # unit weight, used for the unweighted tabulations

    has_weight_and_mode = (tnc["trexpfac"] > 0) & (tnc["mode"] > 0)
    has_both_tazs = (tnc["otaz"] > 0) & (tnc["dtaz"] > 0)
    return tnc[has_weight_and_mode & has_both_tazs]


# Replace the weekday extract with its prepared subset. `trip_df` is
# overwritten, so re-running this line alone would apply prep_df a second time.
trip_df = prep_df(trip_df)

In [None]:
# Tabulation dimensions. Each entry describes one dimension:
#   desc   - short name used in table titles and as the sheet name
#   col    - trip_df column holding the coded value
#   vals   - the codes to tabulate, in order
#   labels - one display label per code, positionally aligned with `vals`
col_dict = {
    "dpurp": {
        "desc": "DPurp",
        "col": "dpurp",
        "vals": range(0, 8),
        "labels": [
            "1_Home",
            "2_Work",
            "3_School",
            "4_Escort",
            "5_PersBus",
            "6_Shop",
            "7_Meal",
            "8_SocRec",
        ],
    },
    "raceeth": {
        "desc": "RaceEth",
        "col": "raceeth",
        "vals": range(1, 6),
        "labels": ["1_Hispanic", "2_Black", "3_Asian/PI", "4_White", "5_Other"],
    },
    "hinc": {
        "desc": "HHInc",
        "col": "hinc",
        "vals": range(1, 9),
        "labels": [
            "1_25K",
            "2_25_50K",
            "3_50_75K",
            "4_75_100K",
            "5_100_150K",
            "6_150_200K",
            "7_200_250K",
            "8_250K",
        ],
    },
    "age": {
        "desc": "Age",
        "col": "age",
        "vals": range(4, 11),
        "labels": ["18-24", "25-34", "35-44", "45-54", "55-64", "65-74", "75+"],
    },
    "gender": {
        "desc": "Gend",
        "col": "gender",
        "vals": range(1, 7),
        "labels": ["1_F", "2_M", "3_Trns", "4_NBin", "5_Oth", "6_Miss"],
    },
    "tncmode": {
        "desc": "TNCMode",
        "col": "tnc_type",
        "vals": range(1, 4),
        # prep_df codes pooled as 1 and regular as 2; the numeric label
        # prefixes deliberately differ from the codes.
        "labels": ["2_Pool", "1_Reg", "3_PremOth"],
    },
    "tod": {
        "desc": "TOD",
        "col": "dephr",
        "vals": range(0, 24),
        # Label prefix is hour + 10; the suffix is the clock hour.
        "labels": [
            "10_0AM",
            "11_1AM",
            "12_2AM",
            "13_3AM",
            "14_4AM",
            "15_5AM",
            "16_6AM",
            "17_7AM",
            "18_8AM",
            "19_9AM",
            "20_10AM",
            "21_11AM",
            "22_12PM",  # hour 12 is noon; the label previously read "12AM"
            "23_1PM",
            "24_2PM",
            "25_3PM",
            "26_4PM",
            "27_5PM",
            "28_6PM",
            "29_7PM",
            "30_8PM",
            "31_9PM",
            "32_10PM",
            "33_11PM",
        ],
    },
    "day": {
        "desc": "DOW",
        "col": "day",
        "vals": range(1, 8),
        "labels": ["1_Mon", "2_Tue", "3_Wed", "4_Thu", "5_Fri", "6_Sat", "7_Sun"],
    },
}

In [None]:
# Open the output workbook with the xlsxwriter engine. The writer stays open
# across the following cells until writer.close() is called in the last cell.
# NOTE(review): write_to_excel() is called below without a writer argument;
# confirm how it locates this writer.
writer = pd.ExcelWriter(out_filepath, engine="xlsxwriter")
workbook = writer.book
# Cell format with thousands separators and one decimal place; passed to
# set_column for each sheet.
format1 = workbook.add_format({"num_format": "#,##0.0"})

In [None]:
# "Weekday" sheet: one-way tables of trips by `dpurp`, weighted by trexpfac.
row = 0
sname = "Weekday"
d1_dict = col_dict["dpurp"]  # also read by the later two-way cells

title = "TNC Trips by " + d1_dict["desc"]
tab = prep_data_1d(
    trip_df,
    d1_dict["desc"],
    d1_dict["col"],
    "trexpfac",
    d1_dict["vals"],
    d1_dict["labels"],
)
# write_to_excel hands back the row to continue from
# (presumably the next free row on the sheet -- confirm).
row = write_to_excel(tab, sname, title, row)

title = "Column Shares by " + d1_dict["desc"]
row = write_to_excel(getSharesIdx(tab.copy()), sname, title, row)

# NOTE(review): tab2 is built and patched here but never used in this cell --
# the CI call below receives `tab`, whereas the two-way cells pass `tab2` to
# getSharesIdxCI95. Confirm whether `tab2` was meant to be used here as well.
tab2 = tab.copy()
tab2.iloc[-1, -1] = tab2.iloc[-1, 0]

title = "Column Shares 95% CI by " + d1_dict["desc"]
row = write_to_excel(getSharesIdxCI95(tab.copy()), sname, title, row)

# Set width 11 and the one-decimal number format on the columns spanned by
# tab_range (column 1 through column tab.shape[1]).
tab_range = xl_rowcol_to_cell(row, 1) + ":" + xl_rowcol_to_cell(row, tab.shape[1])
_ = writer.sheets[sname].set_column(tab_range, 11, format1)

In [None]:
# Weight columns to tabulate, paired positionally with the suffix shown in each
# table title. "count" has to come first: the unweighted pass creates `tab2`,
# which the weighted pass then updates.
wt_cols = ["count", "trexpfac"]
wt_desc = ["(Unweighted)", "(Weighted)"]

# One sheet per second dimension, each crossed with the d1_dict dimension.
for key in ["tod", "raceeth", "hinc", "age", "gender", "tncmode"]:
    d2_dict = col_dict[key]
    sname = d2_dict["desc"]
    row = 0

    for wc, wd in zip(wt_cols, wt_desc):
        title_tail = f'{d2_dict["desc"]} and {d1_dict["desc"]} {wd}'

        title = f"TNC Trips by {title_tail}"
        tab, tab_fmt = prep_data_2d(
            trip_df,
            d1_dict["col"],
            d1_dict["vals"],
            d1_dict["labels"],
            d2_dict["col"],
            d2_dict["vals"],
            d2_dict["labels"],
            wc,
        )
        row = write_to_excel(tab.astype("float64"), sname, title, row)

        # tab2 feeds the CI table. The unweighted pass stores the whole count
        # table; the weighted pass overwrites everything except the last row
        # and last column, which therefore keep their unweighted values.
        if wc != "count":
            tab2.iloc[:-1, :-1] = tab.iloc[:-1, :-1]
        else:
            tab2 = tab.copy()

        title = f"Column Shares by {title_tail}"
        row = write_to_excel(getSharesIdx(tab.copy()), sname, title, row)

        title = f"Column Shares 95% CI by {title_tail}"
        row = write_to_excel(getSharesIdxCI95(tab2.copy()), sname, title, row)

    # Width 11 and the one-decimal format for columns 1..tab.shape[1].
    first_cell = xl_rowcol_to_cell(row, 1)
    last_cell = xl_rowcol_to_cell(row, tab.shape[1])
    tab_range = f"{first_cell}:{last_cell}"
    _ = writer.sheets[sname].set_column(tab_range, 11, format1)

In [None]:
## Process all week trip records
trip_df = pd.read_csv(tour_extract_allwk_dir / "trip-assign_day.csv")
trip_df = prep_df(trip_df)
# NOTE(review): prep_df has already dropped rows whose extract `trexpfac` is
# not positive, before the day-specific weights are assigned below -- confirm
# that this ordering is intended.

# Maps the numeric `day` code to the suffix of the matching wt_alladult_* column.
DOW_LOOKUP = {1: "mon", 2: "tue", 3: "wed", 4: "thu", 5: "fri", 6: "sat", 7: "sun"}

# Replace the expansion factor with the adult weight of the trip's own day.
# The column starts as float 0.0 (not int 0) so that writing the weights into
# it is not an int -> float upcast, which pandas >= 2.1 deprecates.
trip_df["trexpfac"] = 0.0
for dow_num, dow in DOW_LOOKUP.items():
    is_dow = trip_df["day"] == dow_num
    trip_df.loc[is_dow, "trexpfac"] = trip_df.loc[is_dow, "wt_alladult_" + dow]
# Rows whose weight is missing get a zero weight.
trip_df["trexpfac"] = trip_df["trexpfac"].fillna(0)

In [None]:
# "DOW" sheet setup: start at the top row and use the day-of-week dimension as
# the second dimension for the tabulation loop in the next cell.
row = 0
sname = "DOW"
d2_dict = col_dict["day"]

In [None]:
# Two passes over the all-week trips, unweighted first and weighted second;
# each pass writes a value table, a column-share table and a 95% CI table.
for wc, wd in zip(wt_cols, wt_desc):
    title_tail = f'{d2_dict["desc"]} and {d1_dict["desc"]} {wd}'

    title = f"TNC Trips by {title_tail}"
    tab, tab_fmt = prep_data_2d(
        trip_df,
        d1_dict["col"],
        d1_dict["vals"],
        d1_dict["labels"],
        d2_dict["col"],
        d2_dict["vals"],
        d2_dict["labels"],
        wc,
    )
    row = write_to_excel(tab.astype("float64"), sname, title, row)

    # tab2 feeds the CI table. The unweighted pass stores the whole count
    # table; the weighted pass overwrites everything except the last row and
    # last column, which therefore keep their unweighted values.
    if wc != "count":
        tab2.iloc[:-1, :-1] = tab.iloc[:-1, :-1]
    else:
        tab2 = tab.copy()

    title = f"Column Shares by {title_tail}"
    row = write_to_excel(getSharesIdx(tab.copy()), sname, title, row)

    title = f"Column Shares 95% CI by {title_tail}"
    row = write_to_excel(getSharesIdxCI95(tab2.copy()), sname, title, row)

# Width 11 and the one-decimal format for columns 1..tab.shape[1].
first_cell = xl_rowcol_to_cell(row, 1)
last_cell = xl_rowcol_to_cell(row, tab.shape[1])
tab_range = f"{first_cell}:{last_cell}"
_ = writer.sheets[sname].set_column(tab_range, 11, format1)

In [None]:
# Close the ExcelWriter opened earlier so the workbook is saved to out_filepath.
writer.close()