In [1]:
import pandas as pd
from util_func import *

In [2]:
# Bay Area
REG = "BayArea"

# The three input folders all sit under the same survey-processing directory,
# so the common prefix is spelled out once.
_PROC_DIR = r"Q:\Data\Surveys\HouseholdSurveys\MTC-SFCTA2018\Processing_20200228"
base_dir = _PROC_DIR + r"\2_tour_extract\wt_wkday"  # passed to prep_df() as the trip-file folder
raw_dir = _PROC_DIR + r"\spatial_join"  # folder holding ex_trip_wZones.csv
link_dir = _PROC_DIR + r"\1_reformat_survey"  # folder holding accegr_week.csv

# Output workbook, named after the region.
out_file = r"out\%s_6_TransitTrips_AccEgr.xlsx" % REG

In [5]:
## Process trip records
# Per-trip bus/rail access and egress answers from the spatially joined trip file.
# The id columns are renamed to hhno / pno / tsvid, the same names accegr_df
# ends up with below.
raw_trips = pd.read_csv(join(raw_dir, "ex_trip_wZones.csv"))[
    [
        "hh_id",
        "person_num",
        "trip_num",
        "bus_access",
        "bus_egress",
        "rail_access",
        "rail_egress",
        "mode_type_imputed",
    ]
].rename(columns={"hh_id": "hhno", "person_num": "pno", "trip_num": "tsvid"})

# Access/egress mode codes per trip; "tripno" is renamed to "tsvid" so both
# lookup tables expose the same three id columns.
accegr_df = pd.read_csv(join(link_dir, "accegr_week.csv"))[
    ["hhno", "pno", "tripno", "acc_mode", "egr_mode"]
].rename(columns={"tripno": "tsvid"})

# Mode-type code (1..13) -> short label. Codes are assigned by position in the list.
mode_type_dict = dict(
    enumerate(
        [
            "Walk",
            "Bike",
            "Car",
            "Taxi",
            "Transit",
            "SchBus",
            "Other",
            "ShutVan",
            "TNC",
            "Carshr",
            "Bikeshr",
            "Scooshr",
            "Lngdist",
        ],
        start=1,
    )
)

# Access/egress label -> reporting code (1..12), again assigned by list position.
# Note: "Transit" and "SchBus" from mode_type_dict have no entry here.
acc_type_dict = {
    label: code
    for code, label in enumerate(
        [
            "Walk",
            "Bike",
            "Car",
            "Taxi",
            "TNC",
            "Carshr",
            "Bikeshr",
            "Scooshr",
            "ShutVan",
            "Lngdist",
            "Other",
            "Missing",
        ],
        start=1,
    )
}

# Bus/rail access-egress answer codes that carry a usable mode (1 through 7).
SURV_ACC_MODES = list(range(1, 8))


def map_access(df, col, desc):
    """Fill the access/egress label in ``df[col]`` from the bus/rail answers.

    Rows are eligible for replacement when ``df[col + "_mode"]`` equals 5
    ("Transit" in ``mode_type_dict``) or ``df[col]`` is missing. For an
    eligible row, the label is taken from ``bus_<desc>`` when ``rail_<desc>``
    is not one of ``SURV_ACC_MODES``, or from ``rail_<desc>`` when
    ``bus_<desc>`` is not one of ``SURV_ACC_MODES``. When both answers are
    valid codes, neither is used. Afterwards any remaining "Transit" label
    becomes "Walk" and any remaining missing label becomes "Missing".

    Args:
        df: Trip table holding ``col``, ``col + "_mode"``, ``"bus_" + desc``
            and ``"rail_" + desc``. It is modified in place.
        col: Label column to fill ("acc" or "egr" in this notebook).
        desc: Suffix of the bus/rail answer columns ("access" or "egress").

    Returns:
        The same ``df`` object, with ``col`` updated and a helper column
        ``rpl_flag`` (1 = eligible for replacement, 0 = not) added or
        overwritten.
    """
    # Answer code -> label written into ``col``.
    answer_labels = {
        1: "Walk",
        2: "Bike",
        3: "Bikeshr",
        4: "Scooshr",
        5: "TNC",
        6: "Car",
        7: "Carshr",
    }
    bus_col, rail_col = "bus_" + desc, "rail_" + desc

    df["rpl_flag"] = 0
    df.loc[(df[col + "_mode"] == 5) | df[col].isna(), "rpl_flag"] = 1

    # First pass reads the bus answer (rail answer not a valid code); second
    # pass reads the rail answer (bus answer not a valid code). The two passes
    # can never hit the same row, so their order does not matter.
    for answered, other in ((bus_col, rail_col), (rail_col, bus_col)):
        for code, label in answer_labels.items():
            usable = (
                (df["rpl_flag"] == 1)
                & ~df[other].isin(SURV_ACC_MODES)
                & (df[answered] == code)
            )
            df.loc[usable, col] = label

    # Fallbacks for rows that could not be resolved above.
    df.loc[df[col] == "Transit", col] = "Walk"
    df.loc[df[col].isna(), col] = "Missing"
    return df


def prep_df(trip_dir):
    """Assemble the trip table with access/egress labels and codes.

    Reads ``survey2018_tripx.dat`` (space-delimited) from ``trip_dir``, passes
    it through ``link_dt``, keeps trips with ``mode`` 6 or 7 (presumably the
    transit modes -- confirm against the trip-file codebook), a positive
    ``trexpfac`` and positive ``otaz``/``dtaz``, then attaches the survey
    answers from the module-level ``raw_trips`` and ``accegr_df`` tables.

    Args:
        trip_dir: Folder containing ``survey2018_tripx.dat``.

    Returns:
        DataFrame with the added columns ``acc``, ``egr`` (labels),
        ``acc_egr`` ("<acc>_<egr>"), ``acc_type``, ``egr_type`` (codes from
        ``acc_type_dict``) and ``count`` (always 1), plus the ``rpl_flag``
        helper column left behind by ``map_access``.
    """
    trips = link_dt(pd.read_csv(join(trip_dir, "survey2018_tripx.dat"), sep=" "))

    # Row filters, applied one after another.
    trips = trips.loc[trips["mode"].isin([6, 7])]
    trips = trips[trips["trexpfac"] > 0]
    trips = trips[(trips["otaz"] > 0) & (trips["dtaz"] > 0)]

    # Left joins without explicit keys: pandas joins on every column name the
    # two frames share (expected to be hhno / pno / tsvid -- TODO confirm).
    for lookup in (raw_trips, accegr_df):
        trips = trips.merge(lookup, how="left")

    # Access first, then egress: translate the mode code to a label, then let
    # map_access refine it from the bus/rail answers.
    for col, desc in (("acc", "access"), ("egr", "egress")):
        trips[col] = trips[col + "_mode"].map(mode_type_dict)
        trips = map_access(trips, col, desc)

    trips["acc_egr"] = trips["acc"] + "_" + trips["egr"]

    for col in ("acc", "egr"):
        trips[col + "_type"] = trips[col].map(acc_type_dict)

    # Constant 1 so the same tabulation code can produce unweighted counts.
    trips["count"] = 1
    return trips


# Build the trip table (with acc/egr labels and codes) from the files under base_dir.
trip_df = prep_df(base_dir)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [6]:
# Display labels for the access/egress type codes listed under "vals"
# (presumably paired with "vals" by position inside prep_data_2d -- confirm).
_ACC_EGR_LABELS = [
    "10_Walk",
    "11_Bike",
    "12_Car",
    "13_Taxi",
    "14_TNC",
    "15_Carshr",
    "16_Bikeshr",
    "17_Scooshr",
    "18_ShuVan",
    "19_Lngdist",
    "20_Missing",
]

# Tabulation settings for the two dimensions of the access/egress cross-tab.
# "desc" is used in the table titles; the access entry previously read "Acess".
# Each entry gets its own list objects so changing one cannot affect the other.
col_dict = {
    key: {
        "desc": desc,
        "col": key,
        # Codes 1-10 plus 12; code 11 ("Other" in acc_type_dict) is not listed.
        "vals": list(range(1, 11)) + [12],
        "labels": list(_ACC_EGR_LABELS),
    }
    for key, desc in (("acc_type", "Access"), ("egr_type", "Egress"))
}

In [7]:
# Open the output workbook with the xlsxwriter engine and prepare the number
# format (thousands separator, one decimal) that is later applied to the table columns.
fname = out_file
writer = pd.ExcelWriter(fname, engine="xlsxwriter")
workbook = writer.book
format1 = workbook.add_format(dict(num_format="#,##0.0"))

In [8]:
from xlsxwriter.utility import xl_rowcol_to_cell

# Each table is produced twice: once on the constant "count" column
# (unweighted) and once on the "trexpfac" column (weighted).
wt_cols = ["count", "trexpfac"]
wt_desc = ["(Unweighted)", "(Weighted)"]

d1_dict = col_dict["acc_type"]
d2_dict = col_dict["egr_type"]

row = 0  # next free row on the sheet; write_to_excel returns the updated value
sname = "Weekday"

for wc, wd in zip(wt_cols, wt_desc):
    # Common tail of the three table titles for this weighting.
    dims_desc = f"{d2_dict['desc']} and {d1_dict['desc']} {wd}"

    title = "Transit Trips by " + dims_desc
    tab, tab_fmt = prep_data_2d(
        trip_df,
        *(d1_dict[k] for k in ("col", "vals", "labels")),
        *(d2_dict[k] for k in ("col", "vals", "labels")),
        wc,
    )
    row = write_to_excel(tab.astype("float64"), sname, title, row)

    # tab2 feeds the confidence-interval table. It starts as the unweighted
    # table; on the weighted pass only its interior cells are replaced, so its
    # last row and last column stay as in the unweighted copy (presumably the
    # totals used as sample sizes -- confirm in getSharesIdxCI95).
    if wc != "count":
        tab2.iloc[:-1, :-1] = tab.iloc[:-1, :-1]
    else:
        tab2 = tab.copy()

    title = "Column Shares by " + dims_desc
    row = write_to_excel(getSharesIdx(tab.copy()), sname, title, row)

    title = "Column Shares 95% CI by " + dims_desc
    row = write_to_excel(getSharesIdxCI95(tab2.copy()), sname, title, row)

# Runs once, after the loop: set width 11 and format1 on the data columns.
# NOTE(review): the range is built from cell references (e.g. "B55:L55"), not
# column letters; set_column appears to use only the column part -- confirm.
tab_range = ":".join(xl_rowcol_to_cell(row, c) for c in (1, tab.shape[1]))
_ = writer.sheets[sname].set_column(tab_range, 11, format1)

In [9]:
# Write the workbook to disk and release the file handle.
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0;
# close() does the same job and also exists in older pandas versions.
writer.close()