In [1]:
from os.path import join

import pandas as pd
from util_func import *

In [2]:
# Bay Area run configuration: region tag, input folders, output workbook.
REG = "BayArea"

# All three input folders sit under the same processing-run directory.
_PROCESSING_ROOT = r"Q:\Data\Surveys\HouseholdSurveys\MTC-SFCTA2018\Processing_20200228"

# Tour-extract folders; names suggest weekday-only vs. 7-day weighting (confirm upstream).
base_dir = _PROCESSING_ROOT + r"\2_tour_extract\wt_wkday"
allwk_dir = _PROCESSING_ROOT + r"\2_tour_extract\wt_7day"

# Folder holding the spatially joined survey tables.
raw_dir = _PROCESSING_ROOT + r"\spatial_join"

# Output workbook path, prefixed with the region tag.
out_file = "out\\{}_1_AllTrips_Mode.xlsx".format(REG)

In [5]:
## Process trip records
# Read the space-delimited trip file from the weekday folder and pass it
# through link_dt (imported from util_func).
weekday_trip_path = join(base_dir, "survey2018_tripx.dat")
trip_df = link_dt(pd.read_csv(weekday_trip_path, sep=" "))

# Unit column: summing it gives a plain record count.
trip_df["count"] = 1

# Keep trips with a positive expansion factor, a positive mode code, and
# positive origin and destination TAZ codes.
has_weight_and_mode = (trip_df["trexpfac"] > 0) & (trip_df["mode"] > 0)
has_both_tazs = (trip_df["otaz"] > 0) & (trip_df["dtaz"] > 0)
trip_df = trip_df[has_weight_and_mode & has_both_tazs]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [6]:
# Tabulation dimensions keyed by short name. Each entry carries:
#   desc   - display name used in sheet titles
#   col    - column name in the trip table
#   vals   - coded values to tabulate
#   labels - one label per coded value, in the same order as vals
#            (presumably paired positionally by the prep_data_* helpers; confirm there)
col_dict = dict(
    mode=dict(
        desc="Mode",
        col="mode",
        vals=range(1, 10),
        labels=[
            "7_Wk",
            "8_Bk",
            "1_DA",
            "2_SOV2",
            "3_SOV3",
            "5_WTrn",
            "6_DTrn",
            "9_SBus",
            "4_TNC",
        ],
    ),
    day=dict(
        desc="DOW",
        col="day",
        vals=range(6, 8),
        labels=["6_Sat", "7_Sun"],
    ),
)

In [7]:
# Open the output workbook through pandas using the XlsxWriter engine, and
# keep a handle on the underlying workbook so cell formats can be registered.
fname = out_file
writer = pd.ExcelWriter(fname, engine="xlsxwriter")
workbook = writer.book

# Thousands separator with one decimal place; applied to the table columns later.
format1 = workbook.add_format(dict(num_format="#,##0.0"))

In [8]:
from xlsxwriter.utility import xl_rowcol_to_cell

## Weekday sheet: trips by mode, column shares, and 95% CI of the shares.
# `row` tracks the next free worksheet row; write_to_excel returns the
# updated position after each table.
row = 0
sname = "Weekday"
d1_dict = col_dict["mode"]

title = "All Trips by " + d1_dict["desc"]
tab = prep_data_1d(
    trip_df,
    d1_dict["desc"],
    d1_dict["col"],
    "trexpfac",
    d1_dict["vals"],
    d1_dict["labels"],
)
row = write_to_excel(tab, sname, title, row)

title = "Column Shares by " + d1_dict["desc"]
row = write_to_excel(getSharesIdx(tab.copy()), sname, title, row)

# Working copy for the CI calculation: the bottom-right cell is overwritten
# with the bottom-left cell. Presumably this swaps the weighted grand total
# for the unweighted sample size that getSharesIdxCI95 uses as N -- TODO
# confirm against prep_data_1d / getSharesIdxCI95.
tab2 = tab.copy()
tab2.iloc[-1, -1] = tab2.iloc[-1, 0]

title = "Column Shares 95% CI by " + d1_dict["desc"]
# Fix: pass the adjusted copy (tab2). Previously tab2 was built and then
# ignored, so the CI was computed from the unadjusted table, unlike the
# Weekend sheet, which feeds tab2 to getSharesIdxCI95.
row = write_to_excel(getSharesIdxCI95(tab2.copy()), sname, title, row)

# Apply width 11 and the numeric format to the data columns (column 1
# through the table's last column). The row part of the A1-style range is
# only a carrier for the column letters.
tab_range = xl_rowcol_to_cell(row, 1) + ":" + xl_rowcol_to_cell(row, tab.shape[1])
_ = writer.sheets[sname].set_column(tab_range, 11, format1)

In [9]:
## Process person records
# Day-of-week code -> suffix of the matching per-day adult weight column.
DOW_LOOKUP = {1: "mon", 2: "tue", 3: "wed", 4: "thu", 5: "fri", 6: "sat", 7: "sun"}
# Weight column names in Monday..Sunday order, derived once so the person
# subset and the trip merge below cannot drift apart.
day_wt_cols = ["wt_alladult_" + dow for dow in DOW_LOOKUP.values()]

tmp_df = pd.read_csv(join(raw_dir, "ex_person_wZones.csv"))
if REG in ("SANDAG", "SCAG"):
    # For these regions the imputed race/ethnicity field is taken from a
    # separate tab-delimited person file.
    tmp_df2 = pd.read_csv(join(raw_dir, "ex2_person.tsv"), sep="\t")
    tmp_df = tmp_df.merge(
        tmp_df2[["hh_id", "person_id", "raceeth_new_imputed"]], how="left"
    )

# .copy() makes the subset an independent frame, so the column assignment
# below does not write into a slice of the original (avoids pandas'
# SettingWithCopyWarning / chained-assignment hazard).
tmp_df = tmp_df[
    [
        "hh_id",
        "person_id",
        "person_num",
        "raceeth_new_imputed",
        "income_imputed",
        "gender",
        "age",
    ]
    + day_wt_cols
].copy()
tmp_df["person_id"] = tmp_df["person_id"].round().astype("int64")
# Rename to the short names and household/person keys used by the merges below.
tmp_df = tmp_df.rename(
    columns={
        "raceeth_new_imputed": "raceeth",
        "income_imputed": "hinc",
        "hh_id": "hhno",
        "person_num": "pno",
    }
)

per_df = pd.read_csv(join(base_dir, "survey2018_precx.dat"), sep=" ")
# No explicit key: pandas joins on every column name the two frames share.
per_df = per_df.merge(tmp_df, how="left")

## Process all week trip records
# NOTE: this rebinds trip_df, replacing the weekday trips loaded earlier.
trip_df = pd.read_csv(join(allwk_dir, "survey2018_tripx.dat"), sep=" ")
trip_df = link_dt(trip_df)

trip_df = trip_df.merge(
    per_df[["hhno", "pno", "raceeth", "hinc", "gender", "age"] + day_wt_cols],
    how="left",
    on=["hhno", "pno"],
)

# Each trip's expansion factor is the person's adult weight for the trip's
# own day of week. Start from a float column: seeding with integer 0 and then
# storing float weights is an incompatible-dtype assignment that pandas 2.1+
# deprecates.
trip_df["trexpfac"] = 0.0
for dow_num, dow in DOW_LOOKUP.items():
    on_this_day = trip_df["day"] == dow_num
    trip_df.loc[on_this_day, "trexpfac"] = trip_df.loc[
        on_this_day, "wt_alladult_" + dow
    ]
# Trips without a matching person weight get weight 0 and are dropped by the filter.
trip_df["trexpfac"] = trip_df["trexpfac"].fillna(0)

# Unit column: summing it gives a plain record count.
trip_df["count"] = 1
trip_df = trip_df[(trip_df["trexpfac"] > 0) & (trip_df["mode"] > 0)]
trip_df = trip_df[(trip_df["otaz"] > 0) & (trip_df["dtaz"] > 0)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [10]:
## Weekend sheet: day-of-week by mode tables, once unweighted and once weighted.
row = 0
sname = "Weekend"
d1_dict = col_dict["mode"]
d2_dict = col_dict["day"]

wt_cols = ["count", "trexpfac"]
wt_desc = ["(Unweighted)", "(Weighted)"]

for wc, wd in zip(wt_cols, wt_desc):
    # Shared tail of the three titles written in this pass.
    title_suffix = "{} and {} {}".format(d2_dict["desc"], d1_dict["desc"], wd)

    tab, tab_fmt = prep_data_2d(
        trip_df,
        d1_dict["col"],
        d1_dict["vals"],
        d1_dict["labels"],
        d2_dict["col"],
        d2_dict["vals"],
        d2_dict["labels"],
        wc,
    )
    title = "All Trips by " + title_suffix
    row = write_to_excel(tab.astype("float64"), sname, title, row)

    # tab2 is the table handed to the CI helper. The first pass ("count")
    # seeds it with the full table; later passes replace only the interior
    # cells, leaving the last row and last column from the first pass.
    if wc != "count":
        tab2.iloc[:-1, :-1] = tab.iloc[:-1, :-1]
    else:
        tab2 = tab.copy()

    title = "Column Shares by " + title_suffix
    row = write_to_excel(getSharesIdx(tab.copy()), sname, title, row)

    title = "Column Shares 95% CI by " + title_suffix
    row = write_to_excel(getSharesIdxCI95(tab2.copy()), sname, title, row)

# Apply width 11 and the numeric format to the data columns (column 1 through
# the table's last column); the row in the A1-style range only carries the
# column letters.
first_cell = xl_rowcol_to_cell(row, 1)
last_cell = xl_rowcol_to_cell(row, tab.shape[1])
tab_range = first_cell + ":" + last_cell
_ = writer.sheets[sname].set_column(tab_range, 11, format1)

In [11]:
# Flush all sheets and write the workbook to disk. ExcelWriter.save() was
# deprecated in pandas 1.5 and removed in 2.0; close() does the same job and
# is available in older pandas versions as well.
writer.close()