The at-work subtour frequency model selects the number of at-work subtours made for each work tour.

- Alternatives for each work tour: none, 1 eating out tour, 1 business tour, 1 maintenance tour, 2 business tours, 1 eating out tour + 1 business tour
- Explanatory variables include employment status, income, auto ownership, the frequency of other tours, characteristics of the parent work tour, and characteristics of the workplace zone.


In [1]:
import os
import toml
import pandas as pd
import numpy as np
import plotly.express as px
import validation_data_input
import psrc_theme

import plotly.io as pio
# Plotly defaults applied to the figures created in this notebook.
pio.renderers.default = "plotly_mimetype+notebook_connected" # to show plotly figures in quarto HTML file
pio.templates.default = "simple_white+psrc_color" # set plotly template
# NOTE(review): "psrc_color" is presumably registered by the psrc_theme import above - confirm

# run get_data.ipynb to load validation data
# (%store -r restores the `validation_data` variable that was saved earlier with IPython's %store magic)
%store -r validation_data

# load the settings in validation_configuration.toml from the current working directory
config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))

In [2]:
# Work on copies of the household, person and tour tables: later cells add
# columns to these frames and should not modify the objects held by validation_data.
hh_data = validation_data.hh_data_uncloned.copy()
per_data = validation_data.persons_data_uncloned.copy()
tour_data = validation_data.tours_cleaned.copy()

In [3]:
# select all work tours that could make atwork subtours
work_tour = tour_data.loc[tour_data['tour_type']=="work"].copy()
work_tour['atwork_subtour_frequency_count'] = 0
work_tour.loc[work_tour['atwork_subtour_frequency'].isin(['eat','maint','business1']),'atwork_subtour_frequency_count'] = 1
work_tour.loc[work_tour['atwork_subtour_frequency'].isin(['eat_business','business2']),'atwork_subtour_frequency_count'] = 2

In [4]:
# weighted number of work tours by source and number of at-work subtours
df_plot = work_tour.groupby(['source','atwork_subtour_frequency_count'])['tour_weight'].sum().reset_index()
# share within each source: divide by that source's total weight
# (vectorised transform instead of a Python-level apply per group)
df_plot['percentage'] = df_plot['tour_weight'] / df_plot.groupby('source')['tour_weight'].transform('sum')
fig1 = px.bar(df_plot, x='atwork_subtour_frequency_count', y="percentage", color="source", barmode="group",
              title="atwork subtours frequency")
# no facet annotations exist on this un-faceted figure, so no annotation relabelling is needed
fig1.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".1%"))
fig1.show()

In [5]:
# weighted number of work tours by source and at-work subtour alternative
df_plot = work_tour.groupby(['source','atwork_subtour_frequency'])['tour_weight'].sum().reset_index()
# share within each source, taken over ALL alternatives (including no_subtours)
df_plot['percentage'] = df_plot['tour_weight'] / df_plot.groupby('source')['tour_weight'].transform('sum')
# 'no_subtours' is dropped for display only, so it is also left out of the category order
fig1 = px.bar(df_plot.loc[df_plot['atwork_subtour_frequency']!='no_subtours'],
              x='atwork_subtour_frequency', y="percentage", color="source", barmode="group",
              category_orders={"atwork_subtour_frequency": ['eat','maint','business1','eat_business','business2']},
              title="atwork subtours frequency by purpose (excluding no_subtours)")
# no facet annotations exist on this un-faceted figure, so no annotation relabelling is needed
fig1.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".1%"))
fig1.show()

In [6]:
# add auto_ownership with 4+: four or more vehicles share one label,
# smaller values keep their own string representation
hh_data['auto_ownership_simple'] = hh_data['auto_ownership'].map(
    lambda n_autos: "4+" if n_autos >= 4.0 else str(n_autos)
)

# add income group: break points are income quantiles of the "model results"
# households and are applied to the households of every source
var_group = hh_data.loc[hh_data['source']=="model results", ['income']].quantile([.125, .25, .50, .75])
hh_data['hhincome_group'] = pd.cut(
    hh_data['income'],
    bins=[-9999999.0, *var_group['income'].tolist(), 9999999.0],
    labels=['very low', 'low', 'medium', 'medium-high', 'high'],
)

# add person type labels
ptype_cat = {
    1: "1: Full-Time Worker",
    2: "2: Part-Time Worker",
    3: "3: University Student",
    4: "4: Non-Working Adult Age <65",
    5: "5: Non-Working Adult Age 65+",
    6: "6: High School Student Age 16+",
    7: "7: Child Age 5-15",
    8: "8: Child Age 0-4",
}
per_data['ptype_label'] = per_data['ptype'].map(ptype_cat)

# attach household income and auto ownership to each person (matched on household and source)
per_data = pd.merge(
    per_data,
    hh_data[['household_id','income','hhincome_group','auto_ownership_simple','source']],
    how='left',
    on=['household_id','source'],
)
# attach the person and household attributes to each work tour
work_tour = pd.merge(work_tour, per_data, how='left', on=['person_id','household_id','source'])

## atwork subtours frequency by segment

In [7]:
def plot_atwork_subtour(df: pd.DataFrame, var: str, title_cat: str, sub_name: str) -> None:
    """Plot at-work subtour alternative shares by source, faceted by a segment column.

    Tour weights are summed by source, segment value and
    ``atwork_subtour_frequency``; each sum is then divided by the total weight
    of its (source, segment) pair. The ``no_subtours`` alternative counts
    toward that total but is left out of the chart.

    Args:
        df: Tours with the columns ``source``, ``atwork_subtour_frequency``,
            ``tour_weight`` and the column named by ``var``.
        var: Column used to split the chart into facet columns.
        title_cat: Text appended to "atwork subtour frequency by " in the title.
        sub_name: Prefix placed before each facet value in the facet labels.
    """
    df_plot = (
        df.groupby(['source', var, 'atwork_subtour_frequency'])['tour_weight']
        .sum()
        .reset_index()
    )
    # share within each (source, segment) pair; transform('sum') broadcasts the
    # group total back to every row without a Python-level apply per group
    segment_total = df_plot.groupby(['source', var])['tour_weight'].transform('sum')
    df_plot['percentage'] = df_plot['tour_weight'] / segment_total

    # no_subtours is hidden for display only; it is already part of the denominator
    with_subtours = df_plot.loc[df_plot['atwork_subtour_frequency'] != 'no_subtours']
    fig = px.bar(with_subtours,
                 x="atwork_subtour_frequency", y="percentage", color="source",
                 facet_col=var, barmode="group",
                 category_orders={"atwork_subtour_frequency": ['eat','maint','business1','eat_business','business2']},
                 title="atwork subtour frequency by "+ title_cat)
    # facet labels arrive as "<column>=<value>"; show "<sub_name>=" and the value on separate lines
    fig.for_each_annotation(lambda a: a.update(text = sub_name + "=<br>" + a.text.split("=")[-1]))
    fig.update_xaxes(title_text="")
    fig.update_layout(height=400, width=850, yaxis=dict(tickformat=".1%"))
    fig.show()

In [8]:
# at-work subtour shares split by household vehicle count
plot_atwork_subtour(
    work_tour,
    var='auto_ownership_simple',
    title_cat="auto ownership",
    sub_name="vehicle",
)

In [9]:
# at-work subtour shares split by household income group
plot_atwork_subtour(
    work_tour,
    var='hhincome_group',
    title_cat="household income",
    sub_name="income",
)

In [10]:
# at-work subtour shares split by person type
plot_atwork_subtour(
    work_tour,
    var='ptype_label',
    title_cat="person type",
    sub_name="ptype",
)