In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
pio.templates.default = "simple_white+psrc_color" # set plotly template

# The configuration folder sits four levels above the working directory.
# The path is assembled from separate components so os.path.join inserts the
# platform's own separator (a literal '..\\..' only resolves on Windows).
config_dir = os.path.join(os.getcwd(), '..', '..', '..', '..', 'configuration')
config = toml.load(os.path.join(config_dir, 'validation_configuration.toml'))
input_config = toml.load(os.path.join(config_dir, 'input_configuration.toml'))
# model_dir = os.path.join(os.getcwd(), '..\\..\\..\\..\\')

In [2]:
#| include: false

# Load every survey table through validation_data_input.get_data, in the same
# order as before: household, person, tour, trip, person_day.
hh, person, tour, trip, person_day = (
    validation_data_input.get_data(table_name, source="survey")
    for table_name in ('household', 'person', 'tour', 'trip', 'person_day')
)

read household survey


read person survey


read tour survey


read trip survey


read person_day survey


- This analysis uses Daysim-format survey data.

- Workers are divided into 5 groups (groups 2–5 apply only to workers whose work parcel differs from their home parcel):
    1. work from home (wfh): home parcel = work parcel
    2. telecommuter: works at home 3 hours or more and makes no work tours
    3. didn't-work worker: works at home less than 3 hours and makes no work tours
    4. hybrid worker: works at home 3 hours or more and makes at least one work tour
    5. commuter: works at home less than 3 hours and makes at least one work tour

In [3]:
# person_day and tour cover more people/households than person and hh,
# so the person/household attributes are attached with an inner join
person_hh = person.merge(hh, on=['hhno'], how='left')
survey_person_day = person_day.merge(person_hh, on=['hhno','pno'], how='inner')

# Define worker type: building blocks for the five worker groups
is_worker = survey_person_day['pwtyp']>0
at_home_parcel = survey_person_day['pwpcl']==survey_person_day['hhparcel']
away_parcel = survey_person_day['pwpcl']!=survey_person_day['hhparcel']
home_3hr_plus = survey_person_day['wkathome']>=3
home_under_3hr = survey_person_day['wkathome']<3
no_work_tour = survey_person_day['wktours']==0
some_work_tour = survey_person_day['wktours']>0

# The conditions are mutually exclusive; rows matching none of them
# (including every non-worker) keep the default label.
worker_conditions = [
    is_worker & at_home_parcel,
    is_worker & away_parcel & home_3hr_plus & no_work_tour,
    is_worker & away_parcel & home_under_3hr & no_work_tour,
    is_worker & away_parcel & home_3hr_plus & some_work_tour,
    is_worker & away_parcel & home_under_3hr & some_work_tour,
]
worker_labels = ['wfh', 'telecommuter', 'didn\'t work worker', 'hybrid worker', 'commuter']
survey_person_day['worker_type_5'] = np.select(worker_conditions, worker_labels, default='not worker')

# number of escort stops on person day (trips with destination purpose code 3)
df_escort_stops = trip[trip['dpurp']==3].groupby(['hhno', 'pno', 'day'])['tsvid'].agg(escort_stops = "size").reset_index()
survey_person_day = survey_person_day.merge(df_escort_stops, how='left', on=['hhno', 'pno', 'day'])
# Only the freshly merged count is zero-filled: person-days without escort
# trips get 0, while missing values in every other column are left as-is
# (a frame-wide fillna(0) would silently turn them into real-looking zeros).
survey_person_day['escort_stops'] = survey_person_day['escort_stops'].fillna(0).astype('int32')

# number of non-mandatory tours on person day
# (row-wise sum skips missing values, i.e. treats a missing count as 0)
non_mandatory_cols = ['estours', 'pbtours', 'shtours', 'mltours', 'sotours', 'retours', 'metours']
survey_person_day['non_mandatory_tours'] = survey_person_day[non_mandatory_cols].sum(axis=1)

# presence of children in household (hh515 + hhcu5; missing counts treated as 0)
survey_person_day['hhcu15'] = survey_person_day[['hh515', 'hhcu5']].sum(axis=1)
survey_person_day['children'] = np.where(survey_person_day['hhcu15']>0, 'have kid(s)', 'no kid')


# Attach person-day attributes (including the worker type) to every tour.
# NOTE(review): 'worker_type' is not created in this notebook; presumably it
# comes from the loaded survey tables - confirm.
person_day_cols = ['hhno', 'pno', 'day','wktours','wkathome', 'pwtyp', 'pwpcl','hhparcel', 'worker_type','worker_type_5']
survey_tour = tour.merge(survey_person_day[person_day_cols], how='inner', on=['hhno', 'pno', 'day'])

# labels for the tour purpose codes
purpose_dict = {
    1: 'work',
    2: 'school',
    3: 'escort',
    4: 'personal business',
    5: 'shopping',
    6: 'meal',
    7: 'social & recreation',
}
survey_tour['pdpurp_label'] = survey_tour['pdpurp'].map(purpose_dict)

# hour in which the tour leaves its origin
survey_tour['tlvorig_hr'] = survey_tour['tlvorig']//60
# hour in which the tour leaves its destination
survey_tour['tlvdest_hr'] = survey_tour['tlvdest']//60

# tour duration in whole hours; a negative value means the return time is
# numerically before the departure time, so 1440 minutes (one day) is added
same_day_hr = (survey_tour['tarorig'] - survey_tour['tlvorig'])//60
wrapped_hr = (survey_tour['tarorig'] + 1440 - survey_tour['tlvorig'])//60
survey_tour['tour_duration_hr'] = same_day_hr.mask(same_day_hr<0, wrapped_hr)


# trip_purpose_dict = {0: "home", 
#                      1: "work", 
#                      2: "school", 
#                      3: "escort", 
#                      4: "personal business", 
#                      5: "shopping", 
#                      6: "meal", 
#                      7: "social & recreation"}

# number of non-mandatory stops on tours (trips with destination purpose code above 2)
survey_tour_non_mand_stops = trip[trip['dpurp']>2].groupby(['hhno', 'pno', 'day', 'tour'])['tsvid'].agg(non_mand_stop_count = "size").reset_index()
survey_tour = survey_tour.merge(survey_tour_non_mand_stops, how='left', on=['hhno', 'pno', 'day', 'tour'])
# Zero-fill only the merged count (tours without such stops). A frame-wide
# fillna(0) would also turn missing hours/durations into a bogus "0" category
# in the plots built from this frame.
survey_tour['non_mand_stop_count'] = survey_tour['non_mand_stop_count'].fillna(0).astype('int32')

# distance
# NOTE(review): bins are right-closed, so a distance of exactly 0 (and any
# missing distance) gets no bin - confirm that is intended.
survey_tour['tautodist_bin'] = pd.cut(survey_tour['tautodist'], bins=[0,1,2,5,15,9999],
                                    labels=['0 to 1', '1 to 2', '2 to 5', '5 to 15', '15 or more'])

# number of atwork subtours on person day (tours that have a parent tour)
survey_subtours = survey_tour[survey_tour['parent']>0].groupby(['hhno', 'pno', 'day'])['tour'].agg(subtour_count = "size").reset_index()
survey_person_day = survey_person_day.merge(survey_subtours, how='left', on=['hhno', 'pno', 'day'])
# Zero-fill only the merged count (person-days without subtours); other
# columns keep their missing values instead of being overwritten with 0.
survey_person_day['subtour_count'] = survey_person_day['subtour_count'].fillna(0).astype('int32')


# for tour rates
# keep work tours (pdpurp == 1) made by 'commuter' and 'hybrid worker' only
is_work_tour = survey_tour['pdpurp']==1
made_by_commuting_worker = survey_tour['worker_type_5'].isin(['commuter','hybrid worker'])
survey_work_tour = survey_tour.loc[is_work_tour & made_by_commuting_worker].copy()


# worker groups in display order (excludes 'not worker')
workers = [
    "wfh",
    "telecommuter",
    "hybrid worker",
    "commuter",
    "didn't work worker",
]

## worker counts

In [4]:
# weighted share of each worker type, all person-days (including 'not worker')
df_worker_count = survey_person_day.groupby('worker_type_5')['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
# vectorized Series.sum() instead of Python's builtin sum() over the Series
df_worker_count['percent'] = df_worker_count['pdexpfac'] / df_worker_count['pdexpfac'].sum()

fig = px.bar(df_worker_count, x="worker_type_5", y="percent",
             barmode="group",hover_data=['sample_count','pdexpfac'],
             # same order as the shared `workers` list, with non-workers last
             category_orders={'worker_type_5': workers + ["not worker"]},
             title="person count by worker type")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis_tickformat = '.2%')
fig.show()

In [5]:
# weighted share of each worker type, restricted to the groups in `workers`
df_worker_count = survey_person_day[survey_person_day['worker_type_5'].isin(workers)].\
    groupby('worker_type_5')['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
# vectorized Series.sum() instead of Python's builtin sum() over the Series
df_worker_count['percent'] = df_worker_count['pdexpfac'] / df_worker_count['pdexpfac'].sum()

fig = px.bar(df_worker_count, x="worker_type_5", y="percent",
             barmode="group",hover_data=['sample_count','pdexpfac'],
             category_orders={'worker_type_5': workers},
             title="person count by worker type")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis_tickformat = '.2%')
fig.show()

In [6]:
# weighted counts by worker type and pagey, for workers with a positive pagey
df_worker_count = survey_person_day[(survey_person_day['worker_type_5'].isin(workers)) & (survey_person_day['pagey']>0)].\
    groupby(['worker_type_5','pagey'])['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_worker_count['percentage'] = df_worker_count['pdexpfac'] / \
    df_worker_count.groupby('worker_type_5')['pdexpfac'].transform('sum')
# cast to string so the x axis is treated as categories
df_worker_count['pagey'] = df_worker_count['pagey'].astype(str)

fig = px.bar(df_worker_count,
             x="pagey", y="percentage", barmode="group",
             color='worker_type_5',
             category_orders={'worker_type_5': workers},
             hover_data=['pdexpfac','sample_count'],
             title="worker type by age")
fig.update_layout(height=400, width=750)
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()

In [7]:
# weighted counts by worker type and presence of children in the household
df_worker_count = survey_person_day[(survey_person_day['worker_type_5'].isin(workers))].\
    groupby(['worker_type_5','children'])['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_worker_count['percentage'] = df_worker_count['pdexpfac'] / \
    df_worker_count.groupby('worker_type_5')['pdexpfac'].transform('sum')

fig = px.bar(df_worker_count,
             x="children", y="percentage", barmode="group",
             color='worker_type_5',
             category_orders={'worker_type_5': workers},
             hover_data=['pdexpfac','sample_count'],
             title="worker type by presence of children in household (age under 16)")
fig.update_layout(height=400, width=750)
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()

## tour rate by worker type

In [8]:
def calc_rates(df, tour_type):
    """Compute the pdexpfac-weighted rate of `tour_type` per worker type.

    Parameters
    ----------
    df : DataFrame with columns 'worker_type_5', 'pdexpfac' and `tour_type`.
    tour_type : name of the count column to turn into a rate.

    Returns
    -------
    DataFrame with one row per 'worker_type_5' value and the columns
    '<tour_type>_wt' (sum of count * pdexpfac), 'pdexpfac_sum' (sum of
    pdexpfac) and '<tour_type>_rate' (their ratio). `df` is not modified.
    """
    weighted_col = tour_type + '_wt'
    rate_col = tour_type + '_rate'

    # assign() returns a new frame, so the caller's data stays untouched
    weighted = df.assign(**{weighted_col: df[tour_type] * df['pdexpfac']})

    rates = (
        weighted
        .groupby('worker_type_5')
        .agg(**{weighted_col: (weighted_col, 'sum'),
                'pdexpfac_sum': ('pdexpfac', 'sum')})
        .reset_index()
    )
    rates[rate_col] = rates[weighted_col] / rates['pdexpfac_sum']

    return rates

In [9]:
def plot_tour_rate(df, tour_type, title):
    """Show a bar chart of '<tour_type>_rate' by worker type.

    `df` needs the columns 'worker_type_5', '<tour_type>_rate',
    '<tour_type>_wt' and 'pdexpfac_sum' (the layout returned by calc_rates).
    The figure is displayed with fig.show(); nothing is returned.
    """
    worker_order = ["wfh","telecommuter","hybrid worker","commuter","didn't work worker"]
    rate_col = tour_type+'_rate'
    hover_cols = [tour_type+'_wt','pdexpfac_sum']

    fig = px.bar(
        df,
        x="worker_type_5",
        y=rate_col,
        barmode="group",
        hover_data=hover_cols,
        category_orders={'worker_type_5': worker_order},
        title=title,
    )
    fig.update_layout(
        height=300,
        width=700,
        font=dict(size=11),
        yaxis_tickformat='.2f',
        yaxis_title='Tour Rate',
    )
    fig.show()

In [10]:
# weighted work tours per person-day, by worker type
df_plot = calc_rates(df=survey_person_day, tour_type='wktours')
plot_tour_rate(df=df_plot, tour_type='wktours', title="work tour tour rate")

- non-mandatory tour calculation:

`survey_person_day['non_mandatory_tours'] = survey_person_day['estours']+survey_person_day['pbtours']+survey_person_day['shtours']+survey_person_day['mltours']+survey_person_day['sotours']+survey_person_day['retours']+survey_person_day['metours']`

In [11]:
# weighted non-mandatory tours per person-day, by worker type
df_plot = calc_rates(df=survey_person_day, tour_type='non_mandatory_tours')
plot_tour_rate(df=df_plot, tour_type='non_mandatory_tours', title="non-mandatory tour tour rate")

In [12]:
# weighted escort tours per person-day, by worker type
df_plot = calc_rates(df=survey_person_day, tour_type='estours')
plot_tour_rate(df=df_plot, tour_type='estours', title="escort tour tour rate")

In [13]:
# weighted escort stops (trips) per person-day, by worker type
df_plot = calc_rates(df=survey_person_day, tour_type='escort_stops')
plot_tour_rate(df=df_plot, tour_type='escort_stops', title="escort trip trip rate")

In [14]:

# weighted atwork subtours per person-day, by worker type
df_plot = calc_rates(df=survey_person_day, tour_type='subtour_count')
plot_tour_rate(df=df_plot, tour_type='subtour_count', title="atwork subtour tour rate")

## work tour 

### departure time

In [15]:
# weighted work tours by worker type and hour of leaving the tour origin
df_plot = survey_work_tour.groupby(['worker_type_5','tlvorig_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="tlvorig_hr", y="percentage", barmode="group",
             color='worker_type_5',
             hover_data=['toexpfac','sample_count'],
             title="work tour departure time (hour) by worker type")
fig.update_layout(height=400, width=750)
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()

In [16]:
# weighted work tours by worker type and hour of leaving the tour destination
df_plot = survey_work_tour.groupby(['worker_type_5','tlvdest_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="tlvdest_hr", y="percentage", barmode="group",
             color='worker_type_5',
             hover_data=['toexpfac','sample_count'],
             title="work destination departure time (hour) by worker type")
fig.update_layout(height=400, width=750)
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()

### tour duration 

In [17]:
# weighted work tours by worker type and tour duration in whole hours
df_plot = survey_work_tour.groupby(['worker_type_5','tour_duration_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="tour_duration_hr", y="percentage", barmode="group",
             color='worker_type_5',
             hover_data=['toexpfac','sample_count'],
             title="work tour duration (hour) by worker type")
fig.update_layout(height=400, width=750)
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()

### number of non-mandatory stops

In [18]:
# weighted work tours by worker type and number of non-mandatory stops
df_plot = survey_work_tour.groupby(['worker_type_5','non_mand_stop_count'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="non_mand_stop_count", y="percentage", barmode="group",
             color='worker_type_5',
             hover_data=['toexpfac','sample_count'],
             title="non-mandatory stops on work tours by worker type")
fig.update_layout(height=400, width=750)
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()

### distance to work

In [19]:
# weighted work tours by worker type and tautodist bin
df_plot = survey_work_tour.groupby(['worker_type_5','tautodist_bin'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# share within each worker type: divide by the group's total weight
# (transform broadcasts the group sum back onto the rows)
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="tautodist_bin", y="percentage", barmode="group",
             color='worker_type_5',
             category_orders={'tautodist_bin': ['0 to 1', '1 to 2', '2 to 5', '5 to 15', '15 or more']},
             hover_data=['toexpfac','sample_count'],
             title="distance to work by worker type")
fig.update_layout(height=400, width=750)
fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
fig.show()