In [20]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# set plotly template ("psrc_color" is presumably registered by the psrc_theme import -- confirm)
pio.templates.default = "simple_white+psrc_color"

# The configuration folder sits four directory levels above the working directory.
# The path is assembled from os.pardir components rather than a hard-coded
# '..\\..\\..\\..\\' string, so it resolves on POSIX systems as well as on Windows.
CONFIG_DIR = os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, os.pardir, 'configuration')

config = toml.load(os.path.join(CONFIG_DIR, 'validation_configuration.toml'))
input_config = toml.load(os.path.join(CONFIG_DIR, 'input_configuration.toml'))
# model_dir = os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, os.pardir)

In [21]:
# Fetch the four tables used in this notebook through the shared loader.
# Calls are issued in the same order as the names listed below.
hh, person, tour, person_day = (
    validation_data_input.get_data(table_name)
    for table_name in ('household', 'person', 'tour', 'person_day')
)

In [22]:
# Household attributes are attached to every person (left join), and the result
# is inner-joined onto the person-day records.
# person_day and tour contain more people/households than the person and hh data,
# which is why the outer merge is an inner join.
df_person_day = pd.merge(
    person_day,
    pd.merge(person, hh, how='left', on=['hhno', 'source']),
    how='inner',
    on=['hhno', 'pno', 'source'],
)


- This analysis uses Daysim-format survey data.

- workers are divided into 5 groups:

1. work from home: home parcel = work parcel

2. telecommuter: work at home 3 hours or more and make no work tours

3. didn't work workers: work at home less than 3 hours and make no work tours

4. hybrid workers: work at home 3 hours or more and make at least one work tour

5. commuters: work at home less than 3 hours and make at least one work tour

In [23]:
def classify_worker_type(person_days, min_home_hours=3):
    """Label each row of a person-day table with one of the five worker groups.

    Rows with ``pwtyp`` <= 0 (or missing) stay 'not worker'. For the others:

    * 'wfh'                -- work parcel equals home parcel
    * 'telecommuter'       -- ``wkathome`` >= threshold and no work tours
    * "didn't work worker" -- ``wkathome`` <  threshold and no work tours
    * 'hybrid worker'      -- ``wkathome`` >= threshold and at least one work tour
    * 'commuter'           -- ``wkathome`` <  threshold and at least one work tour

    Parameters
    ----------
    person_days : pandas.DataFrame
        Must contain 'pwtyp', 'pwpcl', 'hhparcel', 'wkathome' and 'wktours'.
    min_home_hours : number, default 3
        Threshold applied to ``wkathome``.

    Returns
    -------
    pandas.Series
        Worker-type labels indexed like ``person_days``.
    """
    # Every mask is derived from the frame being classified, so the result does
    # not depend on index alignment with any other DataFrame.
    is_worker = person_days['pwtyp'] > 0
    works_at_home_parcel = person_days['pwpcl'] == person_days['hhparcel']
    works_elsewhere = person_days['pwpcl'] != person_days['hhparcel']
    home_hours_met = person_days['wkathome'] >= min_home_hours
    home_hours_below = person_days['wkathome'] < min_home_hours
    no_work_tour = person_days['wktours'] == 0
    has_work_tour = person_days['wktours'] > 0

    worker_type = pd.Series('not worker', index=person_days.index)
    worker_type.loc[is_worker & works_at_home_parcel] = 'wfh'
    worker_type.loc[is_worker & works_elsewhere & home_hours_met & no_work_tour] = 'telecommuter'
    worker_type.loc[is_worker & works_elsewhere & home_hours_below & no_work_tour] = 'didn\'t work worker'
    worker_type.loc[is_worker & works_elsewhere & home_hours_met & has_work_tour] = 'hybrid worker'
    worker_type.loc[is_worker & works_elsewhere & home_hours_below & has_work_tour] = 'commuter'
    return worker_type


# Keep survey records only; the copy lets new columns be added without touching df_person_day.
survey_person_day = df_person_day[(df_person_day['source']=='survey')].copy()

# Define worker type
survey_person_day['worker_type_5'] = classify_worker_type(survey_person_day)

# Non-mandatory tours: the sum of these seven per-purpose tour-count columns.
survey_person_day['non_mandatory_tours'] = (
    survey_person_day['estours']
    + survey_person_day['pbtours']
    + survey_person_day['shtours']
    + survey_person_day['mltours']
    + survey_person_day['sotours']
    + survey_person_day['retours']
    + survey_person_day['metours']
)

In [24]:
# Labels for the primary destination purpose codes (pdpurp) used in this notebook.
purpose_dict = {
    code: label
    for code, label in enumerate(
        ['work', 'school', 'escort', 'personal business', 'shopping', 'meal', 'social & recreation'],
        start=1,
    )
}

# Survey tours, joined to the person-day attributes (including the worker type),
# with the purpose label, departure hour and tour duration added.
survey_tour = (
    tour[tour['source']=='survey']
    .copy()
    .merge(
        survey_person_day[['hhno', 'pno', 'day','wktours','wkathome', 'pwtyp', 'pwpcl','hhparcel', 'worker_type','worker_type_5']],
        on=['hhno', 'pno', 'day'],
        how='inner',
    )
    .assign(
        pdpurp_label=lambda t: t['pdpurp'].map(purpose_dict),
        # departure time is divided by 60 to get the departure hour
        tlvorig_hr=lambda t: t['tlvorig'] // 60,
        # duration is floored to 15-unit steps and expressed in quarter hours
        tour_duration_hr=lambda t: ((t['tardest'] - t['tlvorig']) // 15) * 0.25,
    )
)

# A negative duration means arrival is numerically earlier than departure;
# recompute those rows with 1440 (minutes in a day) added to the arrival time.
survey_tour['tour_duration_hr'] = survey_tour['tour_duration_hr'].mask(
    survey_tour['tour_duration_hr'] < 0,
    ((survey_tour['tardest'] + 1440 - survey_tour['tlvorig']) // 15) * 0.25,
)

# for tour rates: work-purpose tours made by commuters and hybrid workers
survey_work_tour = survey_tour.loc[
    lambda t: (t['pdpurp'] == 1) & t['worker_type_5'].isin(['commuter', 'hybrid worker'])
].copy()



## worker counts

In [25]:
# Counts by worker type, including the 'not worker' group:
# sample_count is the number of rows, pdexpfac is the summed weight column.
df_worker_count = survey_person_day.groupby('worker_type_5')['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
# Series.sum() is vectorised; the Python builtin sum() iterates element by element.
df_worker_count['percent'] = df_worker_count['pdexpfac'] / df_worker_count['pdexpfac'].sum()

fig = px.bar(df_worker_count, x="worker_type_5", y="percent",
             barmode="group",hover_data=['sample_count','pdexpfac'],
             category_orders={'worker_type_5': ["wfh","telecommuter","hybrid worker","commuter","didn't work worker","not worker"]},
             title="person count by worker type")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis_tickformat = '.2%')
fig.show()

In [26]:
# Counts by worker type, restricted to the five worker groups (no 'not worker'),
# so the shares are relative to workers only.
workers = ["wfh","telecommuter","hybrid worker","commuter","didn't work worker"]

df_worker_count = survey_person_day[survey_person_day['worker_type_5'].isin(workers)].\
    groupby('worker_type_5')['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
# Series.sum() is vectorised; the Python builtin sum() iterates element by element.
df_worker_count['percent'] = df_worker_count['pdexpfac'] / df_worker_count['pdexpfac'].sum()

fig = px.bar(df_worker_count, x="worker_type_5", y="percent",
             barmode="group",hover_data=['sample_count','pdexpfac'],
             category_orders={'worker_type_5': workers},
             title="person count by worker type")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis_tickformat = '.2%')
fig.show()

## tour rate by worker type

In [27]:
def calc_rates(df, tour_type):
    """Compute the weighted tour rate for each worker type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'worker_type_5', 'pdexpfac' and the column named by ``tour_type``.
    tour_type : str
        Name of the tour-count column to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per worker type with columns 'worker_type_5', ``<tour_type>_wt``
        (sum of tour count times pdexpfac), 'pdexpfac_sum' (sum of pdexpfac) and
        ``<tour_type>_rate`` (their ratio). The input frame is not modified.
    """
    weighted_col = tour_type + '_wt'
    rate_col = tour_type + '_rate'

    # Only the columns needed for the aggregation are carried forward.
    weighted = df[['worker_type_5', 'pdexpfac']].assign(
        **{weighted_col: df[tour_type] * df['pdexpfac']}
    )

    rates = (
        weighted
        .groupby('worker_type_5')[[weighted_col, 'pdexpfac']]
        .sum()
        .reset_index()
        .rename(columns={'pdexpfac': 'pdexpfac_sum'})
    )
    rates[rate_col] = rates[weighted_col] / rates['pdexpfac_sum']

    return rates

In [28]:
def plot_tour_rate(df, tour_type, title):
    """Show a bar chart of the tour rate per worker type.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'worker_type_5', ``<tour_type>_rate``, ``<tour_type>_wt`` and
        'pdexpfac_sum' columns (the layout returned by ``calc_rates``).
    tour_type : str
        Prefix of the rate and weighted-count columns to plot.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    rate_col = tour_type+'_rate'
    weighted_col = tour_type+'_wt'
    # Fixed left-to-right order of the worker groups on the x axis.
    worker_order = ["wfh","telecommuter","hybrid worker","commuter","didn't work worker"]

    chart = px.bar(
        df,
        x="worker_type_5",
        y=rate_col,
        barmode="group",
        hover_data=[weighted_col,'pdexpfac_sum'],
        category_orders={'worker_type_5': worker_order},
        title=title,
    )
    chart.update_layout(
        height=300,
        width=700,
        font=dict(size=11),
        yaxis_tickformat='.2f',
        yaxis_title='Tour Rate',
    )
    chart.show()

In [29]:
# Weighted work-tour rate per worker type from the survey person-day table, then charted.
df_plot = calc_rates(df=survey_person_day, tour_type='wktours')

plot_tour_rate(df=df_plot, tour_type='wktours', title="work tour tour rate")

- non-mandatory tour calculation:

`survey_person_day['non_mandatory_tours'] = survey_person_day['estours']+survey_person_day['pbtours']+survey_person_day['shtours']+survey_person_day['mltours']+survey_person_day['sotours']+survey_person_day['retours']+survey_person_day['metours']`

In [30]:
# Weighted non-mandatory tour rate per worker type from the survey person-day table, then charted.
df_plot = calc_rates(df=survey_person_day, tour_type='non_mandatory_tours')

plot_tour_rate(df=df_plot, tour_type='non_mandatory_tours', title="non-mandatory tours tour rate")

## work tour departure time 

In [31]:
# Row count and summed toexpfac of work tours by worker type and departure hour.
df_plot = survey_work_tour.groupby(['worker_type_5','tlvorig_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# Share of each departure hour within its worker type. transform('sum') broadcasts
# the per-group total back to every row, replacing the per-group Python lambda.
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="tlvorig_hr", y="percentage", barmode="group",
             color='worker_type_5',
             hover_data=['toexpfac','sample_count'],
             title="work tour departure time (hour) by worker type")
fig.update_layout(height=400, width=750)
# One tick per hour, categories in ascending order; y axis shown as whole percentages.
fig.update_xaxes(dtick=1, categoryorder='category ascending')
fig.update_yaxes(tickformat=".0%")
fig.show()

## work tour duration 

In [32]:
# Row count and summed toexpfac of work tours by worker type and tour duration.
df_plot = survey_work_tour.groupby(['worker_type_5','tour_duration_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
# Share of each duration bin within its worker type. transform('sum') broadcasts
# the per-group total back to every row, replacing the per-group Python lambda.
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('worker_type_5')['toexpfac'].transform('sum')

fig = px.bar(df_plot,
             x="tour_duration_hr", y="percentage", barmode="group",
             color='worker_type_5',
             hover_data=['toexpfac','sample_count'],
             title="work tour duration (hour) by worker type")
fig.update_layout(height=400, width=750)
# One tick per hour, categories in ascending order; y axis shown as whole percentages.
fig.update_xaxes(dtick=1, categoryorder='category ascending')
fig.update_yaxes(tickformat=".0%")
fig.show()