In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import toml
import util
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
pio.templates.default = "simple_white+psrc_color" # set plotly template

config = toml.load(os.path.join(os.getcwd(), '..\\..\\..\\..\\configuration', 'validation_configuration.toml'))
input_config = toml.load(os.path.join(os.getcwd(), '..\\..\\..\\..\\configuration', 'input_configuration.toml'))
# model_dir = os.path.join(os.getcwd(), '..\\..\\..\\..\\')

In [2]:
validation_data = util.ValidationData(config,input_config)

tour = validation_data.tour.to_pandas()
trip = validation_data.trip.to_pandas()
hh = validation_data.hh.to_pandas()
person = validation_data.person.to_pandas()
person_day = validation_data.person_day.to_pandas()
df_parcel = validation_data.land_use.to_pandas()

In [3]:
hh = hh[hh['source']=="survey"].copy()
person = person[person['source']=="survey"].copy()
tour = tour[tour['source']=="survey"].copy()
trip = trip[trip['source']=="survey"].copy()
person_day = person_day[person_day['source']=="survey"].copy()

- analysis is using daysim format survey data

- workers are divided into 5 groups:
    1. work from home: home parcel = work parcel
    2. telecommuter: work at home 3 hours or more and make no work tours
    3. didn't work workers: work at home less than 3 hours and make no work tours
    4. hybrid workers: work at home 3 hours or more and make at least one work tour
    5. commuters: work at home less than 3 hours and make at least one work tour

In [4]:
# person_day and tour has more people/households than person and hh data (inner join)
survey_person_day = person_day.merge(person.merge(hh, on=['hhno','source'], how='left'), on=['hhno','pno','source'], how='inner')

# Define worker type
survey_person_day['worker_type_5'] = 'not worker'
survey_person_day.loc[(survey_person_day['pwtyp']>0) & (survey_person_day['pwpcl']==survey_person_day['hhparcel']),'worker_type_5'] = 'wfh'
survey_person_day.loc[(survey_person_day['pwtyp']>0) & (survey_person_day['pwpcl']!=survey_person_day['hhparcel']) & (survey_person_day['wkathome']>=3) & (survey_person_day['wktours']==0),'worker_type_5'] = 'telecommuter'
survey_person_day.loc[(survey_person_day['pwtyp']>0) & (survey_person_day['pwpcl']!=survey_person_day['hhparcel']) & (survey_person_day['wkathome']<3) & (survey_person_day['wktours']==0),'worker_type_5'] = 'didn\'t work worker'
survey_person_day.loc[(survey_person_day['pwtyp']>0) & (survey_person_day['pwpcl']!=survey_person_day['hhparcel']) & (survey_person_day['wkathome']>=3) & (survey_person_day['wktours']>0),'worker_type_5'] = 'hybrid worker'
survey_person_day.loc[(survey_person_day['pwtyp']>0) & (survey_person_day['pwpcl']!=survey_person_day['hhparcel']) & (survey_person_day['wkathome']<3) & (survey_person_day['wktours']>0),'worker_type_5'] = 'commuter'

# number of escort stops on person day
df_escort_stops = trip[trip['dpurp']==3].groupby(['hhno', 'pno', 'day','source'])['tsvid'].agg(escort_stops = "size").reset_index()
survey_person_day = survey_person_day.merge(df_escort_stops, how='left', on=['hhno', 'pno', 'day','source']).fillna(0).astype({'escort_stops': 'int32'})

# number of non-mandatory tours on person day
survey_person_day['non_mandatory_tours'] = survey_person_day['estours']+survey_person_day['pbtours']+survey_person_day['shtours']+survey_person_day['mltours']+survey_person_day['sotours']+survey_person_day['retours']+survey_person_day['metours']

# precense of children in houseohld
survey_person_day['hhcu15'] = survey_person_day['hh515'] + survey_person_day['hhcu5']
survey_person_day['children'] = survey_person_day['hhcu15'].apply(lambda x: 'have kid(s)' if x>0 else 'no kid')


survey_tour = tour.\
    merge(survey_person_day[['hhno', 'pno', 'day','wktours','wkathome', 'pwtyp', 'pwpcl','hhparcel', 'worker_type','worker_type_5','source']], 
          how='inner', on=['hhno', 'pno', 'day','source'])

purpose_dict = {1: 'work',
                2: 'school',
                3: 'escort',
                4: 'personal business',
                5: 'shopping',
                6: 'meal',
                7: 'social & recreation'}

survey_tour['pdpurp_label'] = survey_tour['pdpurp'].map(purpose_dict)

# calculate tour departure time
survey_tour['tlvorig_hr'] = survey_tour['tlvorig']//60
# calculate time leaving tour destination
survey_tour['tlvdest_hr'] = survey_tour['tlvdest']//60
# calculate tour duration
survey_tour['tour_duration'] = survey_tour['tarorig'] - survey_tour['tlvorig']
survey_tour['tour_duration_hr'] = survey_tour['tour_duration']//60
survey_tour.loc[survey_tour['tour_duration_hr']<0,'tour_duration_hr'] = ((survey_tour['tarorig'] + 1440 - survey_tour['tlvorig'])//60)*1


# trip_purpose_dict = {0: "home", 
#                      1: "work", 
#                      2: "school", 
#                      3: "escort", 
#                      4: "personal business", 
#                      5: "shopping", 
#                      6: "meal", 
#                      7: "social & recreation"}

# number of non-mandatory stops on tours
# survey_tour_non_mand_stops = trip[trip['dpurp']>2].groupby(['hhno', 'pno', 'day', 'tour','source'])['tsvid'].agg(non_mand_stop_count = "size").reset_index()
survey_tour_escort_stops = trip[trip['dpurp']==3].groupby(['hhno', 'pno', 'day', 'tour','source'])['tsvid'].agg(escort_stop_count = "size").reset_index()
survey_tour_count_stops = trip.groupby(['hhno', 'pno', 'day', 'tour','source'])['tsvid'].agg(trip_count = "size").reset_index()
survey_tour_count_stops['all_stop_count'] = survey_tour_count_stops['trip_count'] - 2
survey_tour = survey_tour.merge(survey_tour_count_stops, how='left', on=['hhno', 'pno', 'day', 'tour','source']).fillna(0).astype({'all_stop_count': 'int32'})
# survey_tour = survey_tour.merge(survey_tour_non_mand_stops, how='left', on=['hhno', 'pno', 'day', 'tour','source']).fillna(0).astype({'non_mand_stop_count': 'int32'})
survey_tour = survey_tour.merge(survey_tour_escort_stops, how='left', on=['hhno', 'pno', 'day', 'tour','source']).fillna(0).astype({'escort_stop_count': 'int32'})

# distance
survey_tour['tautodist_bin'] = pd.cut(survey_tour['tautodist'], bins=[0,1,2,5,10,15,9999],
                                    labels=['0 to 1', '1 to 2', '2 to 5', '5 to 10', '10 to 15', '15 or more'])

# number of atwork subtours on person day
# survey_subtours = survey_tour[survey_tour['parent']>0].groupby(['hhno', 'pno', 'day','source'])['tour'].agg(subtour_count = "size").reset_index()
# survey_tour_subtours = survey_tour_subtours.rename(columns={"parent": "tour"})
# survey_person_day = survey_person_day.merge(survey_subtours, how='left', on=['hhno', 'pno', 'day','source']).fillna(0).astype({'subtour_count': 'int32'})


# for tour rates
# only 'commuter','hybrid worker'
survey_work_tour = survey_tour[survey_tour['pdpurp']==1].copy()
survey_non_man_tour = survey_tour[survey_tour['pdpurp']>2].copy()


workers = ["wfh","telecommuter","hybrid worker","commuter","didn't work worker"]

## worker counts

In [5]:
# worker counts by worker type
df_worker_count = survey_person_day.groupby(['source','worker_type_5'])['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
df_worker_count['percent'] = df_worker_count.groupby(['source'], group_keys=False)['pdexpfac']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_worker_count, x="worker_type_5", y="percent", color='source',
             barmode="group",hover_data=['sample_count','pdexpfac'],
             category_orders={'worker_type_5': ["wfh","telecommuter","hybrid worker","commuter","didn't work worker","not worker"]},
             title="person count by worker type")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis_tickformat = '.2%')
fig.show()

In [6]:
# worker counts by worker type
df_worker_count = survey_person_day[survey_person_day['worker_type_5'].isin(workers)].\
    groupby(['source','worker_type_5'])['pdexpfac'].\
    agg(sample_count='size', pdexpfac='sum').reset_index()
df_worker_count['percent'] = df_worker_count.groupby(['source'], group_keys=False)['pdexpfac']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_worker_count, x="worker_type_5", y="percent", color="source",
             barmode="group",hover_data=['sample_count','pdexpfac'],
             category_orders={'worker_type_5': workers},
             title="person count by worker type")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis_tickformat = '.2%')
fig.show()

In [7]:
for sc in survey_person_day['source'].unique():

    df = survey_person_day[survey_person_day['source']==sc]

    df_worker_count = df[(df['worker_type_5'].isin(workers)) & (df['pagey']>0)].\
        groupby(['worker_type_5','pagey'])['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
    # df_worker_count['percent'] = df_worker_count['pdexpfac'] / sum(df_worker_count['pdexpfac'])
    df_worker_count['percentage'] = df_worker_count.groupby('worker_type_5', group_keys=False)['pdexpfac']. \
        apply(lambda x: x / float(x.sum()))
    df_worker_count['pagey'] = df_worker_count['pagey'].astype(str)

    fig = px.bar(df_worker_count,
                x="pagey", y="percentage", barmode="group",
                color='worker_type_5',
                category_orders={'worker_type_5': workers},
                hover_data=['pdexpfac','sample_count'],
                title=f"{sc}: worker type by age")
    fig.update_layout(height=400, width=750)
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

In [8]:
for sc in survey_person_day['source'].unique():

    df = survey_person_day[survey_person_day['source']==sc]

    df_worker_count = df[(df['worker_type_5'].isin(workers))].\
        groupby(['worker_type_5','children'])['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
    # df_worker_count['percent'] = df_worker_count['pdexpfac'] / sum(df_worker_count['pdexpfac'])
    df_worker_count['percentage'] = df_worker_count.groupby('worker_type_5', group_keys=False)['pdexpfac']. \
        apply(lambda x: x / float(x.sum()))

    fig = px.bar(df_worker_count,
                x="children", y="percentage", barmode="group",
                color='worker_type_5',
                category_orders={'worker_type_5': workers},
                hover_data=['pdexpfac','sample_count'],
                title=f"{sc}: worker type by presence of children in household (age under 16)")
    fig.update_layout(height=400, width=750)
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

In [9]:
for sc in survey_person_day['source'].unique():

    df = survey_person_day[survey_person_day['source']==sc]
    df['worker_type'] = df['pwtyp'].map({1: 'full-time', 2: 'part-time'})

    df_worker_count = df[(df['worker_type_5'].isin(workers))].\
        groupby(['worker_type_5','worker_type'])['pdexpfac'].agg(sample_count='size', pdexpfac='sum').reset_index()
    # df_worker_count['percent'] = df_worker_count['pdexpfac'] / sum(df_worker_count['pdexpfac'])
    df_worker_count['percentage'] = df_worker_count.groupby('worker_type_5', group_keys=False)['pdexpfac']. \
        apply(lambda x: x / float(x.sum()))

    fig = px.bar(df_worker_count,
                x="worker_type", y="percentage", barmode="group",
                color='worker_type_5',
                category_orders={'worker_type_5': workers},
                hover_data=['pdexpfac','sample_count'],
                title=f"{sc}: worker type by presence of children in household (age under 16)")
    fig.update_layout(height=400, width=750)
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

## tour rate by worker type

In [10]:
def calc_rates(df, tour_type):
    # _df = df[df['worker_type_5'].isin(workers)].copy()
    _df = df.copy()
    _df[tour_type+'_wt'] = _df[tour_type]*_df['pdexpfac']
    # _df['wktours_wt'] = _df['wktours']*_df['pdexpfac']
    # _df['non_mandatory_tours_wt'] = (_df['estours']+_df['pbtours']+_df['shtours']+_df['mltours']+_df['sotours']+_df['retours']+_df['metours'])*_df['pdexpfac']

    _df2 = _df.groupby(['source','worker_type_5'])['pdexpfac'].sum().reset_index()
    _df3 = _df.groupby(['source','worker_type_5'])[tour_type+'_wt'].sum().reset_index()

    _df3['pdexpfac_sum'] = _df2['pdexpfac']
    _df3[tour_type + '_rate'] = _df3[tour_type+'_wt']/_df3['pdexpfac_sum']

    return _df3

In [11]:
def plot_tour_rate(df, tour_type, title):
    fig = px.bar(df, x="worker_type_5", y=tour_type+'_rate', color='source',
                 barmode="group",hover_data=[tour_type+'_wt','pdexpfac_sum'],
                 category_orders={'worker_type_5': ["wfh","telecommuter","hybrid worker","commuter","didn't work worker"]},
                 title=title)
    fig.update_layout(height=300, width=700, font=dict(size=11),
                        yaxis_tickformat = '.2f', yaxis_title='Tour Rate')
    fig.show()

In [12]:
df_plot = calc_rates(survey_person_day,'wktours')

plot_tour_rate(df_plot, 'wktours', "work tour tour rate")

- non-mandatory tour calculation:

`survey_person_day['non_mandatory_tours'] = survey_person_day['estours']+survey_person_day['pbtours']+survey_person_day['shtours']+survey_person_day['mltours']+survey_person_day['sotours']+survey_person_day['retours']+survey_person_day['metours']`

In [13]:
df_plot = calc_rates(survey_person_day,'non_mandatory_tours')

plot_tour_rate(df_plot, 'non_mandatory_tours', "non-mandatory tour tour rate")

In [14]:
df_plot = calc_rates(survey_person_day,'estours')

plot_tour_rate(df_plot, 'estours', "escort tour tour rate")

In [15]:
df_plot = calc_rates(survey_person_day,'escort_stops')

plot_tour_rate(df_plot, 'escort_stops', "escort trip trip rate")

In [16]:
survey_tour_with_escort = survey_tour[survey_tour['escort_stop_count']>0].copy()

# worker counts by worker type
df_count = survey_tour_with_escort.\
    groupby(['source','worker_type_5','pdpurp_label'])['toexpfac'].\
    agg(sample_count='size', toexpfac='sum').reset_index()
df_count['percentage'] = df_count.groupby(['source','worker_type_5'], group_keys=False)['toexpfac']. \
    apply(lambda x: x / float(x.sum()))

for sc in df_count['source'].unique():

    fig = px.bar(df_count[df_count['source']==sc],
                x="pdpurp_label", y="percentage", barmode="group",
                color='worker_type_5',
                category_orders={'worker_type_5': workers,
                                 'pdpurp_label': [*purpose_dict.values()]},
                hover_data=['toexpfac','sample_count'],
                title=f"{sc}: escort trip distribution across tour purpose")
    fig.update_layout(height=400, width=750)
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

- question: subtour tour rate too low
- number of subtours among all workers:

In [17]:
survey_person_day[survey_person_day['worker_type_5'].isin(workers)]['wbtours'].value_counts()

wbtours
0    3401
1     154
2      12
Name: count, dtype: int64

In [18]:
#| echo: true
324/6653

0.048699834661055166

In [19]:

df_plot = calc_rates(survey_person_day,'wbtours')

plot_tour_rate(df_plot, 'wbtours', "atwork subtour tour rate")

## tours by worker type

### departure time

In [20]:
def plot_origin_depart(tourdata, title_name):

    for sc in survey_person_day['source'].unique():

        df = tourdata[tourdata['source']==sc]

        df_plot = df.groupby(['worker_type_5','tlvorig_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
        df_plot['percentage'] = df_plot.groupby(['worker_type_5'], group_keys=False)['toexpfac']. \
            apply(lambda x: x / float(x.sum()))

        fig = px.bar(df_plot,
                    x="tlvorig_hr", y="percentage", barmode="group",
                    color='worker_type_5',
                    hover_data=['toexpfac','sample_count'],
                    title=f"{sc}: " + title_name)
        fig.update_layout(height=400, width=750)
        fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
        fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".2%")))
        fig.show()

plot_origin_depart(survey_work_tour, "work tour departure time (hour) by worker type")


In [21]:
plot_origin_depart(survey_non_man_tour, "non-mandatory tour departure time (hour) by worker type")

In [22]:
for sc in survey_person_day['source'].unique():

    df = survey_work_tour[survey_work_tour['source']==sc]

    df_plot = df.groupby(['worker_type_5','tlvdest_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
    df_plot['percentage'] = df_plot.groupby('worker_type_5', group_keys=False)['toexpfac']. \
        apply(lambda x: x / float(x.sum()))

    fig = px.bar(df_plot,
                x="tlvdest_hr", y="percentage", barmode="group",
                color='worker_type_5',
                hover_data=['toexpfac','sample_count'],
                title=f"{sc}: work destination departure time (hour) by worker type")
    fig.update_layout(height=400, width=750)
    fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

### tour duration 

In [23]:
def plot_duration(tourdata, title_name):

    for sc in survey_person_day['source'].unique():

        df = tourdata[tourdata['source']==sc]

        df_plot = df.groupby(['worker_type_5','tour_duration_hr'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
        df_plot['percentage'] = df_plot.groupby('worker_type_5', group_keys=False)['toexpfac']. \
            apply(lambda x: x / float(x.sum()))

        fig = px.bar(df_plot,
                    x="tour_duration_hr", y="percentage", barmode="group",
                    color='worker_type_5',
                    hover_data=['toexpfac','sample_count'],
                    title=f"{sc}: " + title_name)
        fig.update_layout(height=350, width=750)
        fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
        fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
        fig.show()

plot_duration(survey_work_tour, "work tour duration (hour) by worker type")

In [24]:
def print_avg_duration(tourdata):
    df = tourdata.copy()
    df['tour_duration_toexpfac'] = df['tour_duration'] * df['toexpfac']

    df_plot = df.groupby(['worker_type_5']).agg({'tour_duration_toexpfac':'sum',
                                                'toexpfac':'sum'}).reset_index()
    df_plot['avg_duration_hr'] = df_plot['tour_duration_toexpfac']/df_plot['toexpfac']/60
    display(df_plot.style.\
        format('{:,.2f}', subset=['tour_duration_toexpfac', 'toexpfac', 'avg_duration_hr']))

print_avg_duration(survey_work_tour)


Unnamed: 0,worker_type_5,tour_duration_toexpfac,toexpfac,avg_duration_hr
0,commuter,503192358.02,932383.79,8.99
1,hybrid worker,109825081.83,213410.46,8.58
2,not worker,5298292.32,10610.16,8.32
3,wfh,2039774.48,10239.82,3.32


In [25]:
plot_duration(survey_non_man_tour, "non-mandatory tour duration (hour) by worker type")

In [26]:
print_avg_duration(survey_non_man_tour)

Unnamed: 0,worker_type_5,tour_duration_toexpfac,toexpfac,avg_duration_hr
0,commuter,36705187.07,390962.9,1.56
1,didn't work worker,57296286.24,294079.2,3.25
2,hybrid worker,2058176.7,48530.18,0.71
3,not worker,240951977.07,1428651.16,2.81
4,telecommuter,17888022.81,144879.87,2.06
5,wfh,27442569.14,277011.02,1.65


### number of stops

In [27]:
cut_outlier = 5

for sc in survey_person_day['source'].unique():

    df = survey_work_tour[(survey_work_tour['source']==sc) & ~(survey_work_tour['worker_type_5'].isin(['not worker','wfh']))]

    df_plot = df.groupby(['worker_type_5','all_stop_count'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
    df_plot['percentage'] = df_plot.groupby('worker_type_5', group_keys=False)['toexpfac']. \
        apply(lambda x: x / float(x.sum()))

    fig = px.bar(df_plot[df_plot['all_stop_count']<cut_outlier],
                x="all_stop_count", y="percentage", barmode="group",
                color='worker_type_5',
                hover_data=['toexpfac','sample_count'],
                title=f"{sc}: number of stops on work tours by worker type")
    fig.update_layout(height=300, width=600)
    fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

In [28]:
df_table = df_plot[df_plot['all_stop_count']>=cut_outlier].copy()
df_table[['toexpfac']] = df_table[['toexpfac']].applymap("{:,.0f}".format)
df_table[['percentage']] = df_table[['percentage']].applymap("{:,.2%}".format)

df_table


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.



Unnamed: 0,worker_type_5,all_stop_count,sample_count,toexpfac,percentage
5,commuter,5,6,4806,0.52%
6,commuter,6,2,113,0.01%
7,commuter,7,2,9058,0.97%
8,commuter,8,2,9076,0.97%
9,commuter,9,2,325,0.03%
10,commuter,11,1,11,0.00%
11,commuter,12,1,17,0.00%
12,commuter,23,2,180,0.02%
13,commuter,47,1,143,0.02%
19,hybrid worker,5,2,254,0.12%


In [29]:
for sc in survey_person_day['source'].unique():

    df = survey_non_man_tour[(survey_non_man_tour['source']==sc)]

    df_plot = df.groupby(['worker_type_5','all_stop_count'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
    df_plot['percentage'] = df_plot.groupby('worker_type_5', group_keys=False)['toexpfac']. \
        apply(lambda x: x / float(x.sum()))

    fig = px.bar(df_plot[df_plot['all_stop_count']<cut_outlier],
                x="all_stop_count", y="percentage", barmode="group",
                color='worker_type_5',
                hover_data=['toexpfac','sample_count'],
                title=f"{sc}: number of stops on non-mandatory tours by worker type")
    fig.update_layout(height=350, width=600)
    fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()

In [30]:
df_table = df_plot[df_plot['all_stop_count']>=cut_outlier].copy()
df_table[['toexpfac']] = df_table[['toexpfac']].applymap("{:,.0f}".format)
df_table[['percentage']] = df_table[['percentage']].applymap("{:,.2%}".format)

df_table


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.



Unnamed: 0,worker_type_5,all_stop_count,sample_count,toexpfac,percentage
5,commuter,5,1,8617,2.20%
11,didn't work worker,5,5,5660,1.92%
12,didn't work worker,6,4,2033,0.69%
18,hybrid worker,5,1,48,0.10%
24,not worker,5,26,10378,0.73%
25,not worker,6,11,4276,0.30%
26,not worker,7,11,6953,0.49%
27,not worker,8,1,250,0.02%
28,not worker,9,1,519,0.04%
34,telecommuter,6,1,700,0.48%


### distance to work

- removed not worker and work from home

In [31]:
for sc in survey_person_day['source'].unique():

    df = survey_work_tour[(survey_work_tour['source']==sc) & ~(survey_work_tour['worker_type_5'].isin(['not worker','wfh']))]

    df_plot = df.groupby(['worker_type_5','tautodist_bin'])['toexpfac'].agg(sample_count='size', toexpfac='sum').reset_index()
    df_plot['percentage'] = df_plot.groupby('worker_type_5', group_keys=False)['toexpfac']. \
        apply(lambda x: x / float(x.sum()))

    fig = px.bar(df_plot,
                x="tautodist_bin", y="percentage", barmode="group",
                color='worker_type_5',
                category_orders={'tautodist_bin': ['0 to 1', '1 to 2', '2 to 5', '5 to 10', '10 to 15', '15 or more']},
                hover_data=['toexpfac','sample_count'],
                title=f"{sc}: distance to work by worker type")
    fig.update_layout(height=300, width=600)
    fig.for_each_yaxis(lambda a: a.update(dict(tickformat=".0%")))
    fig.show()



