In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default plotly template. "psrc_color" is presumably registered by the psrc_theme import - confirm.
# NOTE(review): the px calls further down pass template="simple_white" explicitly, which takes
# precedence over this default for those figures.
pio.templates.default = "simple_white+psrc_color" # set plotly template

# The repository root sits four directory levels above the working directory. The path is
# assembled from separate components (instead of a hard-coded '..\\..\\' string) so that it
# resolves on POSIX systems as well as on Windows.
_levels_up = [os.pardir] * 4
config = toml.load(os.path.join(os.getcwd(), *_levels_up, 'configuration', 'validation_configuration.toml'))
# The trailing empty component keeps the trailing path separator the original string had.
model_dir = os.path.join(os.getcwd(), *_levels_up, '')

In [2]:
# Load the household table, then the person table, from the configured 'survey_dir'.
hh, person = (
    validation_data_input.get_data(table_name, config['survey_dir'])
    for table_name in ('household', 'person')
)

In [3]:
# Load tour records. Note that these are read from the separate 'tour_survey_dir' config
# entry, not the 'survey_dir' entry used for the household/person/person-day tables.
tour = validation_data_input.get_data('tour',config['tour_survey_dir'])

In [4]:
# Load person-day records from the same 'survey_dir' config entry as household/person.
person_day = validation_data_input.get_data('person_day',config['survey_dir'])

In [5]:
# Attach person attributes, then household attributes, to every person-day record.
#
# 'source' is included in both join keys. All three tables carry a 'source' column, and the
# tour merge later in this notebook also joins on it. Joining on the ids alone would pair
# records from different sources whenever they share hhno/pno values, and a person-day
# without a match would lose its source label (the kept right-hand 'source' would be NaN).
# NOTE(review): assumes the 'source' labels are identical across the household, person and
# person_day tables returned by get_data - confirm.
person_day = person_day.merge(person, on=['hhno', 'pno', 'source'], how='left')

person_day = person_day.merge(hh, on=['hhno', 'source'], how='left')




In [6]:
# Define worker type for model records only; rows from other sources are left untouched.
#
# Only workers (pwtyp != 0, the same filter that builds `workers`) receive a label. Labelling
# every model person-day would tag non-workers as 'commuter'. After the tour merge, their
# tours would be counted in tour-level numerators while the denominators, taken from
# `workers`, exclude them.
_model_workers = (person_day['source'] == 'model') & (person_day['pwtyp'] != 0)

# Default label, overridden below for the two special cases.
person_day.loc[_model_workers, 'worker_type'] = 'commuter'
# Workplace parcel equals the home parcel.
person_day.loc[_model_workers & (person_day['pwpcl'] == person_day['hhparcel']), 'worker_type'] = 'wfh'
# Workplace away from home with wkathome above 3 (presumably hours worked at home - confirm units).
person_day.loc[_model_workers & (person_day['wkathome'] > 3) & (person_day['pwpcl'] != person_day['hhparcel']), 'worker_type'] = 'telecommuter'

In [7]:
# format work at home time
def _cap_telework_hours(hours):
    """Floor values in [0, 10); map negatives to 0.0 and anything else (including NaN) to 10.0."""
    if hours < 0.0:
        return 0.0
    if hours < 10.0:
        return np.floor(hours)
    return 10.0


def _telework_hour_label(capped_hours):
    """Text label for a capped value: the integer as a string, or "10+" for the top bucket."""
    if capped_hours < 10.0:
        return str(int(capped_hours))
    return "10+"


person_day['wkathome_int'] = person_day['wkathome'].apply(_cap_telework_hours)
person_day['wkathome_hour'] = person_day['wkathome_int'].apply(_telework_hour_label)

# person day data for workers: every record whose pwtyp is not 0
workers = person_day.loc[person_day['pwtyp'].ne(0)].copy()

In [8]:
# tour data
def create_df(survey_df, model_df, col_list, expfac, purpose_list, dpurp_var='pdpurp'):
    """Stack survey, unweighted-survey and model records into one labelled frame.

    Parameters
    ----------
    survey_df, model_df : DataFrame
        Inputs; only the columns named in ``col_list`` are used. Neither is modified.
    col_list : list
        Columns to carry over from both inputs.
    expfac : str
        Name of the column that is set to 1 in the unweighted copy of the survey.
    purpose_list : mapping
        Lookup passed to ``Series.map`` to translate ``dpurp_var`` values.
    dpurp_var : str
        Column holding the values to translate (default ``'pdpurp'``).

    Returns
    -------
    DataFrame
        Survey rows, unweighted survey rows, then model rows (original indices are
        kept), with a ``source`` label and a ``tour_purpose`` column added.
    """
    weighted = survey_df[col_list].assign(source="survey data")
    unweighted = survey_df[col_list].assign(**{expfac: 1, 'source': "unweighted survey"})
    modelled = model_df[col_list].assign(source="model results")

    stacked = pd.concat([weighted, unweighted, modelled])
    stacked['tour_purpose'] = stacked[dpurp_var].map(purpose_list)
    return stacked


# FIXME: check dictionary
# Maps pdpurp codes to tour-purpose labels.
purpose_dict = {1: 'wktours',
                2: 'sctours',
                3: 'estours',
                4: 'pbtours',
                5: 'shtours',
                6: 'mltours',
                7: 'sotours',
                8: 'retours',
                9: 'metours'}
# create df
tour_list = ['hhno', 'pno', 'day', 'tour', 'pdpurp', 'tautodist', 'toexpfac']
# tour = create_df(tour, df_model_tour, tour_list, 'toexpfac', purpose_dict)
# get worker type: attach the person-day attributes (including worker_type) to each tour
tour = tour.merge(person_day,
                  on=['hhno', 'pno', 'day', 'source'], how='left')

tour['tour_purpose'] = tour['pdpurp'].map(purpose_dict)


# Create bins: bins of bin_size (2) miles from 0 up to max_bin (40) miles.
# Each bin is labelled with its lower edge. Tours longer than max_bin fall outside every
# bin, get NaN, and therefore drop out of the distance distributions.
max_bin = 40
bin_size = 2
_bin_edges = np.arange(0, max_bin + bin_size, bin_size)
# include_lowest=True keeps tours with a distance of exactly 0 in the first bin; with the
# pd.cut default (left-open intervals) they would be assigned NaN and silently dropped.
tour['dist_bins'] = pd.cut(tour['tautodist'], bins=_bin_edges,
                           labels=[str(i) for i in _bin_edges[:-1]],
                           include_lowest=True)

## Worker counts

In [9]:
# Summed pdexpfac per source and worker type, plus each worker type's share within its source
df_worker_count = (
    workers
    .groupby(['source', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)
df_worker_count['percent'] = (
    df_worker_count
    .groupby(['source'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

fig = px.bar(
    df_worker_count.sort_values(by=['source']),
    x="worker_type",
    y="percent",
    color="source",
    custom_data=['pdexpfac'],
    barmode="group",
    template="simple_white",
    title="workers by worker type",
)
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>worker counts: %{customdata[0]:.0f}"
)
fig.update_layout(
    height=400,
    width=700,
    font={'size': 11},
    yaxis_tickformat='.2%',
)
fig.show()

In [10]:
# Summed pdexpfac per source, worker type and pwtyp; shares are taken within each
# (source, worker type) pair
df_worker_count = (
    workers
    .groupby(['source', 'worker_type', 'pwtyp'])['pdexpfac']
    .sum()
    .reset_index()
)
df_worker_count['percent'] = (
    df_worker_count
    .groupby(['source', 'worker_type'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

fig = px.bar(
    df_worker_count.sort_values(by=['source']),
    x='pwtyp',
    y="percent",
    color="source",
    facet_col="worker_type",
    custom_data=['pdexpfac'],
    barmode="group",
    template="simple_white",
    title="Share of full-time/part-time workers by worker type",
)
# Facet titles arrive as "<column>=<value>"; keep only the value part.
fig.for_each_annotation(lambda note: note.update(text=note.text.split("=")[-1]))
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>worker counts: %{customdata[0]:.0f}"
)
fig.update_layout(
    height=400,
    width=700,
    font={'size': 11},
    yaxis_tickformat='.2%',
)
fig.show()

### Worker counts by telework hours

In [11]:
# Summed pdexpfac in each telework-hour bucket, with each bucket's share of its source total
df_hour_count = (
    workers
    .groupby(['source', 'wkathome_int', 'wkathome_hour'])['pdexpfac']
    .sum()
    .reset_index()
)
df_hour_count['percent'] = (
    df_hour_count
    .groupby(['source'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

fig = px.bar(
    df_hour_count.sort_values(by=['source', 'wkathome_int']),
    x="wkathome_hour",
    y="percent",
    color="source",
    barmode="group",
    template="simple_white",
    title="share of workers by telework hour",
)
fig.update_layout(
    height=350,
    width=700,
    font={'size': 11},
    xaxis={'dtick': 1},
    yaxis_tickformat='.2%',
)
# Fix the category order so the text labels sort numerically, with "10+" last.
fig.update_xaxes(
    categoryorder='array',
    categoryarray=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10+'],
)
fig.show()

## Work tours

In [12]:
# Distribution of wktours among "telecommuter" records: summed pdexpfac per source and
# wktours value, expressed as a share of each source's total
_df = (
    workers.loc[workers['worker_type'].eq("telecommuter")]
    .groupby(['source', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)
fig = px.bar(
    _df,
    x="wktours",
    y="percent",
    color="source",
    barmode="group",
    template="simple_white",
    title="teleworkers: number of work tours",
)
fig.update_layout(height=350, width=700, font={'size': 11}, yaxis_tickformat='.2%')
fig.show()


In [13]:
# Distribution of wktours among "commuter" records: summed pdexpfac per source and
# wktours value, expressed as a share of each source's total
_df = (
    workers.loc[workers['worker_type'].eq("commuter")]
    .groupby(['source', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

fig = px.bar(
    _df,
    x="wktours",
    y="percent",
    color="source",
    barmode="group",
    template="simple_white",
    title="commuters: number of work tours",
)
fig.update_layout(height=350, width=700, font={'size': 11}, yaxis_tickformat='.2%')
fig.show()

In [14]:
# "commuter" records split by pwtyp: summed pdexpfac per source, pwtyp and wktours value,
# expressed as a share within each (source, pwtyp) pair
_df = (
    workers.loc[workers['worker_type'].eq("commuter")]
    .groupby(['source', 'pwtyp', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source', 'pwtyp'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

# Facet titles are left in plotly's default form here (no annotation rewrite is applied).
fig = px.bar(
    _df,
    x='wktours',
    y="percent",
    color="source",
    facet_col="pwtyp",
    custom_data=['pdexpfac'],
    barmode="group",
    template="simple_white",
    title="commuter: number of work tours by full-/part-time workers",
)
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>worker counts: %{customdata[0]:.0f}"
)
fig.update_layout(height=400, width=700, font={'size': 11}, yaxis_tickformat='.2%')
fig.show()

In [15]:
# Distribution of wktours among "wfh" records: summed pdexpfac per source and wktours
# value, expressed as a share of each source's total
_df = (
    workers.loc[workers['worker_type'].eq("wfh")]
    .groupby(['source', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

fig = px.bar(
    _df,
    x="wktours",
    y="percent",
    color="source",
    barmode="group",
    template="simple_white",
    title="work from home workers: number of work tours",
)
fig.update_layout(height=350, width=700, font={'size': 11}, yaxis_tickformat='.2%')
fig.show()

In [16]:
# "wfh" records split by pwtyp: summed pdexpfac per source, pwtyp and wktours value,
# expressed as a share within each (source, pwtyp) pair
_df = (
    workers.loc[workers['worker_type'].eq("wfh")]
    .groupby(['source', 'pwtyp', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source', 'pwtyp'], group_keys=False)['pdexpfac']
    .apply(lambda grp: grp / float(grp.sum()))
)

# Facet titles are left in plotly's default form here (no annotation rewrite is applied).
fig = px.bar(
    _df,
    x='wktours',
    y="percent",
    color="source",
    facet_col="pwtyp",
    custom_data=['pdexpfac'],
    barmode="group",
    template="simple_white",
    title="work from home: number of work tours by full-/part-time workers",
)
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>worker counts: %{customdata[0]:.0f}"
)
fig.update_layout(height=400, width=700, font={'size': 11}, yaxis_tickformat='.2%')
fig.show()

In [17]:
# Numerator: summed pdexpfac of records with at least one work tour, per telework-hour bucket
_df = (
    workers.loc[workers['wktours'].gt(0)]
    .groupby(['source', 'wkathome_int', 'wkathome_hour'])['pdexpfac']
    .sum()
    .reset_index()
)

# Denominator: the all-worker totals already computed in df_hour_count, renamed so they
# do not collide with the numerator column in the merge
_df2 = (
    df_hour_count[['source', 'wkathome_int', 'wkathome_hour', 'pdexpfac']]
    .rename(columns={'pdexpfac': 'total_workers'})
)
_df = _df.merge(_df2, on=['source', 'wkathome_int', 'wkathome_hour'], how='left')
_df['percent'] = _df['pdexpfac'].div(_df['total_workers'])


fig = px.bar(
    _df.sort_values(by=['source', 'wkathome_int']),
    x="wkathome_hour",
    y="percent",
    color="source",
    barmode="group",
    template="simple_white",
    title="share of people making 1+ work tours by telework hours",
)
fig.update_layout(
    height=300,
    width=700,
    font={'size': 11},
    xaxis={'dtick': 1},
    yaxis_tickformat='.2%',
)
# Fix the category order so the text labels sort numerically, with "10+" last.
fig.update_xaxes(
    categoryorder='array',
    categoryarray=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10+'],
)
fig.show()

## Tour rates by destination purpose for each worker type



In [18]:
# Denominator: summed pdexpfac of worker records per source and worker type
df_person_count = (
    workers
    .groupby(['source', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)

# Numerator: summed toexpfac per source, worker type and tour purpose; the rate is the
# tour total divided by the matching person total
df_tour_rate = (
    tour
    .groupby(['source', 'worker_type', 'tour_purpose'])['toexpfac']
    .sum()
    .reset_index()
    .merge(df_person_count, on=['source', 'worker_type'], how='left')
)
df_tour_rate['tour_rate'] = df_tour_rate['toexpfac'].div(df_tour_rate['pdexpfac'])

def plot_tour_rate(df, worker_type):
    """Show a grouped bar chart of tour_rate by tour_purpose for one worker type.

    Parameters
    ----------
    df : DataFrame
        Must provide the 'worker_type', 'tour_purpose', 'tour_rate' and 'source' columns.
    worker_type : str
        Value of 'worker_type' to keep; it is also used as the prefix of the chart title.

    The figure is displayed via ``fig.show()``; nothing is returned.
    """
    selected = df[df['worker_type'].eq(worker_type)]
    chart = px.bar(
        selected,
        x="tour_purpose",
        y="tour_rate",
        color="source",
        barmode="group",
        template="simple_white",
        title=worker_type + "s: tour rates by destination purpose",
    )
    chart.update_layout(height=300, width=700, font={'size': 11}, yaxis_tickformat='.2f')
    chart.show()


In [19]:
# Tour rates by destination purpose for records whose worker_type is "commuter"
plot_tour_rate(df_tour_rate,"commuter")

In [20]:
# Denominator: summed pdexpfac of "commuter" worker records that made no work tour
df2 = (
    workers.loc[workers['worker_type'].eq("commuter") & workers['wktours'].eq(0)]
    .groupby(['source', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)

# Numerator: summed toexpfac of tours belonging to "commuter" records with no work tour,
# per source and tour purpose
df1 = (
    tour.loc[tour['worker_type'].eq("commuter") & tour['wktours'].eq(0)]
    .groupby(['source', 'worker_type', 'tour_purpose'])['toexpfac']
    .sum()
    .reset_index()
    .merge(df2, on=['source', 'worker_type'], how='left')
)
df1['tour_rate'] = df1['toexpfac'].div(df1['pdexpfac'])

fig = px.bar(
    df1,
    x="tour_purpose",
    y="tour_rate",
    color="source",
    barmode="group",
    template="simple_white",
    title="commuter with no work tours: tour rates by destination purpose",
)
fig.update_layout(height=300, width=700, font={'size': 11}, yaxis_tickformat='.2f')
fig.show()

In [21]:
# Denominator: summed pdexpfac of "commuter" worker records whose pwpcl is -1
df2 = (
    workers.loc[workers['worker_type'].eq("commuter") & workers['pwpcl'].eq(-1)]
    .groupby(['source', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)

# Numerator: summed toexpfac of tours belonging to "commuter" records whose pwpcl is -1,
# per source and tour purpose
df1 = (
    tour.loc[tour['worker_type'].eq("commuter") & tour['pwpcl'].eq(-1)]
    .groupby(['source', 'worker_type', 'tour_purpose'])['toexpfac']
    .sum()
    .reset_index()
    .merge(df2, on=['source', 'worker_type'], how='left')
)
df1['tour_rate'] = df1['toexpfac'].div(df1['pdexpfac'])

# This chart uses an explicit colour sequence (the 2nd and 3rd D3 colours).
fig = px.bar(
    df1,
    x="tour_purpose",
    y="tour_rate",
    color="source",
    barmode="group",
    template="simple_white",
    color_discrete_sequence=px.colors.qualitative.D3[1:3],
    title="commuters with missing work location: tour rates by destination purpose",
)
fig.update_layout(height=300, width=700, font={'size': 11}, yaxis_tickformat='.2f')
fig.show()

In [22]:
# Tour rates by destination purpose for records whose worker_type is "telecommuter"
plot_tour_rate(df_tour_rate,"telecommuter")

In [23]:
# Tour rates by destination purpose for records whose worker_type is "wfh"
plot_tour_rate(df_tour_rate,"wfh")

## Tour distances by purpose

In [24]:
# Summed toexpfac per source, worker type, tour purpose and distance bin.
# The column is selected before aggregating, so only 'toexpfac' is summed. Summing the
# whole frame first is wasteful and, on recent pandas versions, fails or concatenates text
# when non-numeric columns are present.
# observed=False is stated explicitly because 'dist_bins' is categorical: every bin is kept
# for every key combination (unobserved ones sum to 0), so each line spans the full bin range.
df_tour_distance = (
    tour
    .groupby(['source', 'worker_type', 'tour_purpose', 'dist_bins'], observed=False)['toexpfac']
    .sum()
    .reset_index()
)

# Share of each distance bin within its (tour purpose, worker type, source) group.
# Groups whose total is 0 yield NaN shares.
df_tour_distance['percent'] = df_tour_distance. \
    groupby(['tour_purpose', 'worker_type', 'source'], group_keys=False)['toexpfac']. \
    apply(lambda x: x / float(x.sum()))

In [25]:
def plot_tour_distance(df, dpurp, worker_type_list):
    """Show line charts of the distance-bin shares for one tour purpose.

    Parameters
    ----------
    df : DataFrame
        Must provide 'tour_purpose', 'worker_type', 'dist_bins', 'percent' and 'source'.
    dpurp : str
        Value of 'tour_purpose' to keep; it is also used as the prefix of the chart title.
    worker_type_list : list
        Worker types to draw, one coloured line each.

    One facet is drawn per source. The figure is displayed via ``show()``; nothing is
    returned.
    """
    def _value_only(annotation):
        # Facet titles arrive as "<column>=<value>"; keep only the value part.
        return annotation.update(text=annotation.text.split("=")[-1])

    keep = df['tour_purpose'].eq(dpurp) & df['worker_type'].isin(worker_type_list)
    df_plot = df.loc[keep].copy()

    fig2 = px.line(
        df_plot,
        x='dist_bins',
        y="percent",
        color="worker_type",
        facet_col='source',
        template="simple_white",
        title=dpurp + " tour distance",
    )

    fig2.for_each_annotation(_value_only)
    fig2.update_layout(height=400, width=700, font={'size': 11}, yaxis_tickformat='.2%')
    fig2.show()


# Distance profile of tours labelled "wktours" for all three worker types
plot_tour_distance(df_tour_distance, "wktours", ["wfh", "commuter", "telecommuter"])

In [26]:
# Distance profile of tours labelled "sctours" for all three worker types
plot_tour_distance(df_tour_distance, "sctours", ["wfh", "commuter", "telecommuter"])

In [27]:
# Distance profile of tours labelled "estours" for all three worker types
plot_tour_distance(df_tour_distance, "estours", ["wfh", "commuter", "telecommuter"])

In [28]:
# Distance profile of tours labelled "pbtours" for all three worker types
plot_tour_distance(df_tour_distance, "pbtours", ["wfh", "commuter", "telecommuter"])

In [29]:
# Distance profile of tours labelled "shtours" for all three worker types
plot_tour_distance(df_tour_distance, "shtours", ["wfh", "commuter", "telecommuter"])

In [30]:
# Distance profile of tours labelled "mltours" for all three worker types
plot_tour_distance(df_tour_distance, "mltours", ["wfh", "commuter", "telecommuter"])

In [31]:
# Distance profile of tours labelled "sotours" for all three worker types
plot_tour_distance(df_tour_distance, "sotours", ["wfh", "commuter", "telecommuter"])