In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# set plotly template (the "psrc_color" part is presumably registered by the psrc_theme import - confirm)
pio.templates.default = "simple_white+psrc_color"

# Both configuration files are read from the "configuration" folder four levels above the
# working directory. The path is assembled from components instead of a hard-coded
# '..\\..\\..\\..\\configuration' string so that it also resolves on non-Windows systems.
config_dir = os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, os.pardir, 'configuration')
config = toml.load(os.path.join(config_dir, 'validation_configuration.toml'))
input_config = toml.load(os.path.join(config_dir, 'input_configuration.toml'))
# model_dir = os.path.join(os.getcwd(), '..\\..\\..\\..\\')

In [2]:
# Load the four tables used throughout the notebook. The generator is consumed in order,
# so get_data is still called for household, person, tour and person_day in that sequence.
hh, person, tour, person_day = (
    validation_data_input.get_data(table_name)
    for table_name in ('household', 'person', 'tour', 'person_day')
)

In [3]:
# Add geographic lookups at parcel level; only load relevant columns
test_path = "sqlite:///" + config['model_dir'] + "/inputs/db/soundcast_inputs.db"

# The table name embeds the base year. str() keeps the concatenation valid whether the
# TOML file stores base_year as a quoted string or as a bare integer.
parcel_geog = pd.read_sql_table('parcel_' + str(input_config['base_year']) + '_geography',
                                test_path, columns=['ParcelID','CountyName', 'District', 'district_name'])

In [4]:
# Load parcels_urbansim input (whitespace-delimited text file); only the parcel id and the
# two employment columns used for the office-job share are read.
parcel_urbansim = pd.read_csv(
    os.path.join(config['model_dir'], 'inputs/scenario/landuse/parcels_urbansim.txt'),
    sep=r'\s+',  # same parsing as delim_whitespace=True, which pandas has deprecated
    usecols=['PARCELID', 'EMPOFC_P', 'EMPTOT_P'],
)

In [5]:
# Attach employment counts to the parcel geography and compute the office share of all jobs.
df_parcel = parcel_geog.merge(parcel_urbansim, left_on='ParcelID', right_on='PARCELID', how='left')
df_parcel['% office jobs'] = df_parcel['EMPOFC_P']/df_parcel['EMPTOT_P']

# Left-closed bins of width 0.2. A share of exactly 1.0 (every job on the parcel is an
# office job) must land in the top bin: with a half-open [0.8, 1) bin it matched no bin at
# all and was relabelled "no jobs" below. The top edge is therefore the next float above
# 1.0, and the bins carry explicit labels so the top one reads as closed on both sides.
bins = [0, 0.2, 0.4, 0.6, 0.8, np.nextafter(1.0, 2.0)]
bin_labels = ['[0.0, 0.2)', '[0.2, 0.4)', '[0.4, 0.6)', '[0.6, 0.8)', '[0.8, 1.0]']
df_parcel['% office jobs (bins)'] = pd.cut(df_parcel['% office jobs'], bins=bins,
                                           right=False, labels=bin_labels)
# fill in NA with no jobs: the share is missing when EMPTOT_P is 0 or the parcel has no
# match in parcel_urbansim, and any share outside [0, 1] is also left unbinned
df_parcel['% office jobs (bins)'] = df_parcel['% office jobs (bins)'].cat.add_categories(["no jobs"])
df_parcel.loc[df_parcel['% office jobs (bins)'].isna(),'% office jobs (bins)'] = "no jobs"

In [6]:
# Person-day records enriched with person + household attributes, then with the geography
# and office-job bin of the parcel referenced by pwpcl. All joins are left joins, so every
# person-day row is kept.
df_person_day = (
    person_day
    .merge(
        person.merge(hh, how='left', on=['hhno', 'source']),
        how='left',
        on=['hhno', 'pno', 'source'],
    )
    .merge(
        df_parcel[['ParcelID', 'CountyName', 'district_name', '% office jobs (bins)']],
        how='left',
        left_on='pwpcl',
        right_on='ParcelID',
    )
)

In [7]:
# person_day = person_day.merge(person, on=['hhno','pno'], how='left')
# person_day.drop('source_x', axis=1, inplace=True)
# person_day.rename(columns={'source_y': 'source'}, inplace=True)
#
# person_day = person_day.merge(hh, on=['hhno'], how='left')
# person_day.drop('source_x', axis=1, inplace=True)
# person_day.rename(columns={'source_y': 'source'}, inplace=True)

In [8]:
# Define worker type. Only rows with source == 'model' are written; rows from any other
# source keep whatever worker_type they already have.
_is_model = df_person_day['source'] == 'model'
_work_parcel_is_home = df_person_day['pwpcl'] == df_person_day['hhparcel']
_wkathome_over_3 = df_person_day['wkathome'] > 3

# 1) default for model rows
df_person_day.loc[_is_model, 'worker_type'] = 'commuter'
# 2) pwpcl equals hhparcel
df_person_day.loc[_is_model & _work_parcel_is_home, 'worker_type'] = 'wfh'
# 3) pwpcl differs from hhparcel and wkathome is above 3
df_person_day.loc[_is_model & _wkathome_over_3 & ~_work_parcel_is_home, 'worker_type'] = 'telecommuter'

In [9]:
# format work at home time
# wkathome_int: negatives become 0, values in [0, 10) are floored, everything else
# (10 and above, plus anything that fails both comparisons such as NaN) becomes 10.
df_person_day['wkathome_int'] = np.select(
    [df_person_day['wkathome'] < 0.0, df_person_day['wkathome'] < 10.0],
    [0.0, np.floor(df_person_day['wkathome'])],
    default=10.0,
)
# wkathome_hour: text label for the value above, with the capped top group shown as "10+"
df_person_day['wkathome_hour'] = df_person_day['wkathome_int'].map(
    lambda hours: "10+" if hours >= 10.0 else str(int(hours))
)

# person day data for workers (pwtyp other than 0)
workers = df_person_day[df_person_day['pwtyp'] != 0].copy()

In [10]:
# tour data
# FIXME: check dictionary
# pdpurp code -> label; the labels match the person-day tour-count column names
purpose_dict = dict(enumerate(
    ['wktours', 'sctours', 'estours', 'pbtours', 'shtours',
     'mltours', 'sotours', 'retours', 'metours'],
    start=1,
))
# create df
tour_list = ['hhno', 'pno', 'day', 'tour', 'pdpurp', 'tautodist', 'toexpfac']
# get worker type: attach the person-day attributes to each tour.
# NOTE: this reassigns `tour`, so re-running the cell merges the person-day columns again.
tour = tour.merge(df_person_day, how='left', on=['hhno', 'pno', 'day', 'source'])

tour['tour_purpose'] = tour['pdpurp'].map(purpose_dict)


# Create bins: 2-mile bins from 0 up to max_bin (40 miles), each labelled by its lower edge.
# Distances that fall outside the bin edges get no bin.
max_bin = 40
bin_size = 2
tour['dist_bins'] = pd.cut(
    tour['tautodist'],
    bins=np.arange(0, max_bin + bin_size, bin_size),
    labels=[str(lower_edge) for lower_edge in np.arange(0, max_bin, bin_size)],
)

## worker counts

In [11]:
# worker counts by worker type: summed pdexpfac per source and worker type
df_worker_count = (
    workers
    .groupby(['source', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)
# share of each source's total that falls in each worker type
df_worker_count['percent'] = (
    df_worker_count
    .groupby(['source'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    df_worker_count.sort_values(by=['source']),
    x="worker_type",
    y="percent",
    color="source",
    custom_data=['pdexpfac'],
    barmode="group",
    title="workers by worker type",
)
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>"
                  "worker counts: %{customdata[0]:.0f}",
)
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()

### worker type by segment

In [12]:
# summed pdexpfac by source, worker type and pwtyp
df_worker_count = (
    workers
    .groupby(['source', 'worker_type', 'pwtyp'])['pdexpfac']
    .sum()
    .reset_index()
)
# pwtyp shares within each source / worker type combination
df_worker_count['percent'] = (
    df_worker_count
    .groupby(['source', 'worker_type'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    df_worker_count.sort_values(by=['source']),
    x='pwtyp',
    y="percent",
    color="source",
    facet_col="worker_type",
    custom_data=['pdexpfac'],
    barmode="group",
    title="Share of full-time/part-time workers by worker type",
)
# facet titles read "worker_type=<value>"; keep only the value
fig.for_each_annotation(lambda ann: ann.update(text=ann.text.split("=")[-1]))
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>"
                  "worker counts: %{customdata[0]:.0f}",
)
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()

In [13]:
# summed pdexpfac by source, county and worker type
df_worker_count = (
    workers
    .groupby(['source', 'CountyName', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)
# worker-type shares within each source / county combination
df_worker_count['percent'] = (
    df_worker_count
    .groupby(['source', 'CountyName'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    df_worker_count.sort_values(by=['source']),
    x='worker_type',
    y="percent",
    color="source",
    facet_col="CountyName",
    custom_data=['pdexpfac'],
    barmode="group",
    title="worker type by county",
)
# facet titles read "CountyName=<value>"; keep only the value
fig.for_each_annotation(lambda ann: ann.update(text=ann.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis_tickformat='.2%')
# fixed bar order on every facet
fig.update_xaxes(categoryorder='array', categoryarray=['commuter', 'telecommuter', 'wfh'])
fig.show()

In [14]:
# summed pdexpfac by source, district and worker type
df_worker_count = (
    workers
    .groupby(['source', 'district_name', 'worker_type'])['pdexpfac']
    .sum()
    .reset_index()
)
# worker-type shares within each source / district combination
df_worker_count['percent'] = (
    df_worker_count
    .groupby(['source', 'district_name'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    df_worker_count.sort_values(by=['source', 'district_name']),
    x='worker_type',
    y="percent",
    color="source",
    facet_col="district_name",
    facet_col_wrap=4,
    custom_data=['pdexpfac'],
    barmode="group",
    title="worker type by district",
)
# facet titles read "district_name=<value>"; keep only the value
fig.for_each_annotation(lambda ann: ann.update(text=ann.text.split("=")[-1]))
fig.update_layout(height=700, width=900, font=dict(size=11))
# the tick format is applied through update_yaxes so that it reaches every facet row
fig.update_yaxes(tickformat='.2%')
fig.update_xaxes(categoryorder='array', categoryarray=['commuter', 'telecommuter', 'wfh'])
fig.show()

In [15]:
# Worker-type shares by the office-job share bin of the parcel referenced by pwpcl.
# '% office jobs (bins)' is a categorical column. observed=False is passed explicitly so
# that every bin keeps appearing (empty ones with a zero sum), instead of relying on the
# implicit default, which pandas has deprecated and is changing.
df_worker_count = workers.groupby(['source','% office jobs (bins)','worker_type'], observed=False)['pdexpfac']. \
    sum().reset_index()
# share of each worker type within a source / bin combination
df_worker_count['percent'] = df_worker_count.groupby(['source','% office jobs (bins)'], group_keys=False, observed=False)['pdexpfac']. \
    apply(lambda x: x / float(x.sum()))
# df_worker_count
fig = px.bar(df_worker_count.sort_values(by=['source','% office jobs (bins)']), x='worker_type', y="percent", custom_data=['pdexpfac'],
             facet_col="% office jobs (bins)",color="source",
             barmode="group",
             title="worker type by % office jobs in work parcel (bins)")
# facet titles read "<column>=<value>"; keep only the value
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=900, font=dict(size=11))
fig.update_yaxes(tickformat = '.2%')
fig.update_xaxes(categoryorder='array', categoryarray= ['commuter','telecommuter','wfh'])
fig.show()

### worker counts by telework hours

In [16]:
# population in each telework hour: summed pdexpfac per source and work-at-home hour group
df_hour_count = (
    workers
    .groupby(['source', 'wkathome_int', 'wkathome_hour'])['pdexpfac']
    .sum()
    .reset_index()
)
# share of each source's total
df_hour_count['percent'] = (
    df_hour_count
    .groupby(['source'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    df_hour_count.sort_values(by=['source', 'wkathome_int']),
    x="wkathome_hour",
    y="percent",
    color="source",
    barmode="group",
    title="share of workers by telework hour",
)
fig.update_layout(
    height=350,
    width=700,
    font=dict(size=11),
    xaxis=dict(dtick=1),
    yaxis_tickformat='.2%',
)
# the labels are text, so their order is spelled out to keep "10+" last
fig.update_xaxes(categoryorder='array',
                 categoryarray=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10+'])
fig.show()

## work tours

In [17]:
# telecommuters: summed pdexpfac by source and number of work tours, as a share per source
_df = (
    workers.loc[workers['worker_type'] == "telecommuter"]
    .groupby(['source', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)
fig = px.bar(
    _df,
    x="wktours",
    y="percent",
    color="source",
    barmode="group",
    title="teleworkers: number of work tours",
)
fig.update_layout(height=350, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()


In [18]:
# commuters: summed pdexpfac by source and number of work tours, as a share per source
_df = (
    workers.loc[workers['worker_type'] == "commuter"]
    .groupby(['source', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    _df,
    x="wktours",
    y="percent",
    color="source",
    barmode="group",
    title="commuters: number of work tours",
)
fig.update_layout(height=350, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()

In [19]:
# commuters: number of work tours, as a share within each source / pwtyp combination
_df = (
    workers.loc[workers['worker_type'] == "commuter"]
    .groupby(['source', 'pwtyp', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source', 'pwtyp'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    _df,
    x='wktours',
    y="percent",
    color="source",
    facet_col="pwtyp",
    custom_data=['pdexpfac'],
    barmode="group",
    title="commuter: number of work tours by full-/part-time workers",
)
# facet titles keep their "pwtyp=" prefix here (the annotation clean-up is disabled)
# fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>"
                  "worker counts: %{customdata[0]:.0f}",
)
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()

In [20]:
# work-from-home workers: summed pdexpfac by source and number of work tours, as a share per source
_df = (
    workers.loc[workers['worker_type'] == "wfh"]
    .groupby(['source', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    _df,
    x="wktours",
    y="percent",
    color="source",
    barmode="group",
    title="work from home workers: number of work tours",
)
fig.update_layout(height=350, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()

In [21]:
# work-from-home workers: number of work tours, as a share within each source / pwtyp combination
_df = (
    workers.loc[workers['worker_type'] == "wfh"]
    .groupby(['source', 'pwtyp', 'wktours'])['pdexpfac']
    .sum()
    .reset_index()
)
_df['percent'] = (
    _df.groupby(['source', 'pwtyp'])['pdexpfac']
    .transform(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    _df,
    x='wktours',
    y="percent",
    color="source",
    facet_col="pwtyp",
    custom_data=['pdexpfac'],
    barmode="group",
    title="work from home: number of work tours by full-/part-time workers",
)
# facet titles keep their "pwtyp=" prefix here (the annotation clean-up is disabled)
# fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_traces(
    hovertemplate="share of workers: %{y:.2%}<br>"
                  "worker counts: %{customdata[0]:.0f}",
)
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis_tickformat='.2%')
fig.show()

In [22]:
# numerator: summed pdexpfac of workers with at least one work tour, per source and telework-hour group
_df = (
    workers[workers['wktours'] > 0]
    .groupby(['source', 'wkathome_int', 'wkathome_hour'])['pdexpfac']
    .sum()
    .reset_index()
)

# denominator: all workers in the same group, taken from df_hour_count
_df2 = df_hour_count[['source', 'wkathome_int', 'wkathome_hour']].assign(
    total_workers=df_hour_count['pdexpfac']
)
_df = _df.merge(_df2, how='left', on=['source', 'wkathome_int', 'wkathome_hour'])
_df['percent'] = _df['pdexpfac'] / _df['total_workers']


fig = px.bar(
    _df.sort_values(by=['source', 'wkathome_int']),
    x="wkathome_hour",
    y="percent",
    color="source",
    barmode="group",
    title="share of people making 1+ work tours by telework hours",
)
fig.update_layout(
    height=300,
    width=700,
    font=dict(size=11),
    xaxis=dict(dtick=1),
    yaxis_tickformat='.2%',
)
# the labels are text, so their order is spelled out to keep "10+" last
fig.update_xaxes(categoryorder='array',
                 categoryarray=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10+'])
fig.show()

## Tour rates by destination purpose for each worker type



In [23]:
def calc_rates(df):
    """Compute pdexpfac-weighted tour rates by purpose for each source and worker type.

    For every reported tour-count column the rate is
    sum(count * pdexpfac) / sum(pdexpfac), evaluated per (source, worker_type).

    Parameters
    ----------
    df : pandas.DataFrame
        Person-day records with 'source', 'worker_type', 'pdexpfac' and the tour-count
        columns listed in `purposes` below. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (source, purpose): a 'source' column, one rate column per worker
        type found in `df`, and a 'purpose' column holding the tour-count column name.
    """
    # NOTE(review): 'retours' and 'metours' were weighted by the previous implementation
    # but never reported; they are still not reported here - confirm that is intended.
    purposes = ['wbtours','uwtours','wktours','sctours','estours','pbtours','shtours',
                'mltours','sotours']

    # Build the weighted counts on a narrow private copy so the caller's frame does not
    # pick up helper columns as a side effect.
    weighted = df[['source', 'worker_type', 'pdexpfac']].copy()
    for col in purposes:
        weighted[col] = df[col] * df['pdexpfac']

    # The denominator does not depend on the purpose, so it is computed once.
    df_denom = pd.pivot_table(weighted, columns='worker_type', index='source',
                              aggfunc='sum', values='pdexpfac')

    # Collect one frame per purpose and concatenate once at the end.
    rate_frames = []
    for col in purposes:
        df_num = pd.pivot_table(weighted, columns='worker_type', index='source',
                                aggfunc='sum', values=col)
        df_out = df_num / df_denom
        df_out['purpose'] = col
        rate_frames.append(df_out)

    return pd.concat(rate_frames).reset_index()

In [24]:
# tour rates by purpose for all workers
df_tour_rates = calc_rates(df=workers)

In [25]:
# # tour counts by worker type and tour purpose

def plot_tour_rate(df, worker_type, title):
    """Show a grouped bar chart of tour rates by purpose, with one bar colour per source.

    df: frame with 'purpose' and 'source' columns plus a column named by `worker_type`.
    worker_type: name of the column in `df` plotted on the y axis.
    title: figure title.
    """
    layout_options = dict(
        height=300,
        width=700,
        font=dict(size=11),
        yaxis_tickformat='.2f',
        yaxis_title='Tour Rate',
    )
    rate_fig = px.bar(df, x="purpose", y=worker_type, color="source",
                      barmode="group", title=title)
    rate_fig.update_layout(**layout_options)
    rate_fig.show()


In [26]:
# Tour rates for all workers; this chart shows the 'commuter' column. The title names the
# worker type so this figure can be told apart from the telecommuter and wfh charts.
df_tour_rates = calc_rates(workers)
plot_tour_rate(df_tour_rates[['source','commuter','purpose']], 
                'commuter',
                "Commuters: tour rates by destination purpose" 
)

In [27]:
# tour rates computed only over worker person-days without any work tour
workers_no_work_tours = workers.loc[workers['wktours'] == 0].copy()
df_tour_rates = calc_rates(df=workers_no_work_tours)
plot_tour_rate(
    df=df_tour_rates[['source', 'commuter', 'purpose']],
    worker_type='commuter',
    title="Commuter with no work tours: tour rates by destination purpose",
)

In [28]:
# tour rates computed only over worker person-days whose pwpcl is -1
workers_missing_work_loc = workers.loc[workers['pwpcl'] == -1].copy()
df_tour_rates = calc_rates(df=workers_missing_work_loc)
plot_tour_rate(
    df=df_tour_rates[['source', 'commuter', 'purpose']],
    worker_type='commuter',
    title="Commuters with missing work location: tour rates by destination purpose",
)

In [29]:
# Recomputed on the full worker set because the preceding cells overwrite df_tour_rates
# with subset results. The title names the worker type shown.
df_tour_rates = calc_rates(workers)
plot_tour_rate(df_tour_rates[['source','telecommuter','purpose']], 
                'telecommuter',
                "Telecommuters: tour rates by destination purpose" 
)

In [30]:
# Tour rates for all workers; this chart shows the 'wfh' column. The previous title began
# with a stray space and did not name the worker type.
df_tour_rates = calc_rates(workers)
plot_tour_rate(df_tour_rates[['source','wfh','purpose']], 
                'wfh',
                "Work from home workers: tour rates by destination purpose" 
)

## Tour distances by purpose

In [31]:
# Summed toexpfac by source, worker type, tour purpose and distance bin.
# 'toexpfac' is selected before aggregating: summing the whole merged tour frame would
# also try to aggregate its text and categorical columns, which is wasteful and can raise
# a TypeError on recent pandas versions. 'dist_bins' is categorical, so observed=False is
# passed explicitly to keep a zero row for every empty bin.
df_tour_distance = tour.groupby(['source', 'worker_type', 'tour_purpose', 'dist_bins'],
                                observed=False)['toexpfac']. \
    sum().reset_index()

# share of each distance bin within a purpose / worker type / source combination
df_tour_distance['percent'] = df_tour_distance. \
    groupby(['tour_purpose', 'worker_type', 'source'], group_keys=False)['toexpfac']. \
    apply(lambda x: x / float(x.sum()))

In [32]:
def plot_tour_distance(df, dpurp, worker_type_list):
    """Show the tour-distance distribution of one purpose as lines, one facet per source.

    df: frame with 'tour_purpose', 'worker_type', 'source', 'dist_bins' and 'percent' columns.
    dpurp: value of 'tour_purpose' to plot; also used as the start of the figure title.
    worker_type_list: worker types to keep, drawn as one line colour each.
    """
    keep_rows = (df['tour_purpose'] == dpurp) & df['worker_type'].isin(worker_type_list)
    df_plot = df[keep_rows].copy()

    dist_fig = px.line(
        df_plot,
        x='dist_bins',
        y="percent",
        color="worker_type",
        facet_col='source',
        template="simple_white",
        title=dpurp + " tour distance",
    )

    # facet titles read "source=<value>"; keep only the value
    dist_fig.for_each_annotation(lambda ann: ann.update(text=ann.text.split("=")[-1]))
    dist_fig.update_layout(height=400, width=700, font=dict(size=11), yaxis_tickformat='.2%')
    dist_fig.show()


plot_tour_distance(
    df=df_tour_distance,
    dpurp="wktours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)

In [33]:
plot_tour_distance(
    df=df_tour_distance,
    dpurp="sctours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)

In [34]:
plot_tour_distance(
    df=df_tour_distance,
    dpurp="estours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)

In [35]:
plot_tour_distance(
    df=df_tour_distance,
    dpurp="pbtours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)

In [36]:
plot_tour_distance(
    df=df_tour_distance,
    dpurp="shtours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)

In [37]:
plot_tour_distance(
    df=df_tour_distance,
    dpurp="mltours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)

In [38]:
plot_tour_distance(
    df=df_tour_distance,
    dpurp="sotours",
    worker_type_list=["wfh", "commuter", "telecommuter"],
)