In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# Renderer configuration so plotly figures show up in the Quarto-rendered HTML file:
# the default renderer string combines "plotly_mimetype" and "notebook_connected".
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default template applied to every figure created below: "simple_white" combined
# with "psrc_color".
# NOTE(review): "psrc_color" is presumably registered by `import psrc_theme` - confirm.
pio.templates.default = "simple_white+psrc_color" # set plotly template

In [2]:
# Restore `validation_data` from IPython's %store storage; it must have been
# saved with `%store validation_data` in an earlier session/notebook.
# NOTE(review): restoring presumably needs the `validation_data_input` import above
# so the stored object can be rebuilt - confirm before removing that import.
%store -r validation_data

# Take copies of the attributes used by this notebook so the cells below work on
# their own objects rather than on the ones held by `validation_data`.
config = validation_data.config.copy()
person = validation_data.person.copy()
tour = validation_data.tour.copy()
person_day = validation_data.person_day.copy()

Total Tours

In [3]:
# Weighted number of tours per data source (sum of the `toexpfac` column).
# Select `toexpfac` *before* aggregating: summing the whole frame and picking the
# column afterwards aggregates every column needlessly, and since pandas 2.0
# non-numeric columns are no longer silently dropped from `sum()`.
df = (
    tour.groupby('source')['toexpfac']
    .sum()
    .reset_index()
    .rename(columns={'toexpfac': 'Total Tours'})
)
# Display-only formatting: thousands separators, no decimals (values become strings).
df['Total Tours'] = df['Total Tours'].map("{:,.0f}".format)
df

Unnamed: 0,source,Total Tours
0,model,5723273
1,survey,5858894
2,survey (2017/2019),5095327
3,survey (old weights),5565495


In [4]:
# Code -> display label for the `tmodetp` column. Labels start with the code so
# that sorting the labels alphabetically keeps the numeric order.
mode_cat = {
    1: "1: walk",
    2: "2: bike",
    3: "3: sov",
    4: "4: hov 2",
    5: "5: hov 3+",
    6: "6: walk to transit",
    7: "7: park-and-ride",
    8: "8: school bus",
    9: "9:tnc",
}

# Code -> display label for the `pdpurp` column.
pdpurp_cat = {
    1: "1: Work",
    2: "2: School",
    3: "3: Escort",
    4: "4: Personal Business",
    5: "5: Shop",
    6: "6: Meal",
    7: "7: Social",
}

df_person_day = person_day.copy()

# `assign` returns a new frame, so `tour` itself is left untouched. Codes missing
# from a mapping become NaN in the corresponding label column.
df_tour = tour.assign(
    tmodetp_label=tour['tmodetp'].map(mode_cat),
    pdpurp_label=tour['pdpurp'].map(pdpurp_cat),
)

# Disabled join of person attributes (`df_person` is not defined in the cells above):
# df_tour = df_tour.merge(df_person, how='left', on=['pno','hhno','source'])

In [5]:
def tour_stats(df: pd.DataFrame, var: str, title_cat: str, wid: int = 700) -> None:
    """Show a grouped bar chart of the weighted tour share per category of `var`.

    For every (`source`, `var`) pair the `toexpfac` weights are summed and divided
    by the total `toexpfac` of that source, so the bars of each source add up to
    100%. The number of records behind each bar is shown on hover as
    'sample count'.

    Args:
        df: Tour records; must contain the columns 'source', 'toexpfac' and `var`.
        var: Name of the categorical column plotted on the x axis.
        title_cat: Figure title.
        wid: Figure width in pixels.

    Returns:
        None. The figure is displayed with `fig.show()`.
    """
    # One pass over the groups yields both the weighted total and the record count
    # (count ignores rows whose `toexpfac` is missing).
    df_plot = (
        df.groupby(['source', var])['toexpfac']
        .agg(**{'toexpfac': 'sum', 'sample count': 'count'})
        .reset_index()
    )
    # `transform` broadcasts each source's total back onto its rows, so the
    # division stays aligned row by row.
    source_total = df_plot.groupby('source')['toexpfac'].transform('sum')
    df_plot['percentage'] = df_plot['toexpfac'] / source_total

    fig = px.bar(df_plot.sort_values(by=['source']), x=var, y="percentage", color="source",
                 barmode="group", hover_data=['sample count'], title=title_cat)
    # Honour the caller-supplied width (it was previously ignored in favour of a
    # hard-coded 700).
    fig.update_layout(height=400, width=wid, font=dict(size=11),
                      xaxis=dict(dtick=1, categoryorder='category ascending'),
                      yaxis=dict(tickformat=".2%"))
    fig.show()

In [6]:
# Weighted share of tours by mode label, one bar per source.
tour_stats(df_tour, var='tmodetp_label', title_cat='tour mode')

In [7]:
# Weighted share of tours by purpose label, one bar per source.
tour_stats(df_tour, var='pdpurp_label', title_cat='tour purpose')

In [8]:
# Per-purpose tour-count columns of the person-day table.
all_purpose = [
    'wbtours', 'wktours', 'sctours', 'estours',
    'pbtours', 'shtours', 'mltours', 'sotours',
]

# Weight every tour-count column by `pdexpfac` (presumably the person-day
# expansion factor - confirm) on a copy, leaving `df_person_day` unweighted.
df_person_day2 = df_person_day.copy()
df_person_day2[all_purpose] = df_person_day2[all_purpose].mul(
    df_person_day2['pdexpfac'], axis=0
)

# Long table, one row per (source, tour_purpose):
#   'count'    = sum of weighted tours,
#   'pdexpfac' = sum of the weights over the whole source (same on every row of it).
df = (
    pd.pivot_table(
        df_person_day2,
        values=all_purpose + ['pdexpfac'],
        index=['source'],
        aggfunc="sum",
    )
    .reset_index()
    .melt(
        id_vars=['source', 'pdexpfac'],
        value_vars=all_purpose,
        var_name='tour_purpose',
        value_name='count',
    )
)

# Same layout built from the unweighted table: 'sample count' is the raw sum of
# tours, to which the weighted columns are attached.
tour_rate = (
    pd.pivot_table(
        df_person_day,
        values=all_purpose,
        index=['source'],
        aggfunc="sum",
    )
    .reset_index()
    .melt(
        id_vars=['source'],
        value_vars=all_purpose,
        var_name='tour_purpose',
        value_name='sample count',
    )
    .merge(df, on=['source', 'tour_purpose'], how='left')
)

# Weighted tours divided by the source's total weight.
tour_rate['tour rate'] = tour_rate['count'] / tour_rate['pdexpfac']

In [9]:
# Grouped bars: one cluster per tour purpose, one bar per source; the unweighted
# 'sample count' is available on hover.
fig = px.bar(
    tour_rate,
    x="tour_purpose",
    y='tour rate',
    color="source",
    barmode="group",
    hover_data=['sample count'],
    title='tour rate by tour purpose',
).update_layout(
    height=300,
    width=700,
    font=dict(size=11),
    yaxis=dict(tickformat=".2%", title='Tour Rate'),
)
fig.show()