In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import polars as pl
from pathlib import Path
import util
import psrc_theme

# Plotly display setup.
# The combined renderer is needed to show plotly figures in the quarto HTML file.
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default template for every figure created below: plotly's "simple_white"
# combined with "psrc_color".
# NOTE(review): "psrc_color" is presumably registered by the `psrc_theme`
# import above - confirm.
pio.templates.default = "simple_white+psrc_color" # set plotly template

In [2]:
# %store -r validation_data

# config = validation_data.config.copy()
# input_config = validation_data.input_config.copy()
# person = validation_data.person.copy()
# trip = validation_data.trip.copy()
# person_day = validation_data.person_day.copy()

# The configuration folder sits four levels above the working directory.
# The path is built from separate components rather than a backslash-separated
# literal: "\." and "\c" are invalid escape sequences in a normal string, and
# backslashes are only treated as separators on Windows.
config_dir = Path.cwd().joinpath('..', '..', '..', '..', 'configuration')
config = toml.load(config_dir / 'validation_configuration.toml')
input_config = toml.load(config_dir / 'input_configuration.toml')

# Load only the tables used in this notebook.
data = util.ValidationData(config, input_config, ['person', 'trip', 'person_day'])

# The cells below work with pandas DataFrames.
person = data.person.to_pandas()
trip = data.trip.to_pandas()
person_day = data.person_day.to_pandas()

In [3]:
# Try to load PSRC database if available from Elmer.
# This is best-effort: when the table cannot be loaded, the notebook continues
# with an empty frame and the cells below skip the Elmer comparison.
try:
    # base_year comes from the project's input configuration file. The f-string
    # formats it whether it is stored as text or as a number.
    trip_elmer = validation_data_input.load_elmer_table(
        "HHSurvey.v_trips_labels",
        sql="SELECT * FROM HHSurvey.v_trips_labels"
            f" WHERE survey_year in ({input_config['base_year']})",
    )
except Exception as exc:
    # Catch Exception rather than using a bare except so KeyboardInterrupt and
    # SystemExit still propagate, and report why the fallback was taken.
    print(f"Elmer trip data not loaded ({type(exc).__name__}: {exc}); continuing without it.")
    trip_elmer = pd.DataFrame()


Total Trips

In [4]:
# Sum trexpfac for each data source.
df = (
    trip.groupby('source')['trexpfac'].sum()
    .reset_index()
    .rename(columns={'trexpfac': 'Total Trips'})
)

# If Elmer data is available, add its total as an extra row alongside the
# existing sources.
if not trip_elmer.empty:
    total_trip_elmer = trip_elmer['trip_weight'].sum()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    elmer_row = pd.DataFrame([{'source': 'Full Survey Data', 'Total Trips': total_trip_elmer}])
    df = pd.concat([df, elmer_row], ignore_index=True)

# Format as whole numbers with thousands separators for display.
df['Total Trips'] = df['Total Trips'].apply(lambda x: f"{x:,.0f}")
df

Unnamed: 0,source,Total Trips
0,model,15804504
1,survey,13902503
2,survey (2017/2019),15388735
3,Full Survey Data,16125820


In [5]:
# Lookup from trip mode code to the label shown on charts.
mode_cat = {
    1: "1: walk",
    2: "2: bike",
    3: "3: sov",
    4: "4: hov 2",
    5: "5: hov 3+",
    6: "6: walk to transit",
    7: "7: park-and-ride",
    8: "8: school bus",
    9: "9:tnc",
}

# Lookup from the `dpurp` code to the label shown on charts.
dpurp_cat = {
    1: "1: Work",
    2: "2: School",
    3: "3: Escort",
    4: "4: Personal Business",
    5: "5: Shop",
    6: "6: Meal",
    7: "7: Social",
}

# Build a labelled copy of the trip table; `trip` itself is left unchanged.
# Codes missing from a lookup become NaN in the label column.
df_trip = trip.assign(
    mode_label=trip['mode'].map(mode_cat),
    dpurp_label=trip['dpurp'].map(dpurp_cat),
)

In [6]:
def trip_stats(df:pd.DataFrame, var:str, title_cat:str, wid = 700):
    df_plot = df.groupby(['source',var]).agg(
        total=pd.NamedAgg(column="trexpfac", aggfunc="sum"),
        sample_size=pd.NamedAgg(column="trexpfac", aggfunc="size")).reset_index()
    df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['total']. \
        apply(lambda x: x / float(x.sum()))
    
    fig = px.bar(df_plot, x=var, y="percentage", color="source",
                 barmode="group",hover_data=['total','sample_size'],title=title_cat)
    fig.update_layout(height=400, width=700, font=dict(size=11),
                      xaxis = dict(dtick = 1, categoryorder='category ascending'),
                      yaxis=dict(tickformat=".2%"))
    fig.show()

In [7]:
# Share of summed trexpfac by trip mode label, one bar per source.
trip_stats(df_trip,'mode_label','trip mode')

In [8]:
# Share of summed trexpfac by trip purpose label, one bar per source.
trip_stats(df_trip,'dpurp_label','trip purpose')