In [1]:
import os
import toml
import pandas as pd
import numpy as np
import plotly.express as px
import validation_data_input
import psrc_theme

import plotly.io as pio
# Notebook-wide plotly defaults: every figure created below inherits them.
pio.renderers.default = "plotly_mimetype+notebook_connected" # renderer chosen so plotly figures show up in the Quarto HTML output
pio.templates.default = "simple_white+psrc_color" # default figure template; "psrc_color" is presumably registered by the psrc_theme import -- TODO confirm

# Restore the `validation_data` variable from IPython's %store database.
# get_data.ipynb has to be run first so that the variable exists in the store.
%store -r validation_data

# Load the validation settings from validation_configuration.toml, looked up in
# the current working directory (so the notebook must be run from that folder).
config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))

In [2]:
# Work on a copy of the stored tours table so nothing in this notebook writes
# back into validation_data.
tour_data = validation_data.tours.copy()

# Keep only the non-mandatory tours. The explicit .copy() makes the subset an
# independent frame: columns are added to it in the next cell, and doing that
# on a bare .loc[] selection triggers pandas' SettingWithCopyWarning.
non_m_tour_data = tour_data.loc[tour_data['tour_category'] == "non_mandatory"].copy()

In [3]:
# Derive one comparable distance per non-mandatory tour and bin it for the
# charts below.
#
# NOTE(review): the recorded run of this cell failed with KeyError: 'distance',
# i.e. the tours table had no 'distance' column. np.where evaluates both
# branches eagerly, so both distance columns must be present. Check them up
# front and report every missing column at once; which column the survey
# records should use instead has to be confirmed against get_data.ipynb.
required_cols = ['source', 'distance', 'tour_distance_one_way']
missing_cols = [col for col in required_cols if col not in non_m_tour_data.columns]
if missing_cols:
    raise KeyError("non_m_tour_data is missing required column(s): {}".format(missing_cols))

# Survey records take their length from 'distance'; every other source uses
# 'tour_distance_one_way'.
is_survey = non_m_tour_data['source'].isin(["survey data", "unweighted survey"])
tour_distance = pd.Series(
    np.where(is_survey, non_m_tour_data['distance'], non_m_tour_data['tour_distance_one_way']),
    index=non_m_tour_data.index,
)

# Create bins: bins of 2 miles up to 60 miles
max_bin = 60
bin_size = 2
edges_60mi = np.arange(0, max_bin + bin_size, bin_size)

# NOTE(review): pd.cut intervals are right-closed by default, so a distance of
# exactly 0 (and anything above the last edge) lands in no bin and becomes
# NaN -- confirm that excluding zero-length tours is intended.
# .assign builds a new frame, so re-running this cell is idempotent and no
# columns are written onto a slice of tour_data.
non_m_tour_data = non_m_tour_data.assign(
    tour_distance=tour_distance,
    # coarse bins used by the bar charts
    distance_bin=pd.cut(
        tour_distance,
        bins=[0, 1, 2, 5, 15, 9999],
        labels=['dist_0_1', 'dist_1_2', 'dist_2_5', 'dist_5_15', 'dist_15_up'],
    ),
    # fine bins used by the line chart; each label is the lower edge of its bin
    distance_bin_60mi=pd.cut(
        tour_distance,
        bins=edges_60mi,
        labels=[str(lower_edge) for lower_edge in edges_60mi[:-1]],
    ),
)

KeyError: 'distance'

## non-mandatory tour distance

In [4]:
# plot1: share of weighted non-mandatory tours in each coarse distance bin,
# computed separately for every source.
#
# Tours that fell outside every bin have a NaN distance_bin and are dropped
# with .notna(); an inequality comparison against NaN is always True and would
# keep every row. observed=False is spelled out so that bins with no tours stay
# in the table with a weight of 0, whatever the installed pandas default is.
df_plot = (
    non_m_tour_data.loc[non_m_tour_data['distance_bin'].notna()]
    .groupby(['source', 'distance_bin'], observed=False)['trip_weight_2017_2019']
    .sum()
    .reset_index()
)

# Each bin's weight divided by the total weight of its source.
df_plot['percentage'] = (
    df_plot['trip_weight_2017_2019']
    / df_plot.groupby(['source'])['trip_weight_2017_2019'].transform('sum')
)

fig1 = px.bar(df_plot, x='distance_bin', y="percentage", color="source", barmode="group",
              title="share of non-mandatory tours by distance bin")
# Shorten any "name=value" annotation text to just the value.
fig1.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig1.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".1%"))
fig1.show()

In [5]:
# Distribution of weighted non-mandatory tours over the 2-mile distance bins,
# computed separately for every source.
#
# observed=False is spelled out so that empty bins stay in the table with a
# weight of 0 (the line then drops to zero there instead of skipping the bin),
# whatever the installed pandas default for categorical groupers is.
df_plot = (
    non_m_tour_data.loc[non_m_tour_data['distance_bin_60mi'].notna()]
    .groupby(['source', 'distance_bin_60mi'], observed=False)['trip_weight_2017_2019']
    .sum()
    .reset_index()
)

# Each bin's weight divided by the total weight of its source.
df_plot['percentage'] = (
    df_plot['trip_weight_2017_2019']
    / df_plot.groupby(['source'])['trip_weight_2017_2019'].transform('sum')
)

fig2 = px.line(df_plot, x='distance_bin_60mi', y="percentage", color="source", title="tour distance")
# Shorten any "name=value" annotation text to just the value.
fig2.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig2.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".1%"))
fig2.show()

## non-mandatory tour distance by purpose

In [6]:
# Share of weighted non-mandatory tours in each coarse distance bin, computed
# within every (source, tour_type) pair and drawn as one facet row per tour type.
#
# Tours that fell outside every bin have a NaN distance_bin and are dropped
# with .notna(); an inequality comparison against NaN is always True and would
# keep every row. observed=False is spelled out so that empty combinations stay
# in the table with a weight of 0, whatever the installed pandas default is.
df_plot = (
    non_m_tour_data.loc[non_m_tour_data['distance_bin'].notna()]
    .groupby(['source', 'tour_type', 'distance_bin'], observed=False)['trip_weight_2017_2019']
    .sum()
    .reset_index()
)

# Each bin's weight divided by the total weight of its (source, tour_type) group.
df_plot['percentage'] = (
    df_plot['trip_weight_2017_2019']
    / df_plot.groupby(['source', 'tour_type'])['trip_weight_2017_2019'].transform('sum')
)

fig1 = px.bar(df_plot, x='distance_bin', y="percentage", color="source", barmode="group",
              facet_row='tour_type',
              title="tour distance bin by tour purpose")
# Facet labels arrive as "tour_type=<value>"; keep only the value.
fig1.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig1.update_layout(height=1000, width=700, font=dict(size=11))
# layout.yaxis addresses only the first facet's axis; update_yaxes applies the
# percent format to the y-axis of every facet row.
fig1.update_yaxes(tickformat=".1%")
fig1.show()