In [2]:
import os
import toml
import pandas as pd
import numpy as np
import plotly.express as px
import validation_data_input
import psrc_theme
# from plotly.subplots import make_subplots

import plotly.io as pio
# Plotly output setup.
# The combined renderer is what lets figures show up in the Quarto HTML build.
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Layer the "psrc_color" template on top of plotly's "simple_white"
# (presumably registered by the psrc_theme import above -- confirm).
pio.templates.default = "simple_white+psrc_color"

# Validation settings are read from the TOML file in the current working directory.
config = toml.load(
    os.path.join(os.getcwd(), 'validation_configuration.toml')
)

In [2]:
# ---- Load inputs ------------------------------------------------------------
land_use = pd.read_csv(config['p_survey_landuse'])

per_data = validation_data_input.get_persons_data(
    ['distance_to_work', 'num_non_mand', 'cdap_activity', 'is_worker', 'is_student', 'ptype']
)
hh_data = validation_data_input.get_households_data(['auto_ownership'])
tour_data = validation_data_input.get_tours_data(['tour_type', 'tour_category'])


def _label_capped_at_4(value):
    """Return "4+" for values of 4 or more, otherwise the value's plain string form."""
    if value >= 4.0:
        return "4+"
    return str(value)


# ---- Derived household / person columns -------------------------------------
# Auto ownership with everything from 4 upward collapsed into a single "4+" class.
hh_data['auto_ownership_simple'] = hh_data['auto_ownership'].apply(_label_capped_at_4)

# Human-readable person type labels keyed by the numeric ptype code.
ptype_cat = {
    1: "1: Full-Time Worker",
    2: "2: Part-Time Worker",
    3: "3: University Student",
    4: "4: Non-Working Adult Age <65",
    5: "5: Non-Working Adult Age 65+",
    6: "6: High School Student Age 16+",
    7: "7: Child Age 5-15",
    8: "8: Child Age 0-4",
}
per_data['ptype_label'] = per_data['ptype'].map(ptype_cat)

# Distance-to-work bins: 2-mile wide, covering 0 to 46 miles, each labelled by its lower edge.
# NOTE: pd.cut's default intervals are right-closed and exclude the lowest edge, so a
# distance of exactly 0 (and anything above 46) ends up as NaN.
max_bin = 46
bin_size = 2
_bin_edges = np.arange(0, max_bin + bin_size, bin_size)
per_data['d_work_bin_46mi'] = pd.cut(
    per_data['distance_to_work'],
    bins=_bin_edges,
    labels=[str(lower_edge) for lower_edge in _bin_edges[:-1]],
)

# Number of non-mandatory tours per person, top-coded at "4+".
per_data['num_non_mand_simple'] = per_data['num_non_mand'].apply(_label_capped_at_4)

# ---- Joins ------------------------------------------------------------------
# Bring auto ownership from the household table onto each person.
per_data = per_data.merge(
    hh_data[['household_id', 'auto_ownership', 'auto_ownership_simple', 'source']],
    how='left',
    on=['household_id', 'source'],
)
# Attach the person (and household) attributes to every tour.
tour_data = tour_data.merge(
    per_data,
    how='left',
    on=['person_id', 'household_id', 'source'],
)

# Only the tours whose category is "non_mandatory" are analysed below.
non_m_tour_data = tour_data.loc[tour_data['tour_category'].eq("non_mandatory")]

# Total (weighted) number of persons by source; used as the denominator for tour rates.
df_person_count = (
    per_data
    .groupby('source')['hh_weight_2017_2019']
    .sum()
    .reset_index()
)

## non-mandatory tours per person

In [3]:
# Weighted non-mandatory tour totals per source, joined to the person totals so the
# rate is tours per person.
df_plot = (
    non_m_tour_data
    .groupby(['source', 'tour_category'])[['trip_weight_2017_2019']]
    .sum()
    .reset_index()
    .merge(df_person_count, how='left', on='source')
    .assign(tour_cat_rate=lambda d: d['trip_weight_2017_2019'] / d['hh_weight_2017_2019'])
)

fig = px.bar(
    df_plot,
    x="tour_category",
    y="tour_cat_rate",
    color="source",
    barmode="group",
    title="non-mandatory tour rates",
)
fig.update_layout(height=400, width=500, font={'size': 11})
fig.show()

In [4]:
# Weighted persons by number of non-mandatory tours, per source.
df_plot = (
    per_data
    .groupby(['num_non_mand', 'source'])['hh_weight_2017_2019']
    .sum()
    .reset_index()
)

# Share of each tour count within its own source (shares sum to 1 per source).
df_plot['percentage'] = (
    df_plot['hh_weight_2017_2019']
    / df_plot.groupby(['source'])['hh_weight_2017_2019'].transform('sum')
)

fig = px.bar(
    df_plot,
    x="num_non_mand",
    y="percentage",
    color="source",
    barmode="group",
    title="number of non mandatory tours in a day",
)
fig.update_layout(height=400, width=700, yaxis={'tickformat': ".2%"})
fig.show()

## Number of non-mandatory tours by segment

In [5]:
# Weighted person totals by source and person type; reused below as the
# denominator for the per-person-type tour rates.
df_ptype_count = (
    per_data
    .groupby(['source', 'ptype_label'])['hh_weight_2017_2019']
    .sum()
    .reset_index()
)

# Show the rows whose source is "model results".
df_ptype_count[df_ptype_count['source'].eq("model results")]

Unnamed: 0,source,ptype_label,hh_weight_2017_2019
0,model results,1: Full-Time Worker,1610659.0
1,model results,2: Part-Time Worker,296080.0
2,model results,3: University Student,160485.0
3,model results,4: Non-Working Adult Age <65,613336.0
4,model results,5: Non-Working Adult Age 65+,473685.0
5,model results,6: High School Student Age 16+,160696.0
6,model results,7: Child Age 5-15,519085.0
7,model results,8: Child Age 0-4,219128.0


- tour rates by person type

In [6]:
# Weighted non-mandatory tours per source and person type, divided by the matching
# weighted person totals to give tours per person.
df_plot = (
    non_m_tour_data
    .groupby(['source', 'ptype_label', 'tour_category'])[['trip_weight_2017_2019']]
    .sum()
    .reset_index()
    .merge(df_ptype_count, how='left', on=['source', 'ptype_label'])
    .assign(tour_cat_rate=lambda d: d['trip_weight_2017_2019'] / d['hh_weight_2017_2019'])
)

fig = px.bar(
    df_plot,
    x="ptype_label",
    y="tour_cat_rate",
    color="source",
    barmode="group",
    # facet_col="ptype_label", facet_col_wrap=2,
    title="non-mandatory tour rates by person type",
)
# Keeps only the value part of "name=value" facet titles; it only has something to
# act on if the facet arguments above are re-enabled.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=400, width=700)
fig.show()

- number of non-mandatory tours by person type

In [7]:
# Weighted persons by number of non-mandatory tours (top-coded at "4+"),
# split by source and person type.
df_plot = (
    per_data
    .groupby(['source', 'ptype_label', 'num_non_mand_simple'])['hh_weight_2017_2019']
    .sum()
    .reset_index()
)

# Share of each tour count within its (source, person type) group.
df_plot['percentage'] = (
    df_plot['hh_weight_2017_2019']
    / df_plot.groupby(['source', 'ptype_label'])['hh_weight_2017_2019'].transform('sum')
)

fig = px.bar(
    df_plot,
    x="num_non_mand_simple",
    y="percentage",
    color="source",
    barmode="group",
    facet_col="ptype_label",
    facet_col_wrap=2,
    title="number of non-mandatory tours by person type",
)
# Facet titles come out as "ptype_label=<value>"; keep just the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=1050, width=700)
fig.update_yaxes(tickformat=".1%")
fig.show()

- tour rates by auto ownership

In [8]:
# Weighted person totals by source and (top-coded) household auto ownership;
# this is the denominator for the tour rate below.
df_auto_count = per_data.groupby(['source','auto_ownership_simple'])['hh_weight_2017_2019'].sum().reset_index()

# Weighted non-mandatory tours per source and auto ownership class, divided by the
# matching person totals to give non-mandatory tours per person.
df_plot = non_m_tour_data. \
    groupby(['source','auto_ownership_simple','tour_category'])[['trip_weight_2017_2019']].sum().reset_index(). \
    merge(df_auto_count, how='left', on=['source','auto_ownership_simple'])
df_plot['tour_cat_rate'] = df_plot['trip_weight_2017_2019']/df_plot['hh_weight_2017_2019']

fig = px.bar(df_plot, x="auto_ownership_simple", y="tour_cat_rate", color="source",barmode="group",
             # facet_col="ptype_label",facet_col_wrap=2,
             title="non-mandatory tour rates by auto ownership")
# Only has an effect if the facet arguments above are re-enabled.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=400, width=700)
# The y axis is a rate (tours per person), not a share, so it is left as a plain
# number like the other tour-rate charts; a ".1%" tick format would render a rate
# of 1.3 tours per person as "130.0%".
fig.show()

- number of non-mandatory tours by auto ownership

In [9]:
# Weighted persons by number of non-mandatory tours (top-coded at "4+"),
# split by source and household auto ownership class.
df_1 = (
    per_data
    .groupby(['source', 'auto_ownership_simple', 'num_non_mand_simple'])['hh_weight_2017_2019']
    .sum()
    .reset_index()
)

# Share of each tour count within its (auto ownership, source) group.
df_1['percentage'] = (
    df_1['hh_weight_2017_2019']
    / df_1.groupby(['auto_ownership_simple', 'source'])['hh_weight_2017_2019'].transform('sum')
)

fig = px.bar(
    df_1,
    x="num_non_mand_simple",
    y="percentage",
    color="source",
    barmode="group",
    facet_col="auto_ownership_simple",
    title="number of non-mandatory tours by auto ownership",
)
# Facet titles come out as "auto_ownership_simple=<value>"; keep just the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=400, width=1200)
fig.update_yaxes(tickformat=".1%")
fig.show()