The individual mandatory tour frequency model **predicts the number of work and school tours taken by each person with a mandatory daily activity pattern (DAP)**. The primary drivers of mandatory tour frequency are demographics, accessibility-based parameters such as drive time to work, and household automobile ownership.

In [1]:
import os
import toml
import pandas as pd
import validation_data_input
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"

# validation settings come from the TOML file located in the current working directory
config_path = os.path.join(os.getcwd(), 'validation_configuration.toml')
config = toml.load(config_path)

In [2]:
# ---- read data ----
# NOTE(review): land_use is not referenced again in the visible cells — confirm it is still needed
land_use = pd.read_csv(config['p_survey_landuse'])

person_cols = ['distance_to_work','cdap_activity','is_worker','is_student']
per_data = validation_data_input.get_persons_data(person_cols)
hh_data = validation_data_input.get_households_data(['auto_ownership'])
tour_data = validation_data_input.get_tours_data(['tour_type','tour_category'])


# ---- auto ownership label with a combined "4+" class ----
def _auto_ownership_label(n_autos):
    """Return "4+" when n_autos is 4 or more, otherwise str(n_autos)."""
    # NOTE(review): str() of a float renders as e.g. "1.0" — confirm every source stores
    # auto_ownership with the same dtype so the labels line up across sources
    if n_autos >= 4.0:
        return "4+"
    return str(n_autos)


hh_data['auto_ownership_simple'] = hh_data['auto_ownership'].apply(_auto_ownership_label)

# ---- keep persons whose cdap_activity is "M" and attach household auto ownership ----
is_mandatory_person = per_data['cdap_activity']=="M"
hh_auto_cols = ['household_id','auto_ownership','auto_ownership_simple','source']
per_data = per_data.loc[is_mandatory_person].merge(
    hh_data[hh_auto_cols], how='left', on=['household_id','source']
)

# ---- distance to work bins: 2-mile bins up to 46 miles, labelled by their lower edge ----
max_bin = 46
bin_size = 2
bin_edges = np.arange(0, max_bin+bin_size, bin_size)
bin_labels = [str(lower_edge) for lower_edge in bin_edges[:-1]]
# pd.cut defaults to right-closed intervals without the lowest edge, so a distance of
# exactly 0 (or anything above 46) gets no bin.
# NOTE(review): confirm that dropping zero-distance persons from the binned chart is intended
per_data['d_work_bin_46mi'] = pd.cut(per_data['distance_to_work'], bins=bin_edges, labels=bin_labels)

# ---- tours made by the retained persons, with the person attributes joined on ----
made_by_kept_person = tour_data['person_id'].isin(per_data['person_id'])
tour_data = tour_data.loc[made_by_kept_person].merge(
    per_data, how='left', on=['person_id','household_id','source']
)

# ---- sum of hh_weight_2017_2019 over the retained persons, per source ----
df_person_count = (
    per_data
    .groupby('source')['hh_weight_2017_2019']
    .sum()
    .reset_index()
)

## mandatory tours per person

In [3]:
# summed trip_weight_2017_2019 of mandatory tours per source
is_mandatory_tour = tour_data['tour_category']=="mandatory"
mandatory_tour_totals = (
    tour_data.loc[is_mandatory_tour]
    .groupby(['source','tour_category'])[['trip_weight_2017_2019']]
    .sum()
    .reset_index()
)

# attach the per-source person totals and divide tours by persons
df_plot = mandatory_tour_totals.merge(df_person_count, how='left', on='source')
df_plot['tour_cat_rate'] = df_plot['trip_weight_2017_2019'].div(df_plot['hh_weight_2017_2019'])

fig = px.bar(
    df_plot,
    x="tour_category",
    y="tour_cat_rate",
    color="source",
    barmode="group",
    template="simple_white",
    # color_discrete_sequence=config['psrc_color'],
    title="mandatory tour rates <br>(persons with mandatory DAP only)",
)
fig.update_layout(height=400, width=400, font=dict(size=11))
fig.show()

- student/worker count

In [4]:
# total number of worker/student by source
df_worker_count = per_data.loc[per_data['is_worker']].groupby('source')['hh_weight_2017_2019'].sum().reset_index()
df_worker_count['tour_type'] = "work"
df_worker_count['person_type'] = "worker"

df_student_count = per_data.loc[per_data['is_student']].groupby('source')['hh_weight_2017_2019'].sum().reset_index()
df_student_count['tour_type'] = "school"
df_student_count['person_type'] = "student"

df_worker_student_count = pd.concat([df_worker_count,df_student_count])


df_1 = df_worker_student_count.groupby(['source','person_type'])['hh_weight_2017_2019'].sum().reset_index().sort_values('person_type')
df_1['hh_weight_2017_2019'] = df_1['hh_weight_2017_2019'].apply(lambda x: '%.2f' % x)

df_1

Unnamed: 0,source,person_type,hh_weight_2017_2019
0,model results,student,761147.0
2,survey data,student,300580.2
4,unweighted survey,student,796.0
1,model results,worker,1281644.0
3,survey data,worker,1028814.53
5,unweighted survey,worker,4022.0


- tour rates by student/worker

In [5]:

# summed trip_weight_2017_2019 of work and school tours per source
# NOTE(review): tours are selected by tour_type only (no is_worker / is_student filter on
# the tour maker) while the denominator counts workers / students — confirm this is intended
work_school_tours = tour_data.loc[tour_data['tour_type'].isin(["work","school"])]
tour_totals = (
    work_school_tours
    .groupby(['source','tour_type'])[['trip_weight_2017_2019']]
    .sum()
    .reset_index()
)

# work tours pair with worker totals, school tours with student totals (via tour_type)
df_plot = tour_totals.merge(df_worker_student_count, how='left', on=['source','tour_type'])
df_plot['tour_cat_rate'] = df_plot['trip_weight_2017_2019'].div(df_plot['hh_weight_2017_2019'])


def _facet_title_value(annotation):
    """Keep only the text after the last "=" in a facet annotation."""
    return annotation.update(text = annotation.text.split("=")[-1])


fig = px.bar(
    df_plot,
    x="tour_type",
    y="tour_cat_rate",
    color="source",
    barmode="group",
    template="simple_white",
    facet_col='person_type',
    title="school/work tour rates per student/worker",
)
fig.for_each_annotation(_facet_title_value)
fig.update_layout(height=400, width=500, font=dict(size=11))
# each facet gets its own x axis instead of sharing categories
fig.update_xaxes(matches=None)
fig.show()

## number of mandatory tours per person by segment

In [6]:
auto_keys = ['source','auto_ownership_simple']

# denominator: summed hh_weight_2017_2019 of persons per source and auto ownership class
df_1 = (
    per_data
    .groupby(auto_keys)['hh_weight_2017_2019']
    .sum()
    .reset_index()
)
# numerator: summed trip_weight_2017_2019 of mandatory tours for the same groups
is_mandatory_tour = tour_data['tour_category']=="mandatory"
df_2 = (
    tour_data.loc[is_mandatory_tour]
    .groupby(auto_keys)['trip_weight_2017_2019']
    .sum()
    .reset_index()
)

# plot1
df_plot = df_1.merge(df_2, how='left', on=auto_keys)
df_plot['tour_rate'] = df_plot['trip_weight_2017_2019'].div(df_plot['hh_weight_2017_2019'])

fig = px.bar(
    df_plot,
    x="auto_ownership_simple",
    y="tour_rate",
    color="source",
    barmode="group",
    template="simple_white",
    # color_discrete_sequence=config['psrc_color'],
    title="mandatory tour rates by auto ownership (persons with mandatory DAP only)",
)
fig.for_each_annotation(lambda note: note.update(text = note.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11))
fig.update_xaxes(dtick=1)
fig.show()

- auto ownership by student/worker

In [7]:
# worker
df_1 = per_data.loc[per_data['is_worker']].groupby(['source','auto_ownership_simple'])['hh_weight_2017_2019'].sum().reset_index()
df_2 = tour_data.loc[(tour_data['is_worker']) & (tour_data['tour_type']=="work")]. \
    groupby(['source','auto_ownership_simple','tour_type'])['trip_weight_2017_2019'].sum().reset_index()
df_worker_auto = df_1.merge(df_2, how="left", on=['source','auto_ownership_simple'])

df_worker_auto['tour_rate'] = df_worker_auto['trip_weight_2017_2019']/df_worker_auto['hh_weight_2017_2019']
df_worker_auto['person_type'] = "worker"

# student
df_1 = per_data.loc[per_data['is_student']].groupby(['source','auto_ownership_simple'])['hh_weight_2017_2019'].sum().reset_index()
df_2 = tour_data.loc[(tour_data['is_student']) & (tour_data['tour_type']=="school")]. \
    groupby(['source','auto_ownership_simple','tour_type'])['trip_weight_2017_2019'].sum().reset_index()
df_student_auto = df_1.merge(df_2, how="left", on=['source','auto_ownership_simple'])

df_student_auto['tour_rate'] = df_student_auto['trip_weight_2017_2019']/df_student_auto['hh_weight_2017_2019']
df_student_auto['person_type'] = "student"

df_tour_auto = pd.concat([df_worker_auto,df_student_auto])

# df_tour_auto

In [8]:
def _facet_value_only(annotation):
    """Replace a facet annotation's text with the part after its last "="."""
    return annotation.update(text = annotation.text.split("=")[-1])


# one facet per person_type, bars grouped by source within each auto ownership class
fig = px.bar(
    df_tour_auto,
    x="auto_ownership_simple",
    y="tour_rate",
    color="source",
    barmode="group",
    template="simple_white",
    facet_col='person_type',
    title="number of work/school tour rates per worker/student by auto ownership",
)
fig.for_each_annotation(_facet_value_only)
fig.update_layout(height=400, width=700, font=dict(size=11))
fig.update_xaxes(dtick=1)
fig.show()

Note: the unweighted survey contains only 20 students with auto ownership = 4+, so the rate shown for that group rests on a very small sample.

In [9]:
# - check person count for unweighted survey
# df_1 = per_data.loc[(per_data['is_worker']) & (per_data['source']=="unweighted survey")].\
#     groupby(['source','auto_ownership_simple'])['hh_weight_2017_2019'].sum().reset_index()
# df_1['person_type'] = "worker"
#
# df_2 = per_data.loc[(per_data['is_student']) & (per_data['source']=="unweighted survey")].\
#     groupby(['source','auto_ownership_simple'])['hh_weight_2017_2019'].sum().reset_index()
# df_2['person_type'] = "student"
#
# pd.concat([df_1,df_2])

In [10]:
# Mandatory tour rate by distance-to-work bin.
#
# d_work_bin_46mi is the categorical column produced by pd.cut. observed=False is passed
# explicitly: relying on the implicit default is deprecated in newer pandas and the default
# is slated to change, which would silently drop bins that a source has no records in.
# Keeping every bin for every source also keeps the bins in their category order on the x axis.
# Rows whose bin is missing (no distance_to_work, or a value outside the bin range) are
# left out by the groupby.
bin_keys = ['source','d_work_bin_46mi']

# denominator: summed person weights per source and distance bin
df_1 = per_data.groupby(bin_keys, observed=False)['hh_weight_2017_2019'].sum().reset_index()
# numerator: summed weights of mandatory tours per source and distance bin
df_2 = tour_data.loc[tour_data['tour_category']=="mandatory"].groupby(bin_keys, observed=False)['trip_weight_2017_2019'].sum().reset_index()

# plot1
df_plot = df_1.merge(df_2, how='left', on=bin_keys)
# bins with no persons sum to 0 in both frames, so their rate is NaN (0/0)
df_plot['tour_rate'] = df_plot['trip_weight_2017_2019']/df_plot['hh_weight_2017_2019']


fig = px.line(df_plot, x='d_work_bin_46mi', y="tour_rate", color="source", template="simple_white",
               title="mandatory tour rates by distance to work (persons with mandatory DAP only)")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11))
fig.show()