The individual mandatory tour frequency model **predicts the number of work and school tours taken by each person with a mandatory DAP**. The primary drivers of mandatory tour frequency are demographics, accessibility-based parameters such as drive time to work, and household automobile ownership.

In [1]:
import os
import toml
import pandas as pd
import psrc_theme
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots

import plotly.io as pio
# Renderer setting so plotly figures show up in the Quarto-rendered HTML file
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default figure template: "simple_white" combined with "psrc_color"
# NOTE(review): "psrc_color" is presumably registered by the psrc_theme import above — confirm
pio.templates.default = "simple_white+psrc_color"

# Restore `validation_data` from the IPython %store;
# get_data.ipynb must have been run first so that the variable has been stored
%store -r validation_data

# Load the settings from validation_configuration.toml in the current working directory
config = toml.load(os.path.join(os.getcwd(),'validation_configuration.toml'))

In [2]:
# Take working copies of the validation tables so the stored object is never modified
land_use = validation_data.land_use.copy()
per_data = validation_data.persons_data_uncloned.copy()
hh_data = validation_data.hh_data_uncloned.copy()
tour_data = validation_data.tours.copy()


def _auto_ownership_label(n_autos):
    """Return "4+" for an auto ownership of 4 or more, otherwise str() of the value."""
    if n_autos >= 4.0:
        return "4+"
    return str(n_autos)


# auto ownership top-coded at 4+
hh_data['auto_ownership_simple'] = hh_data['auto_ownership'].apply(_auto_ownership_label)

# person type code -> display label
ptype_cat = {
    1: "1: Full-Time Worker",
    2: "2: Part-Time Worker",
    3: "3: University Student",
    4: "4: Non-Working Adult Age <65",
    5: "5: Non-Working Adult Age 65+",
    6: "6: High School Student Age 16+",
    7: "7: Child Age 5-15",
    8: "8: Child Age 0-4",
}
per_data['ptype_label'] = per_data['ptype'].map(ptype_cat)

# Distance-to-work bins: 2-mile bins up to 46 miles, each labelled by its lower edge
max_bin = 46
bin_size = 2
d_work_edges = np.arange(0, max_bin + bin_size, bin_size)
d_work_labels = [str(lower_edge) for lower_edge in d_work_edges[:-1]]
per_data['d_work_bin_46mi'] = pd.cut(per_data['distance_to_work'], bins=d_work_edges, labels=d_work_labels)

# attach household attributes (auto ownership, weight) to persons, then person attributes to tours
hh_cols = ['household_id', 'auto_ownership', 'auto_ownership_simple', 'hh_weight', 'source']
per_data = pd.merge(per_data, hh_data[hh_cols], how='left', on=['household_id', 'source'])
tour_data = pd.merge(tour_data, per_data, how='left', on=['person_id', 'household_id', 'source'])

# persons with a mandatory DAP, and the mandatory tours made by those persons
person_has_m_dap = per_data['cdap_activity'] == "M"
m_per_data = per_data.loc[person_has_m_dap]

tour_is_m_dap_mandatory = (tour_data['cdap_activity'] == "M") & (tour_data['tour_category'] == "mandatory")
m_tour_data = tour_data.loc[tour_is_m_dap_mandatory]

# weighted number of mandatory-DAP persons by source
df_m_person_count = m_per_data.groupby('source', as_index=False)['person_weight'].sum()

## mandatory tours per person

$$\text{mandatory tour rates} = {\text{count of all mandatory tours made by M DAP persons} \over \text{M DAP person count}}$$

In [3]:
#| layout-ncol: 2

# --- mandatory tour rate: weighted mandatory tours per mandatory-DAP person ---
tour_totals = m_tour_data.groupby(['source', 'tour_category'])[['tour_weight']].sum().reset_index()
df_plot = pd.merge(tour_totals, df_m_person_count, how='left', on='source')
df_plot['tour_cat_rate'] = df_plot['tour_weight'] / df_plot['person_weight']

fig = px.bar(
    df_plot,
    x="tour_category",
    y="tour_cat_rate",
    color="source",
    barmode="group",
    title="mandatory tour rates <br>(persons with mandatory DAP only)",
)
fig.update_layout(height=400, width=350)
fig.show()

# --- share of persons by number of mandatory tours made in a day ---
# one row per person holding that person's mandatory tour count
tours_per_person = (
    m_tour_data
    .groupby(['person_id', 'source', 'tour_category', 'person_weight'])['tour_id']
    .count()
    .reset_index(name='num_tours')
)
df_plot = (
    tours_per_person
    .groupby(['source', 'tour_category', 'num_tours'])[['person_weight']]
    .sum()
    .reset_index()
)
# normalise the weights within each (tour_category, source) so the shares sum to 1
df_plot['percentage'] = (
    df_plot
    .groupby(['tour_category', 'source'], group_keys=False)['person_weight']
    .apply(lambda weights: weights / float(weights.sum()))
)

fig = px.bar(
    df_plot,
    x="num_tours",
    y="percentage",
    color="source",
    barmode="group",
    title="number of mandatory tours in a day <br>(persons with mandatory DAP only)",
)
fig.update_layout(height=400, width=350, yaxis=dict(tickformat=".0%"))
fig.show()

## work & school tours

In [4]:
# Weighted mandatory-DAP person totals by source, for workers and for students.
# The two flags are filtered independently of each other.
is_worker_person = m_per_data['is_worker'] == 1
df_worker_count = (
    m_per_data[is_worker_person]
    .groupby('source')['person_weight']
    .sum()
    .reset_index()
    .assign(**{'worker/student': "worker"})
)

is_student_person = m_per_data['is_student'] == 1
df_student_count = (
    m_per_data[is_student_person]
    .groupby('source')['person_weight']
    .sum()
    .reset_index()
    .assign(**{'worker/student': "student"})
)

# stacked table with both groups
df_worker_student_count = pd.concat([df_worker_count, df_student_count])

- total student/worker count (using `is_worker` and `is_student`; a person can be flagged as both, so the two groups can overlap)

In [5]:
# Weighted worker/student totals, formatted to two decimals; display the model results rows
df_1 = (
    df_worker_student_count
    .groupby(['source', 'worker/student'])['person_weight']
    .sum()
    .reset_index()
    .sort_values('worker/student')
)
df_1['person_weight'] = df_1['person_weight'].map(lambda weight: '%.2f' % weight)
df_1[df_1['source'] == "model results"]

Unnamed: 0,source,worker/student,person_weight
0,model results,student,736700.0
1,model results,worker,1019695.0


In [6]:
# mandatory tours (mandatory-DAP persons) made by workers
made_by_worker = m_tour_data['is_worker'] == 1
w_tour_data = m_tour_data[made_by_worker]
# mandatory tours (mandatory-DAP persons) made by students
made_by_student = m_tour_data['is_student'] == 1
s_tour_data = m_tour_data[made_by_student]

In [8]:
# Tour rate by tour type (weighted tours / weighted persons), drawn first for workers
# and then for students; each entry is (tours, person totals, chart title).
rate_specs = [
    (w_tour_data, df_worker_count, "school/work tour rates per worker"),
    (s_tour_data, df_student_count, "school/work tour rates per student"),
]

for segment_tours, segment_count, chart_title in rate_specs:
    tour_totals = segment_tours.groupby(['source', 'tour_type'])[['tour_weight']].sum().reset_index()
    df_plot = tour_totals.merge(segment_count, how='left', on='source')
    df_plot['tour_cat_rate'] = df_plot['tour_weight'] / df_plot['person_weight']

    fig = px.bar(
        df_plot,
        x="tour_type",
        y="tour_cat_rate",
        color="source",
        barmode="group",
        title=chart_title,
    )
    fig.update_layout(height=400, width=500)
    fig.show()

- total student/worker count (using `is_worker` and `is_student`; a person can be flagged as both, so the two groups can overlap)

In [9]:
# Weighted worker/student totals, formatted to two decimals; display the model results rows
grouped_weight = df_worker_student_count.groupby(['source', 'worker/student'])['person_weight'].sum()
df_1 = grouped_weight.reset_index().sort_values('worker/student')
df_1['person_weight'] = df_1['person_weight'].map(lambda weight: '%.2f' % weight)
df_1[df_1['source'] == "model results"]

Unnamed: 0,source,worker/student,person_weight
0,model results,student,736700.0
1,model results,worker,1019695.0


In [10]:
# Number of work/school tours per worker: count each worker's tours by tour type
worker_keys = ['person_id', 'source', 'person_weight']
tour_counts = (
    w_tour_data
    .groupby(['person_id', 'source', 'tour_type', 'person_weight'])['tour_id']
    .count()
    .reset_index()
)

# wide then long again, so a tour type a worker did not make is recorded as 0 tours
pivoted = tour_counts.pivot(index=worker_keys, columns="tour_type", values="tour_id").fillna(0).reset_index()
df_plot = (
    pivoted
    .melt(id_vars=worker_keys, var_name="tour_type", value_name="num_tours")
    .rename(columns={'person_weight': 'ws_weight'})
)

# weighted workers at each tour count, as a share of all mandatory-DAP workers in the source
df_plot = df_plot.groupby(['source', 'tour_type', 'num_tours'])['ws_weight'].sum().reset_index()
df_plot = df_plot.merge(df_worker_count, how='left', on='source')
df_plot['percentage'] = df_plot['ws_weight'] / df_plot['person_weight']

fig = px.bar(
    df_plot,
    x="num_tours",
    y="percentage",
    color="source",
    barmode="group",
    facet_col="tour_type",
    facet_row_spacing=0.2,
    title="number of work/school tours for workers in a day",
)
# facet titles: show "tour type=<value>"
fig.for_each_annotation(lambda a: a.update(text="tour type="+a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".2%"))
fig.show()

In [11]:
# Number of work/school tours per student: count each student's tours by tour type
student_keys = ['person_id', 'source', 'person_weight']
tour_counts = (
    s_tour_data
    .groupby(['person_id', 'source', 'tour_type', 'person_weight'])['tour_id']
    .count()
    .reset_index()
)

# wide then long again, so a tour type a student did not make is recorded as 0 tours
pivoted = tour_counts.pivot(index=student_keys, columns="tour_type", values="tour_id").fillna(0).reset_index()
df_plot = (
    pivoted
    .melt(id_vars=student_keys, var_name="tour_type", value_name="num_tours")
    .rename(columns={'person_weight': 'ws_weight'})
)

# weighted students at each tour count, as a share of all mandatory-DAP students in the source
df_plot = df_plot.groupby(['source', 'tour_type', 'num_tours'])['ws_weight'].sum().reset_index()
df_plot = df_plot.merge(df_student_count, how='left', on='source')
df_plot['percentage'] = df_plot['ws_weight'] / df_plot['person_weight']

fig = px.bar(
    df_plot,
    x="num_tours",
    y="percentage",
    color="source",
    barmode="group",
    facet_col="tour_type",
    facet_row_spacing=0.2,
    title="number of work/school tours for students in a day",
)
# facet titles: show "tour type=<value>"
fig.for_each_annotation(lambda a: a.update(text="tour type="+a.text.split("=")[-1]))
fig.update_layout(height=400, width=700)
fig.update_yaxes(tickformat=".1%")
fig.show()

- total person count by person type (using `ptype`)

In [12]:
# Weighted mandatory-DAP person totals by person type; display the model results rows
df_1 = (
    m_per_data
    .groupby(['source', 'ptype_label'])['person_weight']
    .sum()
    .reset_index()
    .sort_values('ptype_label')
)
df_1['person_weight'] = df_1['person_weight'].map(lambda weight: '%.2f' % weight)
df_1[df_1['source'] == "model results"]

Unnamed: 0,source,ptype_label,person_weight
0,model results,1: Full-Time Worker,866629.0
1,model results,2: Part-Time Worker,103886.0
2,model results,3: University Student,106882.0
3,model results,6: High School Student Age 16+,121139.0
4,model results,7: Child Age 5-15,391024.0
5,model results,8: Child Age 0-4,117655.0


In [13]:
# denominator: weighted mandatory-DAP persons by source and person type
df_ptype_count = m_per_data.groupby(['source', 'ptype_label'])['person_weight'].sum().reset_index()

# numerator: weighted mandatory tours by source, person type and tour type
tour_totals = (
    m_tour_data
    .groupby(['source', 'ptype_label', 'tour_type'])[['tour_weight']]
    .sum()
    .reset_index()
)
df_plot = pd.merge(tour_totals, df_ptype_count, how='left', on=['source', 'ptype_label'])
df_plot['tour_cat_rate'] = df_plot['tour_weight'] / df_plot['person_weight']

fig = px.bar(
    df_plot,
    x="tour_type",
    y="tour_cat_rate",
    color="source",
    barmode="group",
    facet_col='ptype_label',
    facet_col_wrap=2,
    title="mandatory tour rates <br>(persons with mandatory DAP only)",
)
# facet titles: keep only the person type label
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=700, width=670)
fig.show()

- total person count by person type (using `ptype`)

In [14]:
# Weighted mandatory-DAP person totals by person type; display the model results rows
ptype_weight = m_per_data.groupby(['source', 'ptype_label'])['person_weight'].sum()
df_1 = ptype_weight.reset_index().sort_values('ptype_label')
df_1['person_weight'] = df_1['person_weight'].map(lambda weight: '%.2f' % weight)
df_1[df_1['source'] == "model results"]

Unnamed: 0,source,ptype_label,person_weight
0,model results,1: Full-Time Worker,866629.0
1,model results,2: Part-Time Worker,103886.0
2,model results,3: University Student,106882.0
3,model results,6: High School Student Age 16+,121139.0
4,model results,7: Child Age 5-15,391024.0
5,model results,8: Child Age 0-4,117655.0


In [15]:
# Number of work/school tours per person by person type:
# count each mandatory-DAP person's mandatory tours by tour type
ptype_person_keys = ['person_id', 'source', 'ptype_label', 'person_weight']
tour_counts = (
    m_tour_data
    .groupby(['person_id', 'source', 'ptype_label', 'tour_type', 'person_weight'])['tour_id']
    .count()
    .reset_index()
)

# wide then long again, so a tour type a person did not make is recorded as 0 tours
pivoted = tour_counts.pivot(index=ptype_person_keys, columns="tour_type", values="tour_id").fillna(0).reset_index()
df_plot = (
    pivoted
    .melt(id_vars=ptype_person_keys, var_name="tour_type", value_name="num_tours")
    .rename(columns={'person_weight': 'ws_weight'})
)

# weighted persons at each tour count, as a share of the person type's total in the source
df_plot = df_plot.groupby(['source', 'ptype_label', 'tour_type', 'num_tours'])['ws_weight'].sum().reset_index()
df_plot = df_plot.merge(df_ptype_count, how='left', on=['source', 'ptype_label'])
df_plot['percentage'] = df_plot['ws_weight'] / df_plot['person_weight']

fig = px.bar(
    df_plot,
    x="num_tours",
    y="percentage",
    color="source",
    barmode="group",
    facet_row="ptype_label",
    facet_col="tour_type",
    title="number of work/school tours by person type",
)
# facet titles: keep only the value after "="
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=1050, width=700)
fig.update_yaxes(tickformat=".1%")
fig.show()

## number of mandatory tours per person by segment

In [16]:
# Mandatory tour rate by household auto ownership (persons with mandatory DAP only).
# Both sides of the rate use the mandatory-DAP subsets: m_per_data for the person
# totals and m_tour_data for the tour totals. Taking the tours from the unfiltered
# tour_data would count mandatory tours of persons who are not in the denominator.
df_1 = m_per_data.groupby(['source','auto_ownership_simple'])['person_weight'].sum().reset_index()
df_2 = m_tour_data.groupby(['source','auto_ownership_simple'])['tour_weight'].sum().reset_index()

# left merge keeps every (source, auto ownership) person group, even one without tours
df_plot = df_1.merge(df_2, how='left', on=['source','auto_ownership_simple'])
df_plot['tour_rate'] = df_plot['tour_weight']/df_plot['person_weight']

fig = px.bar(df_plot, x="auto_ownership_simple", y="tour_rate", color="source",barmode="group",
             title="mandatory tour rates by auto ownership (persons with mandatory DAP only)")
# no facets in this figure, so there are no facet annotations to relabel
fig.update_layout(height=400, width=700)
fig.update_xaxes(dtick=1)
fig.show()

- auto ownership by student/worker

In [17]:
# number of tour records per tour type, over the whole tour table (all sources together)
tour_data.loc[:, 'tour_type'].value_counts()

work        1031290
shopping     779046
school       735798
othdiscr     639369
escort       582485
othmaint     436123
eatout       298224
eat          220460
social       194004
business      49598
maint         35454
Name: tour_type, dtype: int64

In [18]:
def _ws_tour_rate_by_auto(person_flag, tour_type, person_type):
    """Tour rate by source and auto ownership for one person group.

    Both the person totals and the tour totals come from the mandatory-DAP
    subsets (m_per_data / m_tour_data), so the tours counted in the numerator
    belong to the same persons as the denominator.

    Parameters
    ----------
    person_flag : str
        Flag column selecting the group ('is_worker' or 'is_student'); rows
        where the flag equals 1 are kept.
    tour_type : str
        Tour type counted in the numerator ("work" or "school").
    person_type : str
        Label written to the 'person_type' column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per (source, auto_ownership_simple) person group with
        person_weight, tour_type, tour_weight, tour_rate and person_type.
        Groups without matching tours keep NaN tour columns (left merge).
    """
    persons = m_per_data.loc[m_per_data[person_flag] == 1]
    tours = m_tour_data.loc[(m_tour_data[person_flag] == 1) & (m_tour_data['tour_type'] == tour_type)]

    person_totals = persons.groupby(['source','auto_ownership_simple'])['person_weight'].sum().reset_index()
    tour_totals = tours.groupby(['source','auto_ownership_simple','tour_type'])['tour_weight'].sum().reset_index()

    rates = person_totals.merge(tour_totals, how="left", on=['source','auto_ownership_simple'])
    rates['tour_rate'] = rates['tour_weight']/rates['person_weight']
    rates['person_type'] = person_type
    return rates


# work tours per worker and school tours per student, by auto ownership
df_worker_auto = _ws_tour_rate_by_auto('is_worker', "work", "worker")
df_student_auto = _ws_tour_rate_by_auto('is_student', "school", "student")

df_tour_auto = pd.concat([df_worker_auto,df_student_auto])

In [19]:
# Work/school tour rate by auto ownership, one facet per person type (worker / student)
fig = px.bar(
    df_tour_auto,
    x="auto_ownership_simple",
    y="tour_rate",
    color="source",
    barmode="group",
    facet_col='person_type',
    title="number of work/school tour rates per worker/student by auto ownership",
)
# facet titles: keep only the value after "="
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700)
fig.update_xaxes(dtick=1)
fig.show()

Note: there are only 20 unweighted student records with auto ownership = 4+, so the rate for that group should be interpreted with caution.

In [20]:
# Mandatory tour rate by distance-to-work bin (persons with mandatory DAP only).
# Both sides of the rate use the mandatory-DAP subsets: m_per_data for the person
# totals and m_tour_data for the tour totals. Taking the tours from the unfiltered
# tour_data would count mandatory tours of persons who are not in the denominator.
# observed=False is spelled out because the bin column is produced by pd.cut
# (categorical): every bin is kept, as before, without relying on the pandas default.
df_1 = m_per_data.groupby(['source','d_work_bin_46mi'], observed=False)['person_weight'].sum().reset_index()
df_2 = m_tour_data.groupby(['source','d_work_bin_46mi'], observed=False)['tour_weight'].sum().reset_index()

# left merge keeps every (source, distance bin) person group, even one without tours
df_plot = df_1.merge(df_2, how='left', on=['source','d_work_bin_46mi'])
df_plot['tour_rate'] = df_plot['tour_weight']/df_plot['person_weight']


fig = px.line(df_plot, x='d_work_bin_46mi', y="tour_rate", color="source",
               title="mandatory tour rates by distance to work (persons with mandatory DAP only)")
# no facets in this figure, so there are no facet annotations to relabel
fig.update_layout(height=400, width=700)
fig.show()