The mandatory tour scheduling model selects a tour departure and duration period (and therefore a start and end period as well) for each mandatory tour

In [1]:
import os
import toml
import pandas as pd
import validation_data_input
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots

# to show plotly figures in quarto HTML file
import plotly.io as pio

pio.renderers.default = "plotly_mimetype+notebook_connected"

config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))
# read data
land_use = pd.read_csv(config['p_survey_landuse'])

per_data = validation_data_input.get_persons_data()
tour_data = validation_data_input.get_tours_data()

# persons with mandatory tours
m_per_data = per_data.loc[per_data['cdap_activity']=="M"]
m_tour_data = tour_data.loc[tour_data['tour_category']=="mandatory"]

# workers
w_tour_data = tour_data.loc[(tour_data['person_id'].isin(per_data.loc[per_data['is_worker']]['person_id'])) & \
                            (tour_data['tour_type'] == "work")]
w_tour_data['person_type'] = "worker"
# students
s_tour_data = tour_data.loc[(tour_data['person_id'].isin(per_data.loc[per_data['is_student']]['person_id'])) & \
                            (tour_data['tour_type'] == "school")]
s_tour_data['person_type'] = "student"
ws_tour_data = pd.concat([w_tour_data,s_tour_data])


# total number of persons by source
df_person_count = m_per_data.groupby('source')['hh_weight_2017_2019'].sum().reset_index()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



- person count with mandatory tours

In [2]:
df_person_count

Unnamed: 0,source,hh_weight_2017_2019
0,model results,1989543.0
1,survey data,1283923.0
2,unweighted survey,4602.0


## tour start time

In [3]:
df_plot = m_tour_data.groupby(['source','start'])[['trip_weight_2017_2019']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group", template="simple_white",
             # color_discrete_sequence=config['psrc_color'],
             title="mandatory tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()

- number of tours with start time later than 8pm

In [4]:
df_plot.loc[df_plot['start']>=20]

Unnamed: 0,source,start,trip_weight_2017_2019,percentage
15,model results,20.0,4842.0,0.002365
16,model results,21.0,1824.0,0.000891
17,model results,22.0,87.0,4.2e-05
18,model results,23.0,114.0,5.6e-05
34,survey data,20.0,1054.453067,0.000765
50,unweighted survey,20.0,4.0,0.000611


In [5]:
df_plot = ws_tour_data.groupby(['source','person_type','tour_type','start'])[['trip_weight_2017_2019']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group", template="simple_white",
             facet_row='person_type',
             # color_discrete_sequence=config['psrc_color'],
             title="work/school tour start time for workers/students")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis1=dict(tickformat=".0%"), yaxis2=dict(tickformat=".0%", matches=None), xaxis=dict(dtick="1"))
fig.show()

- bars with tour counts fewer than 30

In [6]:
df_plot.loc[df_plot['trip_weight_2017_2019']<30]

Unnamed: 0,source,person_type,tour_type,start,trip_weight_2017_2019,percentage
68,unweighted survey,student,school,5.0,8.0,0.001471
73,unweighted survey,student,school,10.0,27.0,0.004966
74,unweighted survey,student,school,11.0,13.0,0.002391
75,unweighted survey,student,school,12.0,15.0,0.002759
76,unweighted survey,student,school,13.0,11.0,0.002023
77,unweighted survey,student,school,14.0,7.0,0.001287
78,unweighted survey,student,school,15.0,5.0,0.00092
79,unweighted survey,student,school,16.0,7.0,0.001287
80,unweighted survey,student,school,17.0,11.0,0.002023
81,unweighted survey,student,school,18.0,2.0,0.000368


- worker/student count

In [7]:
ws_count = ws_tour_data.groupby(['source','person_type','tour_type'])[['trip_weight_2017_2019']].sum().reset_index()
ws_count['trip_weight_2017_2019'] = ws_count['trip_weight_2017_2019'].apply(lambda x: round(x,1))
# df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
#     apply(lambda x: x / float(x.sum()))
ws_count.sort_values('person_type', ascending=False)

Unnamed: 0,source,person_type,tour_type,trip_weight_2017_2019
1,model results,worker,work,1294436.0
3,survey data,worker,work,1047415.9
5,unweighted survey,worker,work,4713.0
0,model results,student,school,753061.0
2,survey data,student,school,264974.5
4,unweighted survey,student,school,724.0


## tour duration

In [8]:
m_tour_data['duration_cal'] = m_tour_data['end'] - m_tour_data['start']

df_plot = m_tour_data.groupby(['source','duration_cal'])[['trip_weight_2017_2019']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group", template="simple_white",
             # color_discrete_sequence=config['psrc_color'],
             title="mandatory tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [9]:
ws_tour_data['duration_cal'] = ws_tour_data['end'] - ws_tour_data['start']

df_plot = ws_tour_data.groupby(['source','person_type','tour_type','duration_cal'])[['trip_weight_2017_2019']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group", template="simple_white",
             facet_row='person_type',
             # color_discrete_sequence=config['psrc_color'],
             title="work/school tour duration for workers/students")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis1=dict(tickformat=".0%"), yaxis2=dict(tickformat=".0%", matches=None), xaxis=dict(dtick="1"))
fig.show()

- bars with tour counts fewer than 30

In [10]:
df_plot.loc[df_plot['trip_weight_2017_2019']<30]

Unnamed: 0,source,person_type,tour_type,duration_cal,trip_weight_2017_2019,percentage
18,model results,student,school,18.0,16.0,8e-06
72,survey data,worker,work,17.0,2.048274,2e-06
74,unweighted survey,student,school,0.0,4.0,0.000736
75,unweighted survey,student,school,1.0,9.0,0.001655
76,unweighted survey,student,school,2.0,19.0,0.003495
86,unweighted survey,student,school,12.0,28.0,0.00515
87,unweighted survey,student,school,13.0,16.0,0.002943
88,unweighted survey,student,school,14.0,4.0,0.000736
89,unweighted survey,student,school,15.0,1.0,0.000184
90,unweighted survey,student,school,16.0,1.0,0.000184
