The mandatory tour scheduling model selects a tour departure and duration period (and therefore a start and end period as well) for each mandatory tour

In [1]:
import os
import toml
import pandas as pd
#import validation_data_input
import numpy as np
import plotly.express as px
import psrc_theme

import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected" # to show plotly figures in quarto HTML file
pio.templates.default = "simple_white+psrc_color" # set plotly template

config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))



In [2]:
# get shared data
%store -r validation_data

#config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))

tour_data = validation_data.tours.copy()
m_tour_data = tour_data.loc[tour_data['tour_category']=="mandatory"]
ws_tour_data = m_tour_data.loc[m_tour_data['tour_type'].isin(["work","school"])]

# delete shared data
del validation_data


In [3]:
m_tour_data.source.value_counts()

model results        1760262
survey data             3295
unweighted survey       3295
Name: source, dtype: int64

## tour start time

In [4]:
df_plot = m_tour_data.groupby(['source','start'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group",
             title="mandatory tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()

- number of tours with start time later than 8pm

In [5]:
df_plot.loc[df_plot['start']>=20]

Unnamed: 0,source,start,tour_weight,percentage
15,model results,20.0,4482.0,0.002546
16,model results,21.0,1814.0,0.001031
17,model results,22.0,78.0,4.4e-05
18,model results,23.0,132.0,7.5e-05
34,survey data,20.0,2664.395448,0.001632
35,survey data,21.0,1284.251575,0.000787
36,survey data,22.0,7463.727136,0.004571
37,survey data,23.0,255.980387,0.000157
53,unweighted survey,20.0,9.0,0.002731
54,unweighted survey,21.0,3.0,0.00091


In [6]:
df_plot = ws_tour_data.groupby(['source','tour_type','start'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['tour_type','source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group",
             facet_row='tour_type',
             title="work/school tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis1=dict(tickformat=".0%"), yaxis2=dict(tickformat=".0%", matches=None), xaxis=dict(dtick="1"))
fig.show()

- bars with tour counts fewer than 30

In [7]:
df_plot.loc[df_plot['tour_weight']<30]

Unnamed: 0,source,tour_type,start,tour_weight,percentage
48,survey data,school,15.0,27.838703,7.9e-05
70,unweighted survey,school,5.0,5.0,0.009208
71,unweighted survey,school,6.0,25.0,0.046041
75,unweighted survey,school,10.0,14.0,0.025783
76,unweighted survey,school,11.0,9.0,0.016575
77,unweighted survey,school,12.0,8.0,0.014733
78,unweighted survey,school,13.0,8.0,0.014733
79,unweighted survey,school,14.0,6.0,0.01105
80,unweighted survey,school,15.0,2.0,0.003683
81,unweighted survey,school,16.0,7.0,0.012891


- work/school tour count

In [8]:
ws_count = ws_tour_data.groupby(['source','tour_type'])[['tour_weight']].sum().reset_index()
ws_count['trip_weight_2017_2019'] = ws_count['tour_weight'].apply(lambda x: round(x,1))
# df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
#     apply(lambda x: x / float(x.sum()))
ws_count.sort_values('tour_type', ascending=False)

Unnamed: 0,source,tour_type,tour_weight,trip_weight_2017_2019
1,model results,work,1025680.0,1025680.0
3,survey data,work,1280049.0,1280049.0
5,unweighted survey,work,2752.0,2752.0
0,model results,school,734582.0,734582.0
2,survey data,school,352804.5,352804.5
4,unweighted survey,school,543.0,543.0


## tour duration

In [9]:
m_tour_data['duration_cal'] = m_tour_data['end'] - m_tour_data['start']

df_plot = m_tour_data.groupby(['source','duration_cal'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group",
             title="mandatory tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:
ws_tour_data['duration_cal'] = ws_tour_data['end'] - ws_tour_data['start']

df_plot = ws_tour_data.groupby(['source','tour_type','duration_cal'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['tour_type','source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group",
             facet_row='tour_type',
             title="work/school tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis1=dict(tickformat=".0%"), yaxis2=dict(tickformat=".0%", matches=None), xaxis=dict(dtick="1"))
fig.show()

- bars with tour counts fewer than 30

In [11]:
df_plot.loc[df_plot['tour_weight']<30]

Unnamed: 0,source,tour_type,duration_cal,tour_weight,percentage
18,model results,school,18.0,14.0,1.9e-05
72,unweighted survey,school,0.0,9.0,0.016575
73,unweighted survey,school,1.0,9.0,0.016575
74,unweighted survey,school,2.0,11.0,0.020258
75,unweighted survey,school,3.0,23.0,0.042357
76,unweighted survey,school,4.0,22.0,0.040516
77,unweighted survey,school,5.0,24.0,0.044199
78,unweighted survey,school,6.0,27.0,0.049724
83,unweighted survey,school,11.0,18.0,0.033149
84,unweighted survey,school,12.0,13.0,0.023941
