The mandatory tour scheduling model selects a tour departure and duration period (and therefore a start and end period as well) for each mandatory tour

In [1]:
import os
import toml
import pandas as pd
#import validation_data_input
import numpy as np
import plotly.express as px
import psrc_theme

import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected" # to show plotly figures in quarto HTML file
pio.templates.default = "simple_white+psrc_color" # set plotly template

config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))



In [2]:
# get shared data
%store -r validation_data

#config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))

tour_data = validation_data.tours.copy()
m_tour_data = tour_data.loc[tour_data['tour_category']=="mandatory"]
ws_tour_data = m_tour_data.loc[m_tour_data['tour_type'].isin(["work","school"])]

# delete shared data
del validation_data


In [3]:
m_tour_data.source.value_counts()

model results        1755592
survey data             3307
unweighted survey       3307
Name: source, dtype: int64

## tour start time

In [4]:
df_plot = m_tour_data.groupby(['source','start'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group",
             title="mandatory tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()

- number of tours with start time later than 8pm

In [5]:
df_plot.loc[df_plot['start']>=20]

Unnamed: 0,source,start,tour_weight,percentage
15,model results,20.0,4447.0,0.002533
16,model results,21.0,1793.0,0.001021
17,model results,22.0,80.0,4.6e-05
18,model results,23.0,134.0,7.6e-05
34,survey data,20.0,2664.395448,0.001612
35,survey data,21.0,1284.251575,0.000777
36,survey data,22.0,7463.727136,0.004515
37,survey data,23.0,255.980387,0.000155
53,unweighted survey,20.0,9.0,0.002721
54,unweighted survey,21.0,3.0,0.000907


In [6]:
df_plot = ws_tour_data.groupby(['source','tour_type','start'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['tour_type','source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group",
             facet_row='tour_type',
             title="work/school tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis1=dict(tickformat=".0%"), yaxis2=dict(tickformat=".0%", matches=None), xaxis=dict(dtick="1"))
fig.show()

- bars with tour counts fewer than 30

In [7]:
df_plot.loc[df_plot['tour_weight']<30]

Unnamed: 0,source,tour_type,start,tour_weight,percentage
70,unweighted survey,school,5.0,5.0,0.009191
71,unweighted survey,school,6.0,26.0,0.047794
75,unweighted survey,school,10.0,14.0,0.025735
76,unweighted survey,school,11.0,9.0,0.016544
77,unweighted survey,school,12.0,8.0,0.014706
78,unweighted survey,school,13.0,8.0,0.014706
79,unweighted survey,school,14.0,6.0,0.011029
80,unweighted survey,school,15.0,3.0,0.005515
81,unweighted survey,school,16.0,7.0,0.012868
82,unweighted survey,school,17.0,6.0,0.011029


- work/school tour count

In [8]:
ws_count = ws_tour_data.groupby(['source','tour_type'])[['tour_weight']].sum().reset_index()
ws_count['trip_weight_2017_2019'] = ws_count['tour_weight'].apply(lambda x: round(x,1))
# df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
#     apply(lambda x: x / float(x.sum()))
ws_count.sort_values('tour_type', ascending=False)

Unnamed: 0,source,tour_type,tour_weight,trip_weight_2017_2019
1,model results,work,1022897.0,1022897.0
3,survey data,work,1292836.0,1292836.3
5,unweighted survey,work,2763.0,2763.0
0,model results,school,732695.0,732695.0
2,survey data,school,360098.8,360098.8
4,unweighted survey,school,544.0,544.0


## tour duration

In [9]:
m_tour_data['duration_cal'] = m_tour_data['end'] - m_tour_data['start']

df_plot = m_tour_data.groupby(['source','duration_cal'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group",
             title="mandatory tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:
ws_tour_data['duration_cal'] = ws_tour_data['end'] - ws_tour_data['start']

df_plot = ws_tour_data.groupby(['source','tour_type','duration_cal'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['tour_type','source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group",
             facet_row='tour_type',
             title="work/school tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis1=dict(tickformat=".0%"), yaxis2=dict(tickformat=".0%", matches=None), xaxis=dict(dtick="1"))
fig.show()

- bars with tour counts fewer than 30

In [11]:
df_plot.loc[df_plot['tour_weight']<30]

Unnamed: 0,source,tour_type,duration_cal,tour_weight,percentage
18,model results,school,18.0,13.0,1.8e-05
72,unweighted survey,school,0.0,10.0,0.018382
73,unweighted survey,school,1.0,10.0,0.018382
74,unweighted survey,school,2.0,11.0,0.020221
75,unweighted survey,school,3.0,23.0,0.042279
76,unweighted survey,school,4.0,22.0,0.040441
77,unweighted survey,school,5.0,24.0,0.044118
78,unweighted survey,school,6.0,27.0,0.049632
83,unweighted survey,school,11.0,18.0,0.033088
84,unweighted survey,school,12.0,13.0,0.023897
