The mandatory tour scheduling model selects a tour departure and duration period (and therefore a start and end period as well) for each mandatory tour

In [1]:
import os
import toml
import pandas as pd
import validation_data_input
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots

# to show plotly figures in quarto HTML file
import plotly.io as pio

pio.renderers.default = "plotly_mimetype+notebook_connected"

config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))
# read data
land_use = pd.read_csv(config['p_survey_landuse'])

per_data = validation_data_input.get_persons_data()
tour_data = validation_data_input.get_tours_data()

m_per_data = per_data.loc[per_data['cdap_activity']=="M"]
m_tour_data = tour_data.loc[tour_data['tour_category']=="mandatory"]

# total number of persons by source
df_person_count = m_per_data.groupby('source')['hh_weight_2017_2019'].sum().reset_index()

- person count with mandatory tours

In [2]:
df_person_count

Unnamed: 0,source,hh_weight_2017_2019
0,model results,1989543.0
1,survey data,1283923.0
2,unweighted survey,4602.0


## tour start time

In [3]:
df_plot = m_tour_data.groupby(['source','start'])[['trip_weight_2017_2019']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group", template="simple_white",
             # color_discrete_sequence=config['psrc_color'],
             title="mandatory tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()

- number of tours with start time later than 8pm

In [4]:
df_plot.loc[df_plot['start']>=20]

Unnamed: 0,source,start,trip_weight_2017_2019,percentage
15,model results,20.0,4842.0,0.002365
16,model results,21.0,1824.0,0.000891
17,model results,22.0,87.0,4.2e-05
18,model results,23.0,114.0,5.6e-05
34,survey data,20.0,1054.453067,0.000765
50,unweighted survey,20.0,4.0,0.000611


## tour duration

In [5]:
m_tour_data['duration_cal'] = m_tour_data['end'] - m_tour_data['start']

df_plot = m_tour_data.groupby(['source','duration_cal'])[['trip_weight_2017_2019']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['trip_weight_2017_2019']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group", template="simple_white",
             # color_discrete_sequence=config['psrc_color'],
             title="mandatory tour tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, font=dict(size=11), yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

