In [1]:
import os
import toml
import pandas as pd
import numpy as np
import plotly.express as px
import validation_data_input
import psrc_theme

import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected" # to show plotly figures in quarto HTML file
pio.templates.default = "simple_white+psrc_color" # set plotly template

config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))

# run get_data.ipynb to load validation data
%store -r validation_data

In [2]:
tour_data = validation_data.tours.copy()
non_m_tour_data = tour_data.loc[tour_data['tour_category']=="non_mandatory"]

## non-mandatory tour start time

In [4]:
df_plot = non_m_tour_data.groupby(['source','start'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group",
             title="non-mandatory tour start time")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()

In [5]:
df_plot = non_m_tour_data.groupby(['source','tour_type','start'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source','tour_type'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="start", y="percentage", color="source",barmode="group",
             facet_row='tour_type',
             title="non-mandatory tour start time by tour purpose")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=1000, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()

## non-mantatory tour duration

In [6]:
non_m_tour_data['duration_cal'] = non_m_tour_data['end'] - non_m_tour_data['start']

df_plot = non_m_tour_data.groupby(['source','duration_cal'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group",
             title="non-mandatory tour duration")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [7]:
df_plot = non_m_tour_data.groupby(['source','tour_type','duration_cal'])[['tour_weight']].sum().reset_index()
df_plot['percentage'] = df_plot.groupby(['source','tour_type'], group_keys=False)['tour_weight']. \
    apply(lambda x: x / float(x.sum()))

fig = px.bar(df_plot, x="duration_cal", y="percentage", color="source",barmode="group",
             facet_row='tour_type',
             title="non-mandatory tour duration by tour type")
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.update_layout(height=1000, width=700, yaxis=dict(tickformat=".0%"), xaxis=dict(dtick="1"))
fig.show()