In [1]:
import polars as pl
import os
import pandas as pd
import numpy as np
# import validation_data_input
import plotly.express as px
import toml
from pathlib import Path
import util
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
# Default renderer for every fig.show() call in this notebook.
# NOTE(review): the "+"-joined value presumably enables both renderers at once — confirm against plotly docs.
pio.renderers.default = "plotly_mimetype+notebook_connected"
# NOTE(review): "psrc_color" is presumably a template registered by `import psrc_theme` above — confirm.
pio.templates.default = "simple_white+psrc_color" # set plotly template

In [2]:
# %store -r validation_data

# config = validation_data.config.copy()
# tour = validation_data.tour.copy()

# The configuration folder sits four directory levels above the notebook's working
# directory. The path is built from individual ".." components instead of a
# backslash-separated literal: "\." is an invalid escape sequence in a normal string
# (newer Python versions warn about it) and backslashes are only treated as path
# separators on Windows.
config_dir = Path.cwd().joinpath('..', '..', '..', '..', 'configuration')
config = toml.load(config_dir / 'validation_configuration.toml')
input_config = toml.load(config_dir / 'input_configuration.toml')

# Only the 'tour' table is requested from the validation data loader.
data = util.ValidationData(config, input_config, ['tour'])

# Downstream cells use the pandas API, so convert the loaded table once here.
tour = data.tour.to_pandas()


In [3]:
# df_person = person.copy()
# Work on a copy so the columns added below do not modify the loaded `tour` frame.
df_tour = tour.copy()

# ptype_cat = {1: "1: full time worker",
#              2: "2: part time worker",
#              3: "3: non-worker age 65+",
#              4: "4: other non-working adult",
#              5: "5: university student",
#              6: "6: grade school student/child age 16+",
#              7: "7: child age 5-15",
#              8: "8: child age 0-4"}

# Maps the `pdpurp` code to a display label; several codes share one label.
pdpurp_cat = {1: "Work",
              2: "School",
              3: "Escort",
              4: "Personal Business",
              5: "Shop",
              6: "Meal",
              7: "Social",
              8: "Social",
              9: "Personal Business"}
# df_person['pptyp_label'] = df_person['pptyp'].map(ptype_cat)
# df_person['pptyp_label'] = df_person['pptyp_label'].astype(pd.CategoricalDtype(["1: full time worker","2: part time worker","3: non-worker age 65+","4: other non-working adult","5: university student","6: grade school student/child age 16+","7: child age 5-15","8: child age 0-4"]))

# Display order of the purpose labels. Every label produced by `pdpurp_cat` must be
# listed: casting to a CategoricalDtype turns any label that is not a category into
# NaN, and groupby then drops those rows. "Social" was previously missing, so tours
# with pdpurp 7 or 8 silently disappeared from all by-purpose summaries.
pdpurp_order = ["Work","School","Shop","Meal","Escort","Personal Business","Social"]
df_tour['pdpurp_label'] = df_tour['pdpurp'].map(pdpurp_cat)
df_tour['pdpurp_label'] = df_tour['pdpurp_label'].astype(pd.CategoricalDtype(pdpurp_order))

# Total stops on the tour = stops on the two halves (tripsh1 is charted as "outbound",
# tripsh2 as "inbound" in the cells below).
df_tour['all_stops'] = df_tour['tripsh1'] + df_tour['tripsh2']
df_tour = df_tour.sort_values(['source'])
# df_tour = df_tour.merge(df_person, how='left', on=['pno','hhno','source'])

In [4]:
# Sum of `toexpfac` for each (outbound stop count, source) pair.
df_plot = df_tour.groupby(['tripsh1','source'])['toexpfac'].sum().reset_index()
# Share of each stop count within its source (each source sums to 1).
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('source')['toexpfac'].transform('sum')
# Keep only shares above 0.1% so near-empty bars do not stretch the x axis.
df_plot = df_plot.loc[df_plot['percentage'] > 0.001]

# The title states the same 0.1% cutoff that the filter above applies (0.001).
fig = px.bar(df_plot.sort_values(['source']), x="tripsh1", y="percentage", color="source",
             barmode="group",title="outbound: number of stops (excluding shares < 0.1%)",
             hover_data=['toexpfac'])
fig.update_layout(height=400, width=700, font=dict(size=11),
                  xaxis = dict(dtick = 1, categoryorder='category ascending'),
                  yaxis=dict(tickformat=".0%"))
fig.show()

In [5]:
# Sum of `toexpfac` for each (inbound stop count, source) pair.
df_plot = df_tour.groupby(['tripsh2','source'])['toexpfac'].sum().reset_index()
# Share of each stop count within its source (each source sums to 1).
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('source')['toexpfac'].transform('sum')
# Keep only shares above 0.1% so near-empty bars do not stretch the x axis.
df_plot = df_plot.loc[df_plot['percentage'] > 0.001]
# df_plot
# The title states the same 0.1% cutoff that the filter above applies (0.001).
fig = px.bar(df_plot.sort_values(['source']), x="tripsh2", y="percentage", color="source",
             barmode="group",title="inbound: number of stops (excluding shares < 0.1%)",
             hover_data=['toexpfac'])
fig.update_layout(height=400, width=700, font=dict(size=11),
                  xaxis = dict(dtick = 1, categoryorder='category ascending'),
                  yaxis=dict(tickformat=".0%"))
fig.show()

In [6]:
# Sum of `toexpfac` for each (total stop count, source) pair.
df_plot = df_tour.groupby(['all_stops','source'])['toexpfac'].sum().reset_index()
# Share of each stop count within its source (each source sums to 1).
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby('source')['toexpfac'].transform('sum')
# Keep only shares above 0.1% so near-empty bars do not stretch the x axis.
df_plot = df_plot.loc[df_plot['percentage'] > 0.001]
# df_plot
# The title states the same 0.1% cutoff that the filter above applies (0.001).
fig = px.bar(df_plot.sort_values(['source']), x="all_stops", y="percentage", color="source",
             barmode="group",title="all stops: number of stops (excluding shares < 0.1%)",
             hover_data=['toexpfac'])
fig.update_layout(height=400, width=700, font=dict(size=11),
                  xaxis = dict(dtick = 1, categoryorder='category ascending'),
                  yaxis=dict(tickformat=".0%"))
fig.show()

## number of stops by purpose

In [7]:
# Sum of `toexpfac` for each (outbound stop count, purpose, source) combination.
# `pdpurp_label` is categorical; observed=False is passed explicitly so the result keeps
# one row per category combination regardless of the installed pandas default.
df_plot = df_tour.groupby(['tripsh1','pdpurp_label','source'], observed=False)['toexpfac'].sum().reset_index()
# Share of each stop count within its (purpose, source) group.
group_total = df_plot.groupby(['pdpurp_label','source'], observed=False)['toexpfac'].transform('sum')
df_plot['percentage'] = df_plot['toexpfac'] / group_total
# Chart data: shares above 0.1% and at most 10 stops. `df_plot` itself stays unfiltered
# because the outlier table in the next cell reads it.
df_plot2 = df_plot.loc[(df_plot['percentage'] > 0.001) & (df_plot['tripsh1'] <= 10)]

# The title states the same 0.1% cutoff that the filter above applies (0.001).
fig = px.bar(df_plot2.sort_values(['source','pdpurp_label']),
             x="tripsh1", y="percentage", color="source",barmode="group",
             facet_col='pdpurp_label', facet_col_wrap=2,
             hover_data=['toexpfac'],
             title="outbound: number of stops by purpose (excluding shares < 0.1%)")
fig.update_layout(height=800, width=800)
# Facet titles arrive as "pdpurp_label=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.show()









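- outliers: purpose/source groups with a share above 0.1% at more than 10 outbound stops (left out of the chart above)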

In [8]:
# Rows left out of the chart above only because of the 10-stop cap.
df_plot.query("percentage > 0.001 and tripsh1 > 10")

Unnamed: 0,tripsh1,pdpurp_label,source,toexpfac,percentage


In [9]:
# Sum of `toexpfac` for each (inbound stop count, purpose, source) combination.
# `pdpurp_label` is categorical; observed=False is passed explicitly so the result keeps
# one row per category combination regardless of the installed pandas default.
df_plot = df_tour.groupby(['tripsh2','pdpurp_label','source'], observed=False)['toexpfac'].sum().reset_index()
# Share of each stop count within its (purpose, source) group.
group_total = df_plot.groupby(['pdpurp_label','source'], observed=False)['toexpfac'].transform('sum')
df_plot['percentage'] = df_plot['toexpfac'] / group_total
# Chart data: shares above 0.1%. NOTE(review): unlike the outbound and all-stops
# charts, no "<= 10 stops" cap is applied here — confirm this is intended.
df_plot2 = df_plot.loc[df_plot['percentage'] > 0.001]

# The title states the same 0.1% cutoff that the filter above applies (0.001).
fig = px.bar(df_plot2.sort_values(['source','pdpurp_label']),
             x="tripsh2", y="percentage", color="source",barmode="group",
             facet_col='pdpurp_label', facet_col_wrap=2,
             hover_data=['toexpfac'],
             title="inbound: number of stops by purpose (excluding shares < 0.1%)")
fig.update_layout(height=800, width=800)
# Facet titles arrive as "pdpurp_label=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.show()









In [10]:
# Sum of `toexpfac` for each (total stop count, purpose, source) combination.
# `pdpurp_label` is categorical; observed=False is passed explicitly so the result keeps
# one row per category combination regardless of the installed pandas default.
df_plot = df_tour.groupby(['all_stops','pdpurp_label','source'], observed=False)['toexpfac'].sum().reset_index()
# Share of each stop count within its (purpose, source) group.
group_total = df_plot.groupby(['pdpurp_label','source'], observed=False)['toexpfac'].transform('sum')
df_plot['percentage'] = df_plot['toexpfac'] / group_total
# Chart data: shares above 0.1% and at most 10 stops. `df_plot` itself stays unfiltered
# because the outlier table in the next cell reads it.
df_plot2 = df_plot.loc[(df_plot['percentage'] > 0.001) & (df_plot['all_stops'] <= 10)]

# The title states the same 0.1% cutoff that the filter above applies (0.001).
fig = px.bar(df_plot2.sort_values(['source','pdpurp_label']),
             x="all_stops", y="percentage", color="source",barmode="group",
             facet_col='pdpurp_label', facet_col_wrap=2,
             hover_data=['toexpfac'],
             title="all stops: number of stops by purpose (excluding shares < 0.1%)")
fig.update_layout(height=800, width=800)
# Facet titles arrive as "pdpurp_label=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
fig.for_each_xaxis(lambda a: a.update(dict(dtick = 1, categoryorder='category ascending')))
fig.show()









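- outliers: purpose/source groups with a share above 0.1% at more than 10 total stops (left out of the chart above)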

In [11]:
# Rows left out of the chart above only because of the 10-stop cap.
df_plot.query("percentage > 0.001 and all_stops > 10")

Unnamed: 0,all_stops,pdpurp_label,source,toexpfac,percentage
132,11,Work,model,2963.0,0.001352
143,11,Personal Business,survey,1001.476382,0.002894
