In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
pio.templates.default = "simple_white+psrc_color" # set plotly template

# Build the configuration path from individual components so that os.path.join
# picks the separator. A hard-coded '..\\..\\..\\..\\configuration' segment only
# resolves on Windows; elsewhere it is treated as one literal directory name.
config = toml.load(os.path.join(os.getcwd(), '..', '..', '..', '..',
                                'configuration', 'validation_configuration.toml'))

In [2]:
# Fetch the 'tour' and 'person' tables via validation_data_input.get_data.
tour, person = (validation_data_input.get_data(table) for table in ('tour', 'person'))

## number of tours per person

In [3]:
# Lookup tables translating numeric codes into display labels.
ptype_cat = {
    1: "1: full time worker",
    2: "2: part time worker",
    3: "3: non-worker age 65+",
    4: "4: other non-working adult",
    5: "5: university student",
    6: "6: grade school student/child age 16+",
    7: "7: child age 5-15",
    8: "8: child age 0-4",
}
pdpurp_cat = {
    1: "Work",
    2: "School",
    3: "Escort",
    4: "Personal Business",
    5: "Shop",
    6: "Meal",
    7: "Social",
}

# Work on copies so the frames loaded earlier stay untouched.
df_person = person.copy()
df_tour = tour.copy()

# Attach the labels as categoricals whose category order follows the lookup tables.
df_person['pptyp_label'] = (
    df_person['pptyp']
    .map(ptype_cat)
    .astype(pd.CategoricalDtype(ptype_cat.values()))
)
df_tour['pdpurp_label'] = (
    df_tour['pdpurp']
    .map(pdpurp_cat)
    .astype(pd.CategoricalDtype(pdpurp_cat.values()))
)

# Left-join the person columns onto each tour record.
df_tour = pd.merge(df_tour, df_person, how='left', on=['pno','hhno','source'])

In [5]:
# Sum of toexpfac over tours and of psexpfac over persons, per source.
tours_by_source = df_tour.groupby(['source'])['toexpfac'].sum().reset_index()
persons_by_source = df_person.groupby(['source'])['psexpfac'].sum().reset_index()

df_plot = tours_by_source.merge(persons_by_source, how='left', on=['source'])
df_plot['average tour'] = df_plot['toexpfac'].div(df_plot['psexpfac'])
# Single blank x category: the bars are only split by source.
df_plot['person'] = ''

layout_options = {
    'height': 400,
    'width': 400,
    'font': {'size': 11},
    'xaxis': {'dtick': 1, 'categoryorder': 'category ascending'},
    'yaxis': {'tickformat': ".3"},
}
fig = px.bar(df_plot, x="person", y="average tour", color="source",
             barmode="group", title="number of tours per person")
fig.update_layout(**layout_options)
fig.show()

## percent of tours by purpose

In [6]:
# Per source and purpose: the summed toexpfac and the number of tour records
# behind it, computed in one aggregation pass instead of two groupbys plus a merge.
df_plot = (
    df_tour.groupby(['source','pdpurp_label'])['toexpfac']
    .agg(**{'toexpfac': 'sum', 'sample count': 'count'})
    .reset_index()
)
# Share of each source's total. transform('sum') broadcasts the per-source total
# back onto every row, so no Python-level lambda is run per group.
df_plot['percentage'] = df_plot['toexpfac'] / df_plot.groupby(['source'])['toexpfac'].transform('sum')

fig = px.bar(df_plot.sort_values(by=['source']), x="pdpurp_label", y="percentage", color="source",
             barmode="group",hover_data=['sample count'],title="tour purpose")
fig.update_layout(height=400, width=700, font=dict(size=11),
                  yaxis=dict(tickformat=".2%"))
fig.show()

## number of tours per person by segment

In [7]:
def plot_segment(df, tour_group_var,person_group_var,title_name):
    """Show a grouped bar chart of "average tour" per segment, colored by source.

    Args:
        df: data frame holding the columns "average tour", "source" and the
            segment column named by ``tour_group_var[1]``.
        tour_group_var: sequence of column names; its second entry is the x axis.
        person_group_var: not used by this function; kept for existing callers.
        title_name: chart title.
    """
    segment_col = tour_group_var[1]
    layout_options = dict(height=400, width=700, font=dict(size=11),
                          yaxis=dict(tickformat=".2"))

    chart = px.bar(df, x=segment_col, y="average tour", color="source",
                   barmode="group", title=title_name)
    chart.update_layout(**layout_options)
    chart.show()

# Tours per person for each purpose: purpose-level toexpfac sums divided by the
# source-level psexpfac total.
tours_by_purpose = df_tour.groupby(['source','pdpurp_label'])['toexpfac'].sum().reset_index()
persons_by_source = df_person.groupby(['source'])['psexpfac'].sum().reset_index()

df_plot = tours_by_purpose.merge(persons_by_source, how='left', on=['source'])
df_plot['average tour'] = df_plot['toexpfac'].div(df_plot['psexpfac'])

plot_segment(
    df_plot,
    tour_group_var=['source','pdpurp_label'],
    person_group_var=['source'],
    title_name="number of tours per person by tour purpose",
)


In [8]:
# Tours per person by person type: numerator and denominator are both grouped by
# person type, so each bar relates tours to the persons of that same type.
df_plot = df_tour.groupby(['source','pptyp_label'])['toexpfac'].sum().reset_index(). \
    merge(df_person.groupby(['source','pptyp_label'])['psexpfac'].sum().reset_index(), how='left', on=['source','pptyp_label'])
df_plot['average tour'] = df_plot['toexpfac']/df_plot['psexpfac']
plot_segment(df_plot, tour_group_var=['source','pptyp_label'],person_group_var=['source','pptyp_label'],
             title_name="number of tours per person by person type")

In [9]:
# Keep only work tours (pdpurp code 1 is labelled "Work" in pdpurp_cat).
wk_tour = df_tour.loc[df_tour['pdpurp']==1].copy()

df_plot = wk_tour.groupby(['source','pptyp_label'])['toexpfac'].sum().reset_index(). \
    merge(df_person.groupby(['source','pptyp_label'])['psexpfac'].sum().reset_index(), how='left', on=['source','pptyp_label'])
df_plot['average tour'] = df_plot['toexpfac']/df_plot['psexpfac']
# Reuse plot_segment: it draws this same grouped bar chart with identical layout
# settings, so the plotting code does not need to be repeated here.
plot_segment(df_plot, tour_group_var=['source','pptyp_label'],person_group_var=['source','pptyp_label'],
             title_name="number of work tours per person by person type")

### Tours by Purpose and Person Type

In [10]:
def plot_by_pptyp(df_tour, person_type):
    """Plot tours per person by tour purpose for a single person type.

    Args:
        df_tour: tour records with 'pptyp', 'source', 'pdpurp_label' and
            'toexpfac' columns.
        person_type: person type code; converted with ``int()`` for filtering
            and with ``str()`` for the chart title.

    Person totals are read from the notebook-level ``df_person`` frame.
    """
    ptype_code = int(person_type)

    # Restrict both tours and persons to the requested person type.
    tours_of_type = df_tour[df_tour['pptyp']==ptype_code]
    persons_of_type = df_person[df_person['pptyp']==ptype_code]

    tour_totals = tours_of_type.groupby(['source','pdpurp_label'])['toexpfac'].sum().reset_index()
    person_totals = persons_of_type.groupby(['source'])['psexpfac'].sum().reset_index()

    df_plot = tour_totals.merge(person_totals, how='left', on=['source'])
    df_plot['average tour'] = df_plot['toexpfac']/df_plot['psexpfac']

    plot_segment(df_plot, tour_group_var=['source','pdpurp_label'],person_group_var=['source'],
                title_name="number of tours per person for person type "+str(person_type))

In [11]:
plot_by_pptyp(df_tour, person_type='1')  # 1: full time worker

In [12]:
plot_by_pptyp(df_tour, person_type='2')  # 2: part time worker

In [13]:
plot_by_pptyp(df_tour, person_type='3')  # 3: non-worker age 65+

In [14]:
plot_by_pptyp(df_tour, person_type='4')  # 4: other non-working adult

In [15]:
plot_by_pptyp(df_tour, person_type='5')  # 5: university student

In [16]:
plot_by_pptyp(df_tour, person_type='6')  # 6: grade school student/child age 16+

In [17]:
plot_by_pptyp(df_tour, person_type='7')  # 7: child age 5-15

In [18]:
plot_by_pptyp(df_tour, person_type='8')  # 8: child age 0-4