In [1]:
import os
import pandas as pd
import numpy as np
# import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
# Renderer chain: this is the setting that makes figures appear in the rendered HTML output.
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default figure template: "simple_white" combined with "psrc_color".
# NOTE(review): "psrc_color" is presumably registered by `import psrc_theme` — confirm.
pio.templates.default = "simple_white+psrc_color"


In [2]:
%store -r validation_data

# Work on copies so that columns added to `trip` in later cells do not touch
# the restored `validation_data` object
# (assumes .copy() returns independent data, not a view — TODO confirm).
config = validation_data.config.copy()
trip = validation_data.trip.copy()

In [3]:
# Coarse distance classes used by the bar charts.
# pd.cut's default right-closed intervals are kept, so a travdist at or
# below 0, or above 9999, gets NaN instead of a label.
trip['distance_bin'] = pd.cut(
    trip['travdist'],
    bins=[0, 1, 3.5, 10, 9999],
    labels=['0-1', '1-3.5', '3.5-10', '10+'],
)

# Fine-grained classes: 2-mile-wide bins from 0 up to 60 miles, each labelled
# with its lower edge ('0', '2', ..., '58').
max_bin = 60
bin_size = 2
trip['bin_60mi'] = pd.cut(
    trip['travdist'],
    bins=np.arange(0, max_bin + bin_size, bin_size),
    labels=[str(lower_edge) for lower_edge in range(0, max_bin, bin_size)],
)

In [4]:
def plot_bins(df:pd.DataFrame, filter=None, title_name=None):
    """Show a grouped bar chart of the weighted trip share per distance bin.

    For every ``source`` the ``trexpfac`` weights are summed within each
    ``distance_bin`` and expressed as a percentage of that source's total.
    The unweighted record count per source/bin is attached as hover data.

    Parameters
    ----------
    df : pd.DataFrame
        Trip table with ``source``, ``distance_bin`` and ``trexpfac`` columns.
    filter : optional
        Row selector handed to ``df.loc[...]`` (e.g. a boolean mask) before
        aggregating; ``None`` keeps every row. The name shadows the builtin
        but is kept so existing keyword callers continue to work.
    title_name : str, optional
        Suffix appended to the chart title; a generic title is used when
        it is empty or ``None``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` and not returned, so a
        bare call at the end of a cell does not render it a second time.
    """
    if filter is not None:
        df = df.loc[filter]

    # Keep only trips that landed in a distance bin. NaN never compares equal
    # to anything, so the former `!= float('nan')` test kept every row.
    binned = df.loc[df['distance_bin'].notna()]

    # One pass yields both the weighted total and the raw record count per
    # source/bin. observed=False is spelled out so that empty bins of a
    # categorical `distance_bin` still show up as zero-height bars whatever
    # the installed pandas default is.
    df_plot = (
        binned.groupby(['source', 'distance_bin'], observed=False)['trexpfac']
        .agg(['sum', 'count'])
        .rename(columns={'sum': 'trexpfac', 'count': 'sample count'})
        .reset_index()
    )

    # Share of each source's total weight that falls in each bin.
    source_total = df_plot.groupby('source', observed=False)['trexpfac'].transform('sum')
    df_plot['percentage'] = 100 * df_plot['trexpfac'] / source_total

    if title_name:
        chart_title = "Trip distance by purpose: "+title_name
    else:
        chart_title = "Trip distance"

    fig = px.bar(df_plot, x="distance_bin", y="percentage", color="source",barmode="group",
                facet_col_wrap=2, hover_data=['sample count'],
                title=chart_title)
    # Reduce any "name=value" annotation text to just the value.
    fig.for_each_annotation(lambda a: a.update(text = a.text.split("=")[-1]))
    fig.update_layout(height=400, width=700, font=dict(size=11))
    fig.show()

# Overall distribution: every trip, no row filter, generic "Trip distance" title.
plot_bins(trip)


## Trip Distance by Purpose

In [5]:
# Rows with dpurp == 1, charted under the 'work' title.
plot_bins(trip, trip['dpurp'] == 1, 'work')

In [6]:
# Rows with dpurp == 2, charted under the 'school' title.
plot_bins(trip, trip['dpurp'] == 2, 'school')

In [7]:
# Rows with dpurp == 3, charted under the 'escort' title.
plot_bins(trip, trip['dpurp'] == 3, 'escort')

In [8]:
# Rows with dpurp == 4, charted under the 'personal business' title.
plot_bins(trip, trip['dpurp'] == 4, 'personal business')

In [9]:
# Rows with dpurp == 5, charted under the 'shopping' title.
plot_bins(trip, trip['dpurp'] == 5, 'shopping')

In [10]:
# Rows with dpurp == 6, charted under the 'meal' title.
plot_bins(trip, trip['dpurp'] == 6, 'meal')

In [11]:
# Rows with dpurp == 7, charted under the 'social' title.
plot_bins(trip, trip['dpurp'] == 7, 'social')