For each intermediate stop on a tour (i.e., any trip other than the last trip in the outbound or inbound direction), the trip is assigned a purpose based on an observed frequency distribution.
The distribution is segmented by tour purpose, tour direction and person type. Work tours are also segmented by departure or arrival time period.

In [1]:
import os
import toml
import pandas as pd
import numpy as np
import plotly.express as px
import validation_data_input
import psrc_theme

%store -r validation_data

# Validation settings are read from the TOML file in the current working directory.
config_path = os.path.join(os.getcwd(), 'validation_configuration.toml')
config = toml.load(config_path)

In [2]:
# read data (work on copies so the shared validation_data frames are left untouched)
per_data = validation_data.persons_data_uncloned.copy()
trip_data = validation_data.trips.copy()
tour_data = validation_data.tours.copy()

# delete shared data (currently disabled)
# del validation_data

# add person type labels
ptype_cat = {
    1: "1: Full-Time Worker",
    2: "2: Part-Time Worker",
    3: "3: University Student",
    4: "4: Non-Working Adult Age <65",
    5: "5: Non-Working Adult Age 65+",
    6: "6: High School Student Age 16+",
    7: "7: Child Age 5-15",
    8: "8: Child Age 0-4",
}
per_data['ptype_label'] = per_data['ptype'].map(ptype_cat)

# use one spelling for the home purpose: the capitalised "Home" becomes "home"
trip_data['purpose'] = trip_data['purpose'].replace({"Home": "home"})

# label the trip direction; any value that does not equal True
# (including a missing value) is labelled "inbound"
trip_data['tour_direction'] = np.where(trip_data['outbound'].eq(True), "outbound", "inbound")

# attach person attributes and then tour attributes to every trip
# (left joins, so trips without a matching person/tour are kept)
trip_data = trip_data.merge(per_data, how='left', on=['person_id', 'household_id', 'source'])
trip_data = trip_data.merge(tour_data, how='left', on=['person_id', 'household_id', 'tour_id', 'source'])

In [3]:
# weighted trip totals for every (source, purpose) pair
df_plot = trip_data.groupby(['source', 'purpose'])['trip_weight'].sum().reset_index()

# share of each purpose within its source; transform('sum') broadcasts the
# per-source total back onto every row, so no index re-alignment is needed
df_plot['percentage'] = (
    df_plot['trip_weight']
    / df_plot.groupby(['source'])['trip_weight'].transform('sum')
)
# df_plot

# grouped bars: one bar per source for each purpose
fig = px.bar(
    df_plot,
    x="purpose",
    y="percentage",
    color="source",
    barmode="group",
    title="Trip purpose",
)
# keep only the text after the last "=" in any annotation label
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=400, width=700, yaxis=dict(tickformat=".1%"))
fig.show()

## Trip purpose by segment

In [4]:
def plot_purpose(df: pd.DataFrame, grp_var: str, grp_var_name: str, n_nol: int, height: int,
                 width: int = 770) -> None:
    """Show a faceted bar chart of weighted trip-purpose shares by source.

    The frame is aggregated to the summed ``trip_weight`` per
    (``source``, ``grp_var``, ``purpose``) and each total is divided by the
    total of its (``source``, ``grp_var``) group, so the shares within one
    source and one facet add up to 1.

    Args:
        df: Trip records; must contain the columns ``source``, ``purpose``,
            ``trip_weight`` and the column named by ``grp_var``.
        grp_var: Name of the column used to split the chart into facets.
        grp_var_name: Human-readable name of ``grp_var`` used in the title.
        n_nol: Number of facet columns before wrapping to a new row
            (passed to ``facet_col_wrap``).
        height: Figure height passed to ``update_layout``.
        width: Figure width passed to ``update_layout``; defaults to the
            previously hard-coded 770.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    group_cols = ['source', grp_var]
    df_plot = df.groupby(group_cols + ['purpose'])['trip_weight'].sum().reset_index()

    # transform('sum') broadcasts each group's total back onto its rows, which
    # gives the share directly without a Python-level lambda per group
    group_totals = df_plot.groupby(group_cols)['trip_weight'].transform('sum')
    df_plot['percentage'] = df_plot['trip_weight'] / group_totals

    fig = px.bar(
        df_plot,
        x="percentage",
        y="purpose",
        color="source",
        barmode="group",
        facet_col=grp_var,
        facet_col_wrap=n_nol,
        orientation='h',
        title=f"Trip purpose by {grp_var_name}",
    )
    # keep only the text after the last "=" in each facet label
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.update_layout(height=height, width=width)
    fig.for_each_xaxis(lambda a: a.update(tickformat=".1%"))
    fig.show()

In [5]:
# Purpose shares faceted by person type, three facets per row
plot_purpose(
    trip_data,
    grp_var='ptype_label',
    grp_var_name='person type',
    n_nol=3,
    height=1200,
)

- Survey trips with no matching tours are removed.

In [6]:
# Keep only the trips that have a tour_id; rows with a missing tour_id are dropped
trip_wTour = trip_data.dropna(subset=['tour_id']).copy()

In [7]:
# Purpose shares faceted by tour purpose, three facets per row
plot_purpose(
    trip_wTour,
    grp_var='tour_type',
    grp_var_name='tour purpose',
    n_nol=3,
    height=1200,
)

- Survey trips with no matching tours are removed.

In [8]:
# weighted trip totals by source, tour direction and trip purpose
df_plot = trip_wTour.groupby(['source', 'tour_direction', 'purpose'])['trip_weight'].sum().reset_index()

# share of each purpose within its (source, tour direction) group;
# transform('sum') broadcasts the group total back onto every row
df_plot['percentage'] = (
    df_plot['trip_weight']
    / df_plot.groupby(['source', 'tour_direction'])['trip_weight'].transform('sum')
)

# horizontal grouped bars, one facet per direction with outbound shown first
fig = px.bar(
    df_plot,
    x="percentage",
    y="purpose",
    color="source",
    barmode="group",
    facet_col='tour_direction',
    facet_col_wrap=2,
    orientation='h',
    category_orders={"tour_direction": ["outbound", "inbound"]},
    title="Trip purpose by tour_direction",
)
# keep only the text after the last "=" in each facet label
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(height=500, width=770)
fig.for_each_xaxis(lambda a: a.update(tickformat=".1%"))
fig.show()