In [1]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Default template applied to every plotly figure created below.
pio.templates.default = "simple_white+psrc_color"

# Validation settings live in a TOML file in the current working directory.
config_path = os.path.join(os.getcwd(), 'validation_configuration.toml')
config = toml.load(config_path)

In [2]:
# Person and household records, both read from the configured survey directory.
survey_dir = config['survey_dir']
person = validation_data_input.get_data('person', survey_dir)
hh = validation_data_input.get_data('household', survey_dir)

In [3]:
# Parcel land use data: only the parcel id and the emptot_1 / hh_1 columns are kept.
# The path is assembled from separate components so it resolves on any OS
# (a literal backslash path only works on Windows), and sep=r'\s+' is the
# supported spelling of the deprecated delim_whitespace=True.
df_parcel = pd.read_csv(
    os.path.join(config['model_dir'], 'outputs', 'landuse', 'buffered_parcels.txt'),
    sep=r'\s+',
    usecols=['parcelid', 'emptot_1', 'hh_1'],
)

In [4]:
# Load parcel geography lookups (county and regional geography per parcel).
# f-strings are used so a base_year that TOML parsed as an integer still
# produces a valid table name instead of raising on str + int.
parcel_geog = pd.read_sql_table(
    f"parcel_{config['base_year']}_geography",
    f"sqlite:///{config['model_dir']}/inputs/db/soundcast_inputs.db",
    columns=['ParcelID', 'CountyName', 'rg_proposed'],
)

In [5]:
# Attach the parcel land use columns to each household, matched on hhparcel -> parcelid.
hh = pd.merge(hh, df_parcel, how='left', left_on='hhparcel', right_on='parcelid')

In [6]:
# Cut points for income, household density and employment density, computed
# from model households only. Four quantiles give five groups once outer
# bounds are added around them.
model_households = hh[hh['source'].eq('model')]
var_group = model_households[['hhincome', 'emptot_1', 'hh_1']].quantile([.125, .25, .50, .75])

In [7]:
# Group ages: every distinct age value reported in the survey becomes a bin edge,
# bracketed by 0 below and 999 above.
# Missing ages are dropped first because a NaN edge makes the bins non-monotonic.
survey_ages = person.loc[person['source'] == 'survey', 'pagey'].dropna().unique()
# np.unique both sorts and de-duplicates, so a survey age equal to 0 or 999
# no longer produces a repeated edge (which pd.cut rejects).
survey_ages = np.unique(np.concatenate(([0], survey_ages, [999])))

# NOTE(review): pd.cut intervals are right-closed by default, so a pagey of
# exactly 0 falls outside the first bin and gets NaN — confirm that is intended.
person['age'] = pd.cut(person['pagey'], bins=survey_ages)

In [8]:
# Classify households into five ordered groups for income, household density
# and employment density, using the model-based quantiles as inner cut points
# and wide sentinel values as the outer bounds.
group_labels = ['very low', 'low', 'medium', 'medium-high', 'high']
group_columns = {
    'hhincome': 'hhincome_group',
    'hh_1': 'hh_density_group',
    'emptot_1': 'emp_density_group',
}
for value_col, group_col in group_columns.items():
    cut_points = [-9999999.0] + var_group[value_col].tolist() + [9999999.0]
    hh[group_col] = pd.cut(hh[value_col], bins=cut_points, labels=group_labels)


In [9]:
# Bring household attributes onto each person record. Both frames carry a
# 'source' column, so the merge yields source_x (person) and source_y (household).
# NOTE(review): the join key is hhno alone — if model and survey household ids
# can overlap, persons would be matched to households from the other source;
# confirm ids are disjoint across sources.
person = pd.merge(person, hh, how='left', on='hhno')

In [10]:
# Add county and regional geography, matched on hhparcel -> ParcelID.
person = pd.merge(person, parcel_geog, how='left', left_on='hhparcel', right_on='ParcelID')

In [11]:
# Keep a single 'source' column: discard the person-side copy and rename the
# household-side copy left over from the earlier merge.
person = (
    person
    .drop(columns='source_x')
    .rename(columns={'source_y': 'source'})
)

In [12]:
# Weighted transit pass ownership share by source.
# Sum of psexpfac per source and ptpass value (columns 0 and 1 are the ptpass codes).
df = person.pivot_table(index='source', columns='ptpass', aggfunc='sum', values='psexpfac')
# Select psexpfac BEFORE aggregating: summing every column first is wasteful and
# fails on the categorical/interval columns under pandas >= 2.0.
df_tot = person.groupby('source')[['psexpfac']].sum().reset_index()
df = df.merge(df_tot, on='source')
df['no pass'] = df[0]/df['psexpfac']
df['% owns transit pass'] = df[1]/df['psexpfac']
df[['source','% owns transit pass']]

Unnamed: 0,source,% owns transit pass
0,model,0.165909
1,survey,0.237584


In [13]:

# Transit pass ownership share by a grouping variable
def plot_transit_pass(df:pd.DataFrame, var:str, title_cat:str, sub_name:str):
    """Show a grouped bar chart of the transit pass ownership share by `var`.

    For every (source, var) combination the share is the sum of `psexpfac`
    over rows with ptpass == 1 divided by the sum of `psexpfac` over all rows
    in that combination. One facet is drawn per value of `var`, with one bar
    per source; the unweighted row count is available on hover.

    Args:
        df: Person-level records with 'source', 'ptpass', 'psexpfac' and `var` columns.
        var: Column to facet by.
        title_cat: Text appended to the figure title.
        sub_name: Prefix used for each facet annotation.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    share_keys = ['source', var]
    cell_keys = share_keys + ['ptpass']

    # observed=False is passed explicitly: it is the behaviour this chart has
    # always relied on for categorical `var` columns, and spelling it out avoids
    # pandas' FutureWarning about the default changing.
    weights = df.groupby(cell_keys, observed=False)['psexpfac']

    df_plot = weights.sum().reset_index()
    # Share of each ptpass value within its (source, var) group.
    group_total = df_plot.groupby(share_keys, observed=False)['psexpfac'].transform('sum')
    df_plot['percentage'] = df_plot['psexpfac'] / group_total

    # Unweighted number of records behind each bar, shown on hover.
    df_plot_ct = weights.count().reset_index().rename(columns={'psexpfac': 'sample count'})
    df_plot = df_plot.merge(df_plot_ct, on=cell_keys)

    # Only show the share with a pass (ptpass==1)
    df_plot = df_plot[df_plot['ptpass'] == 1]

    fig = px.bar(df_plot, x="ptpass", y="percentage", color="source",
                 facet_col=var, barmode="group",
                 hover_data=['sample count'],
                 title="Transit Pass Ownership by "+ title_cat)
    # Replace the default "<var>=<value>" facet titles with "<sub_name>=<br><value>".
    fig.for_each_annotation(lambda a: a.update(text = sub_name + "=<br>" + a.text.split("=")[-1]))
    fig.update_xaxes(title_text="ptpass")
    fig.update_layout(height=400, width=800, font=dict(size=11))
    # Percent formatting is applied to every facet's y axis, including the first.
    fig.for_each_yaxis(lambda a: a.update(tickformat = ".2%"))
    fig.show()

## Pass Ownership by Home Location

In [14]:
# Pass ownership share faceted by the CountyName column.
plot_transit_pass(df=person, var='CountyName', title_cat='Home County', sub_name='County')

In [15]:
# Pass ownership share faceted by the rg_proposed column.
plot_transit_pass(df=person, var='rg_proposed', title_cat='Regional Geography', sub_name='Geog')

## Pass Ownership by Person/Household Characteristics

In [16]:
# Facet label matches the chart title: pptyp is plotted as "Person Type",
# so the facets should not be labelled "student type".
plot_transit_pass(person, 'pptyp', 'Person Type', 'Person Type')

In [17]:
# Pass ownership share faceted by the binned age column.
plot_transit_pass(df=person, var='age', title_cat='Age', sub_name='Age')

In [18]:
# Pass ownership share faceted by the household density group.
plot_transit_pass(df=person, var='hh_density_group', title_cat='Household Density', sub_name='HH Density at Home')

In [19]:
# Pass ownership share faceted by the employment density group.
plot_transit_pass(df=person, var='emp_density_group', title_cat='Employment Density', sub_name='Emp Density at Home')