In [137]:
import os
import pandas as pd
import numpy as np
import validation_data_input
import plotly.express as px
import toml

In [138]:
config = toml.load(os.path.join(os.getcwd(), 'validation_configuration.toml'))

In [139]:
df_acs = pd.read_csv(config['p_acs_auto_ownership'])
df_lookup = pd.read_csv(config['p_maz_bg_lookup'])
df_acs = df_acs.merge(df_lookup, on='block_group_id')

In [140]:
df_acs.columns

Index(['OBJECTID', 'geography', 'id', 'cars_none_control', 'cars_one_control',
       'cars_two_or_more_control', 'block_group_id', 'MAZ', 'TAZ'],
      dtype='object')

In [141]:
df_lookup

Unnamed: 0,MAZ,TAZ,block_group_id
0,1,404.0,533367001
1,2,387.0,533369001
2,3,790.0,5333116003
3,4,835.0,5333116005
4,5,834.0,5333116005
...,...,...,...
66229,66221,,5361532021
66230,66226,,5361537001
66231,66228,,5361537001
66232,66229,,5361537001


In [142]:
hh = validation_data_input.get_data('household')

In [143]:
df_parcel = pd.read_csv(os.path.join(config['model_dir'],r'outputs\landuse\buffered_parcels.txt'),
                        delim_whitespace=True,
                       usecols=['parcelid','emptot_1','hh_1'])

In [144]:
hh = hh.merge(df_parcel, left_on='hhparcel', right_on='parcelid', how='left')

In [145]:
hh = hh.merge(df_acs, how='left', left_on='hhtaz', right_on='TAZ')


In [146]:
# hh.columns

In [147]:
# Group income, hh density, and employment density into 4 groups
var_group = hh.loc[hh['source'] == 'model',['hhincome','emptot_1','hh_1']].quantile([.125, .25, .50, .75])

var_group

Unnamed: 0,hhincome,emptot_1,hh_1
0.125,25500.0,0.0,56.057172
0.25,45000.0,2.249011,127.221355
0.5,84800.0,62.27201,272.113138
0.75,136500.0,315.227122,536.07141


In [148]:
# data manipulation
# hhwkrs is not always accurate; recalculate from part and full time workers
hh['hhwkrs'] = hh['hhftw']+hh['hhptw']
hh['hhwkrs']
# Add column for adults
hh['adults'] = hh['hhsize']-hh['hh515']-hh['hhcu5']-hh['hhhsc']

# add income group
hh['hhincome_group'] = pd.cut(hh['hhincome'],bins=[-9999999.0] + var_group['hhincome'].tolist() + [9999999.0], labels=['very low', 'low', 'medium', 'medium-high', 'high'])
# add hh density groups
hh['hh_density_group'] = pd.cut(hh['hh_1'],bins=[-9999999.0] + var_group['hh_1'].tolist() + [9999999.0], labels=['very low', 'low', 'medium', 'medium-high', 'high'])
# add employment density groups
hh['emp_density_group'] = pd.cut(hh['emptot_1'],bins=[-9999999.0] + var_group['emptot_1'].tolist() + [9999999.0], labels=['very low', 'low', 'medium', 'medium-high', 'high'])

# add auto_ownership with 4+
hh['auto_ownership_simple'] = np.where(hh['hhvehs']>=4, "4+", hh['hhvehs'])
# add auto_ownership with 2+
hh['auto_ownership_2'] = np.where(hh['hhvehs']<2, hh['hhvehs'], "2+")
# add hhsize with 4+
hh['hhsize_simple'] = np.where(hh['hhsize']>=4, "4+", hh['hhsize'])
# add num_workers with 4+
hh['num_workers_simple'] = np.where(hh['hhwkrs']>=4, "4+", hh['hhwkrs'])
# add num_adults with 4+
hh['num_adults_simple'] = np.where(hh['adults']>=4, "4+",hh['adults'])

In [149]:

df_plot = hh.groupby(['source','auto_ownership_simple'])['hhexpfac'].sum().reset_index()

df_plot['percentage'] = df_plot.groupby(['source'], group_keys=False)['hhexpfac'].\
        apply(lambda x: 100 * x / float(x.sum()))
df_plot['source'] = df_plot['source'].astype(pd.CategoricalDtype(['model', 'survey']))

fig = px.bar(df_plot.sort_values(by=['source']), x="auto_ownership_simple", y="percentage", color="source",
             barmode="group",template="simple_white",
             title="Auto ownership")
fig.update_layout(height=400, width=700, font=dict(size=11))
fig.show()

In [150]:

# auto ownership in Income groups
def plot_auto(df:pd.DataFrame, var:str, title_cat:str, sub_name:str):
    print(f"n=\n"
          f"{df.loc[df['source']=='model',var].value_counts()[df[var].sort_values().unique()]}")
    df_plot = df.groupby(['source',var,'auto_ownership_simple'])['hhexpfac'].sum().reset_index()
    df_plot['percentage'] = df_plot.groupby(['source',var], group_keys=False)['hhexpfac'].\
        apply(lambda x: 100 * x / float(x.sum()))

    fig = px.bar(df_plot, x="auto_ownership_simple", y="percentage", color="source",
                 facet_col=var, barmode="group",template="simple_white",
                 title="Auto ownership by "+ title_cat)
    fig.for_each_annotation(lambda a: a.update(text = sub_name + "=<br>" + a.text.split("=")[-1]))
    fig.update_xaxes(title_text="n of cars")
    fig.update_layout(height=400, width=700, font=dict(size=11))
    fig.show()

In [151]:
plot_auto(hh,'hhincome_group','Income Level', 'Income')

n=
very low       3820218
low            3799849
medium         7620034
medium-high    7618335
high           7612312
Name: hhincome_group, dtype: int64


In [152]:
plot_auto(hh,'hhincome_group','income level', 'Income')

n=
very low       3820218
low            3799849
medium         7620034
medium-high    7618335
high           7612312
Name: hhincome_group, dtype: int64


In [153]:
plot_auto(hh,'hhsize_simple','household size', 'HH size')

n=
1      7733008
2     10195535
3      5024171
4+     7518034
Name: hhsize_simple, dtype: int64


In [154]:
plot_auto(hh.loc[hh['num_adults_simple']!="0"],'num_adults_simple','number of adults','num adults')

n=
1      8740678
2     15347315
3      4534417
4+     1842879
Name: num_adults_simple, dtype: int64


In [155]:
plot_auto(hh,'num_workers_simple','number of workers','num workers')

n=
0      6904001
1     12182373
2      9788157
3      1470634
4+      125583
Name: num_workers_simple, dtype: int64


In [156]:
plot_auto(hh.dropna(subset=['hh_density_group']),'hh_density_group','household density','density')

n=
very low       3808859
low            3808851
medium         7617761
medium-high    7618155
high           7617122
Name: hh_density_group, dtype: int64


In [157]:
plot_auto(hh.dropna(subset=['emp_density_group']),'emp_density_group','employment density','density')

n=
very low       6371551
low            1246193
medium         7617669
medium-high    7617786
high           7617549
Name: emp_density_group, dtype: int64


## Validate auto ownership with ACS vehicle ownership data

In [160]:

df = hh.groupby(['source','block_group_id','auto_ownership_2'])['hhexpfac'].sum().reset_index()

df['percentage'] = df.groupby(['source','block_group_id'], group_keys=False)['hhexpfac'].\
    apply(lambda x: 100 * x / float(x.sum()))

# acs auto ownership data
acs_auto_ownership = pd.read_csv(config['p_acs_auto_ownership'], usecols=['cars_none_control', 'cars_one_control', 'cars_two_or_more_control', 'block_group_id'])

# calculate percentage of households having 0, 1 or 2+ vehicle(s) in each block group
acs_auto_ownership['total'] = acs_auto_ownership['cars_one_control'] + acs_auto_ownership['cars_two_or_more_control'] + acs_auto_ownership['cars_none_control']
acs_auto_ownership['0'] = 100 * acs_auto_ownership['cars_none_control']/acs_auto_ownership['total']
acs_auto_ownership['1'] = 100 * acs_auto_ownership['cars_one_control']/acs_auto_ownership['total']
acs_auto_ownership['2+'] = 100 * acs_auto_ownership['cars_two_or_more_control']/acs_auto_ownership['total']
acs_auto_ownership['source'] = "acs data"
bg_auto_ownership = acs_auto_ownership[['source','block_group_id','0','1','2+']]
bg_auto_ownership = pd.melt(bg_auto_ownership, id_vars=['source','block_group_id'], value_vars=['0','1','2+'], var_name='auto_ownership_2',value_name='percentage')

# combine both sets of data
col_list = ['source','block_group_id','auto_ownership_2','percentage']
bg_auto_ownership = pd.concat([df[col_list].copy(),
                               bg_auto_ownership[col_list].copy()])

In [162]:
df_plot = pd.pivot(bg_auto_ownership, index=['block_group_id','auto_ownership_2'], columns='source', values='percentage').reset_index()



fig = px.scatter(df_plot, x="acs data", y="model", trendline="ols", trendline_color_override='rgb(136, 136, 136)',
                 template="plotly_white",
                 facet_col='auto_ownership_2', height=400, width=1000,
                 title="Auto ownership model results validation with acs data")
fig.update_xaxes(dtick=20)
fig.update_yaxes(dtick=20,range=[0, 100])
fig.update_traces(marker_size=3)
fig.update_layout(height=400, width=700, font=dict(size=11))
fig.show()