These notebooks are used to compare a base and scenario, from expanded surveys or model outputs, in H5 format. To run: from the menu bar above, choose **Cell -> Run All ** or run lines individually. Use the toggle button below to hide/show the raw Python code.

 ## School and Workplace Location Models

*Summaries for Daysim Models 1.1 - 1.5*

    - Workplace Location (1.1)
        - by County
        - by District
    - School Location (1.2)
        - by County
        - by District
    - Workers Paying to Park at Work (1.3)
        - by Workplace County
        - by Workplace District
    - Transit Pass Ownership (1.4)
       - by Workplace County 
       - by Workplace District
       - by Home County 
       - by Home District
    - Auto Ownership (1.5)
       - by County
       - by District
---

In [17]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [18]:
import os
import numpy as np
import pandas as pd
import h5py
import pylab as P
from IPython.display import display, display_pretty, Javascript, HTML
from pandas_highcharts.core import serialize
from pandas_highcharts.display import display_charts
import matplotlib.pyplot as plt

# Show charts in notebook
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [19]:
# Define data sources

# 2006 survey
survey06_dir = r'R:\SoundCast\releases\TransportationFutures2010\scripts\summarize'


# 2014 survey
survey14_dir = r'D:\travel-studies\2014\estimation'

In [20]:
# Read Model Scenario Results
scen = h5py.File(survey06_dir + r'/survey.h5','r+')
scen_name = '2006 Survey'

In [21]:
# Read Base Data
base_file = r'/survey14.h5'

base = h5py.File(survey14_dir + base_file ,'r+')
base_name = '2014 Survey'

In [22]:
def build_df(h5file, h5table, var_dict, survey_file=False):
    ''' Convert H5 into dataframe '''
    data = {}
    if survey_file:
        # survey h5 have nested data structure, different than daysim_outputs
        for col_name, var in var_dict.iteritems():
            data[col_name] = [i[0] for i in h5file[h5table][var][:]]
    else:
        for col_name, var in var_dict.iteritems():
            data[col_name] = [i for i in h5file[h5table][var][:]]

    return pd.DataFrame(data)

In [23]:
base['Person']['pno'][:][1]

(1,)

In [24]:
tripdict={'Household ID': 'hhno',
            'Person Number': 'pno',
            'Travel Time':'travtime',
            'Travel Cost': 'travcost',
            'Travel Distance': 'travdist',
            'Mode': 'mode',
            'Purpose':'dpurp',
            'Departure Time': 'deptm',
            'Expansion Factor': 'trexpfac'}

In [25]:
trip_scen = build_df(h5file=scen, h5table='Trip', var_dict=tripdict, survey_file=True)
trip_base = build_df(h5file=base, h5table='Trip', var_dict=tripdict, survey_file=True)

In [26]:
persondict={'Household ID': 'hhno',
            'Person Number': 'pno',
            'Transit Pass': 'ptpass',
            'Auto Time to Work': 'pwautime',
            'Auto Distance to Work': 'pwaudist',
            'Worker Type': 'pwtyp',
            'Student Type': 'pstyp',
            'Usual Commute Mode': 'puwmode',
            'Workplace TAZ': 'pwtaz',
            'School TAZ': 'pstaz',
            'Age': 'pagey',
            'Person Type': 'pptyp',
            'Expansion Factor': 'psexpfac'}

In [27]:
person_scen = build_df(h5file=scen, h5table='Person', var_dict=persondict, survey_file=True)
person_base = build_df(h5file=base, h5table='Person', var_dict=persondict, survey_file=True)

In [28]:
# Create unique ID for person by concatenating household ID and person number 
person_scen['personID'] = (person_scen['Household ID'].astype('str')+person_scen['Person Number'].astype('str')).astype('int')
person_base['personID'] = (person_base['Household ID'].astype('str')+person_base['Person Number'].astype('str')).astype('int')

In [29]:
hhdict={'Household ID': 'hhno',
        'Household Size': 'hhsize',
        'Household Vehicles': 'hhvehs',
        'Household Workers': 'hhwkrs',
        'Household Income': 'hhincome',
        'Household TAZ': 'hhtaz',
        'Expansion Factor': 'hhexpfac'}

In [30]:
hh_scen = build_df(h5file=scen, h5table='Household', var_dict=hhdict, survey_file=True)
hh_base = build_df(h5file=base, h5table='Household', var_dict=hhdict, survey_file=True)

In [31]:
# Add labels for worker type
labels = {
  0: "Not a worker",  
  1: "Full-time worker",
  2: "Part-time worker",
}

person_base['Worker Type'] = ([labels[x] for x in person_base['Worker Type']])
person_scen['Worker Type'] = ([labels[x] for x in person_scen['Worker Type']])

In [32]:
# Join household records to person records
hh_per_scen = pd.merge(left=person_scen, right=hh_scen,on='Household ID',suffixes=('_p','_h'))
hh_per_base = pd.merge(left=person_base, right=hh_base,on='Household ID',suffixes=('_p','_h'))

In [33]:
# Join household geography
taz_geog = pd.read_csv(r'utils/taz_lookup.csv')
taz_geog.reindex
hh_per_scen_home_geog = pd.merge(hh_per_scen, taz_geog, left_on='Household TAZ', right_on='TAZ')
hh_per_base_home_geog = pd.merge(hh_per_base, taz_geog, left_on='Household TAZ', right_on='TAZ')

In [34]:
# Join workplace geography
hh_per_scen_work_geog = pd.merge(hh_per_scen, taz_geog, left_on='Workplace TAZ', right_on='TAZ')
hh_per_base_work_geog = pd.merge(hh_per_base, taz_geog, left_on='Workplace TAZ', right_on='TAZ')

---

## Jobs by Location

### Jobs by County

In [35]:
df = pd.DataFrame([hh_per_scen_work_geog.groupby('County').sum()['Expansion Factor_h'],
                   hh_per_base_work_geog.groupby('County').sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
County,Unnamed: 1_level_1,Unnamed: 2_level_1
King,911361.1,1105040.6864
Kitsap,77365.82,79099.78
Pierce,209718.71,254498.6217
Snohomish,198680.46,219190.826


In [36]:
# Distribution
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T
df_new

Unnamed: 0_level_0,2006 Survey,2014 Survey
County,Unnamed: 1_level_1,Unnamed: 2_level_1
King,0.652311,0.666559
Kitsap,0.055375,0.047713
Pierce,0.150107,0.153513
Snohomish,0.142207,0.132216


In [37]:
display_charts(df_new, kind='bar', title='Job Location Distribution by County')

### Jobs by District

In [38]:
district_col = 'New DistrictName'
df = pd.DataFrame([hh_per_scen_work_geog.groupby(district_col).sum()['Expansion Factor_h'],
                   hh_per_base_work_geog.groupby(district_col).sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1
East Side,258650.11,340512.7994
Everett-Lynwood-Edmonds,122540.92,133795.0959
Kitsap,77365.82,79099.78
North Seattle-Shoreline,122276.58,132703.7331
Renton-FedWay-Kent,215984.34,235851.5392
S.Kitsap,6998.18,14534.873
Seattle CBD,221391.87,279329.921
South Pierce,104619.23,122153.537
Suburban Snohomish,76139.54,85395.7301
Tacoma,98101.3,117810.2117


In [39]:
# Distribution
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T
df_new

Unnamed: 0_level_0,2006 Survey,2014 Survey
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1
East Side,0.18513,0.205397
Everett-Lynwood-Edmonds,0.087709,0.080705
Kitsap,0.055375,0.047713
North Seattle-Shoreline,0.08752,0.080047
Renton-FedWay-Kent,0.154592,0.142265
S.Kitsap,0.005009,0.008767
Seattle CBD,0.158462,0.168491
South Pierce,0.074882,0.073683
Suburban Snohomish,0.054497,0.051511
Tacoma,0.070216,0.071063


In [40]:
display_charts(df_new, kind='bar', title='Job Location Distribution by County')

---

** Distribution of Worker Types**

In [41]:
# Distribution
df_dist = pd.DataFrame([df[scen_name]/person_scen.count()['personID'],
                   df[base_name]/person_base.count()['personID']]).T

In [42]:
display_charts(df_dist, kind='bar', title='Worker Type Distribution')

---

## School Location

In [43]:
# Join school geography
hh_per_scen_school_geog = pd.merge(hh_per_scen, taz_geog, left_on='School TAZ', right_on='TAZ')
hh_per_base_school_geog = pd.merge(hh_per_base, taz_geog, left_on='School TAZ', right_on='TAZ')

### By County

In [44]:
df = pd.DataFrame([hh_per_scen_school_geog.groupby('County').sum()['Expansion Factor_h'],
                   hh_per_base_school_geog.groupby('County').sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
County,Unnamed: 1_level_1,Unnamed: 2_level_1
King,332074.17,692300.4446
Kitsap,47202.05,63941.319
Pierce,141727.02,239051.0313
Snohomish,134688.11,189096.419


In [45]:
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T
df_new

Unnamed: 0_level_0,2006 Survey,2014 Survey
County,Unnamed: 1_level_1,Unnamed: 2_level_1
King,0.506449,0.584521
Kitsap,0.071988,0.053987
Pierce,0.216149,0.201835
Snohomish,0.205414,0.159657


In [46]:
display_charts(df_new, kind='bar', title='School Location Distribution by County')

### School Location by District

In [47]:
district_col = 'New DistrictName'
df = pd.DataFrame([hh_per_scen_school_geog.groupby(district_col).sum()['Expansion Factor_h'],
                   hh_per_base_school_geog.groupby(district_col).sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1
East Side,122008.2,239750.887
Everett-Lynwood-Edmonds,63164.21,90176.31
Kitsap,47202.05,63941.319
North Seattle-Shoreline,63394.92,154168.096
Renton-FedWay-Kent,73083.84,136088.7625
S.Kitsap,11018.92,14506.382
Seattle CBD,45578.22,104991.5922
South Pierce,85980.99,148555.8866
Suburban Snohomish,71523.9,98920.109
Tacoma,44727.11,75988.7627


In [48]:
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T
df_new

Unnamed: 0_level_0,2006 Survey,2014 Survey
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1
East Side,0.186076,0.202426
Everett-Lynwood-Edmonds,0.096332,0.076137
Kitsap,0.071988,0.053987
North Seattle-Shoreline,0.096684,0.130167
Renton-FedWay-Kent,0.111461,0.114902
S.Kitsap,0.016805,0.012248
Seattle CBD,0.069512,0.088646
South Pierce,0.13113,0.125428
Suburban Snohomish,0.109082,0.08352
Tacoma,0.068214,0.064159


In [49]:
display_charts(df_new, kind='bar', title='School Location Distribution by District')

# Transit Pass Ownership

### By Home Location

In [73]:
df = pd.DataFrame([person_scen.groupby('Transit Pass').sum()['Expansion Factor'],
              person_base.groupby('Transit Pass').sum()['Expansion Factor']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Transit Pass,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2918331.51,
1,348476.15,3571494.5297


In [191]:
# By Home County
colname = 'Transit Pass'
df = pd.DataFrame([hh_per_scen_home_geog.groupby(colname).sum()['Expansion Factor_h'],
                   hh_per_base_home_geog.groupby(colname).sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Transit Pass,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2918331.51,
1,348476.15,3571144.3717


In [194]:
# By Home District
colname = 'New DistrictName'
df = pd.DataFrame([hh_per_scen_home_geog.groupby(colname).sum()['Expansion Factor_h'],
                   hh_per_base_home_geog.groupby(colname).sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1
East Side,585887.11,660109.6451
Everett-Lynwood-Edmonds,250931.89,260224.689
Kitsap,235714.93,235666.869
North Seattle-Shoreline,380757.37,347809.815
Renton-FedWay-Kent,414538.41,490745.5559
S.Kitsap,69967.85,51829.916
Seattle CBD,134063.55,175854.824
South Pierce,403329.2,470811.7267
Suburban Snohomish,383614.2,425202.203
Tacoma,221308.23,230197.5765


### By Work Location

In [197]:
# By Work County
colname = 'County'
df = pd.DataFrame([hh_per_scen_work_geog.groupby(colname).sum()['Expansion Factor_h'],
                   hh_per_base_work_geog.groupby(colname).sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
County,Unnamed: 1_level_1,Unnamed: 2_level_1
King,911361.1,1105040.6864
Kitsap,77365.82,79099.78
Pierce,209718.71,254498.6217
Snohomish,198680.46,219190.826


In [198]:
# By Work District
colname = 'New DistrictName'
df = pd.DataFrame([hh_per_scen_work_geog.groupby(colname).sum()['Expansion Factor_h'],
                   hh_per_base_work_geog.groupby(colname).sum()['Expansion Factor_h']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1
East Side,258650.11,340512.7994
Everett-Lynwood-Edmonds,122540.92,133795.0959
Kitsap,77365.82,79099.78
North Seattle-Shoreline,122276.58,132703.7331
Renton-FedWay-Kent,215984.34,235851.5392
S.Kitsap,6998.18,14534.873
Seattle CBD,221391.87,279329.921
South Pierce,104619.23,122153.537
Suburban Snohomish,76139.54,85395.7301
Tacoma,98101.3,117810.2117


---

# Auto Ownership

In [81]:
new_hh_scen = hh_scen[hh_scen.index>=0]
df = pd.DataFrame([hh_scen.groupby('Household Vehicles').sum()['Expansion Factor'],
                   hh_base.groupby('Household Vehicles').sum()['Expansion Factor']]).T
df.columns=([scen_name,base_name])
df=df[df.index>=0]
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Household Vehicles,Unnamed: 1_level_1,Unnamed: 2_level_1
0,101740.92,112175.187
1,443595.4,478265.607
2,514038.7,560284.1711
3,212356.59,218368.0022
4,69227.0,72864.8129
5,23312.56,22097.4961
6,7270.14,2295.394
7,2010.24,4625.152
8,1233.62,1874.778
9,580.28,


In [82]:
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T
df_new

Unnamed: 0_level_0,2006 Survey,2014 Survey
Household Vehicles,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.073962,0.076083
1,0.322476,0.324382
2,0.373685,0.380011
3,0.154375,0.148108
4,0.050325,0.04942
5,0.016947,0.014988
6,0.005285,0.001557
7,0.001461,0.003137
8,0.000897,0.001272
9,0.000422,


In [83]:
display_charts(df_new, kind='bar', title='Auto Ownership Distribution Regionwide')

In [94]:
# Average autos per household
print scen_name + ": " + str(sum(hh_scen['Household Vehicles']*hh_scen['Expansion Factor'])/sum(hh_scen['Expansion Factor']))
print base_name + ": " + str(sum(hh_base['Household Vehicles']*hh_base['Expansion Factor'])/sum(hh_base['Expansion Factor']))

2006 Survey: 1.8735651003
2014 Survey: 1.53171885555


In [95]:
# Auto Ownership by Income
# Create common income ranges
def map_income(df, in_field, out_field):
    
    # Define categories
    incmap = {}
    for i in range(0, 20000):
        incmap.update({i: ' <20k'})
    for i in range(20000, 40000):
        incmap.update({i: '20k-40k'})
    for i in range(40000, 60000):
        incmap.update({i: '40k-60k'})
    for i in range(60000, 75000):
        incmap.update({i: '60k-75k'})
    for i in range(75000, 100000):
        incmap.update({i: '75k-100k'})
    for i in range(100000, 150000):
        incmap.update({i: '100k-150k'})
    for i in range(150000, int(df[in_field].max())+1):
        incmap.update({i: '>150k'})

    df[out_field] = df[in_field].map(incmap)
    return df

In [96]:
hh_scen = map_income(hh_scen, in_field='Household Income', out_field='Income')
hh_base = map_income(hh_base, in_field='Household Income', out_field='Income')

In [97]:
hh_base.groupby('Household Income').count()

Unnamed: 0_level_0,Expansion Factor,Household ID,Household Size,Household TAZ,Household Vehicles,Household Workers,Income
Household Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
-1,2524,2524,2524,2524,2524,2524,0
5000,225,225,225,225,225,225,225
12500,14,14,14,14,14,14,14
17500,513,513,513,513,513,513,513
30000,455,455,455,455,455,455,455
37500,42,42,42,42,42,42,42
40000,642,642,642,642,642,642,642
62500,1035,1035,1035,1035,1035,1035,1035
87500,862,862,862,862,862,862,862
125000,994,994,994,994,994,994,994


In [172]:
df_scen = pd.pivot_table(data=hh_scen, index='Household Vehicles', columns=['Income'], 
                    aggfunc='sum', values='Expansion Factor', fill_value=False, margins=False)
df_base = pd.pivot_table(data=hh_base, index='Household Vehicles', columns=['Income'], 
                    aggfunc='sum', values='Expansion Factor', fill_value=False, margins=False)
# Sort the columns
df_scen = df_scen[[' <20k','20k-40k','40k-60k','60k-75k',
                   '75k-100k','100k-150k','>150k']]
df_base = df_base[[' <20k','20k-40k','40k-60k','60k-75k',
                   '75k-100k','100k-150k','>150k']]

In [173]:
# Calculate averages by income class
df = pd.DataFrame([[sum(df_scen[colname]*df_scen.index)/sum(df_scen[colname]) for colname in df_scen.columns],
                    [sum(df_base[colname]*df_base.index)/sum(df_base[colname]) for colname in df_base.columns]]).T
df.index=df_base.columns
df.columns=[scen_name,base_name]
df['% difference 2006->2014']= (df[base_name]-df[scen_name])/df[scen_name]
df

Unnamed: 0_level_0,2006 Survey,2014 Survey,% difference 2006->2014
Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<20k,0.826999,1.005473,0.21581
20k-40k,1.353486,1.361532,0.005945
40k-60k,1.843047,1.604677,-0.129334
60k-75k,2.071737,1.851326,-0.10639
75k-100k,2.337899,2.208299,-0.055434
100k-150k,2.564529,2.367755,-0.076729
>150k,2.533331,2.634852,0.040074


In [174]:
hh_per_scen_home_geog.columns

Index([u'Age', u'Auto Distance to Work', u'Auto Time to Work', u'Expansion Factor_p', u'Household ID', u'Person Number', u'Person Type', u'School TAZ', u'Student Type', u'Transit Pass', u'Usual Commute Mode', u'Worker Type', u'Workplace TAZ', u'personID', u'Expansion Factor_h', u'Household Income', u'Household Size', u'Household TAZ', u'Household Vehicles', u'Household Workers', u'TAZ', u'TAD', u'OldDistric', u'County', u'District', u'New DistrictName'], dtype='object')

In [175]:
# Average ownership by district
df_scen = pd.pivot_table(data=hh_per_scen_home_geog, index='Household Vehicles', columns=['New DistrictName'], 
                    aggfunc='sum', values='Expansion Factor_p', fill_value=False, margins=False)
df_base = pd.pivot_table(data=hh_per_base_home_geog, index='Household Vehicles', columns=['New DistrictName'], 
                    aggfunc='sum', values='Expansion Factor_p', fill_value=False, margins=False)

In [176]:
# Calculate averages by district
df = pd.DataFrame([[sum(df_scen[colname]*df_scen.index)/sum(df_scen[colname]) for colname in df_scen.columns],
                    [sum(df_base[colname]*df_base.index)/sum(df_base[colname]) for colname in df_base.columns]]).T
df.index=df_base.columns
df.columns=[scen_name,base_name]
df
df['% difference 2006->2014']= (df[base_name]-df[scen_name])/df[scen_name]
df

Unnamed: 0_level_0,2006 Survey,2014 Survey,% difference 2006->2014
New DistrictName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East Side,2.226978,2.224379,-0.001167
Everett-Lynwood-Edmonds,1.985693,1.906124,-0.040071
Kitsap,2.368156,2.209214,-0.067116
North Seattle-Shoreline,1.899898,1.197645,-0.369627
Renton-FedWay-Kent,2.214294,2.117594,-0.043671
S.Kitsap,1.966543,2.570041,0.306883
Seattle CBD,1.368956,0.814066,-0.405338
South Pierce,2.241079,2.285946,0.02002
Suburban Snohomish,2.670453,2.289971,-0.142478
Tacoma,2.137104,2.035064,-0.047747


In [183]:
# Average ownership by county
df_scen = pd.pivot_table(data=hh_per_scen_home_geog, index='Household Vehicles', columns=['County'], 
                    aggfunc='sum', values='Expansion Factor_p', fill_value=False, margins=False)
df_base = pd.pivot_table(data=hh_per_base_home_geog, index='Household Vehicles', columns=['County'], 
                    aggfunc='sum', values='Expansion Factor_p', fill_value=False, margins=False)

In [178]:
# Calculate averages by county
df = pd.DataFrame([[sum(df_scen[colname]*df_scen.index)/sum(df_scen[colname]) for colname in df_scen.columns],
                    [sum(df_base[colname]*df_base.index)/sum(df_base[colname]) for colname in df_base.columns]]).T
df.index=df_base.columns
df.columns=[scen_name,base_name]
df
df['% difference 2006->2014']= (df[base_name]-df[scen_name])/df[scen_name]
df

Unnamed: 0_level_0,2006 Survey,2014 Survey,% difference 2006->2014
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
King,2.053105,1.832945,-0.107233
Kitsap,2.368156,2.209214,-0.067116
Pierce,2.180297,2.228792,0.022242
Snohomish,2.399664,2.144242,-0.106441
