These notebooks are used to compare a base and a scenario dataset, from expanded surveys or model outputs, in H5 format. To run: from the menu bar above, choose **Cell -> Run All** or run cells individually. Use the toggle button below to hide/show the raw Python code.

## Tour Behaviors

*Summaries:*

- Tour Primary Destination (3.1)
- Work-Based Subtour Generation (3.2)
- Tour Main Mode (3.3)
- Tour Time of Day (3.4)

---

In [1]:
from IPython.display import HTML

# Inject a small script plus a button that toggles the visibility of the
# notebook's code-input cells (every `div.input` element).
# `code_toggle` is also registered to run once when the document is ready, so
# with `code_show` starting as true the inputs are hidden on page load.
# The script calls `$`, so it relies on jQuery being available on the page.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
import os
import numpy as np
import pandas as pd
import h5py
import pylab as P
from IPython.display import display, display_pretty, Javascript, HTML
from pandas_highcharts.core import serialize
from pandas_highcharts.display import display_charts
import matplotlib.pyplot as plt

# Show charts in notebook
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
# Load h5 survey or daysim output records.
# The two datasets are referred to as BASE and SCEN (scenario).

# Note that both 2006 and 2014 are un-expanded survey samples;
# weights must be added to compare the two files.

# Files are opened read-only ('r'): the cells below only read from them, and
# 'r+' would open the shared source data writable (and fails on read-only
# locations).
base = h5py.File(r'R:\SoundCast\estimation\2014\P5\survey14.h5', 'r')
base_name = '2014 Survey'

scen = h5py.File(r'R:\SoundCast\releases\TransportationFutures2010\scripts\summarize\survey.h5', 'r')
scen_name = '2006 Survey'

In [9]:
def build_df(h5file, h5table, var_dict, nested):
    '''Convert one table of an H5 file into a pandas DataFrame.

    Parameters
    ----------
    h5file : mapping
        Open H5 file (or any object indexed as ``h5file[h5table][var][:]``).
    h5table : str
        Name of the table/group to read, e.g. ``'Trip'``.
    var_dict : dict
        Maps the output column name to the variable name inside ``h5table``.
    nested : bool
        True when every record is itself a sequence whose first element is
        the value (the layout the survey h5 files use); False when the
        records are the values themselves.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``var_dict``.
    '''
    data = {}
    # .items() works on both Python 2 and Python 3; .iteritems() is Python 2 only.
    for col_name, var in var_dict.items():
        values = h5file[h5table][var][:]
        if nested:
            # survey h5 have nested data structure, different than daysim_outputs
            data[col_name] = [record[0] for record in values]
        else:
            data[col_name] = list(values)

    return pd.DataFrame(data)

In [10]:
# Trip table: readable column label -> variable name passed to build_df
tripdict = {
    'Household ID': 'hhno',
    'Person Number': 'pno',
    'Travel Time': 'travtime',
    'Travel Cost': 'travcost',
    'Travel Distance': 'travdist',
    'Mode': 'mode',
    'Purpose': 'dpurp',
    'Departure Time': 'deptm',
    'Expansion Factor': 'trexpfac',
}

In [11]:
# Trip records: the base file is read as flat columns, the scenario file with
# nested=True (each value is taken from the first element of its record).
trip_base = build_df(base, 'Trip', tripdict, nested=False)
trip_scen = build_df(scen, 'Trip', tripdict, nested=True)

In [12]:
# Person table: readable column label -> variable name passed to build_df.
# 'Usual Commute Mode' previously pointed at 'pwtaz', the same field as
# 'Workplace TAZ', so both columns held the workplace zone. It now reads the
# DaySim usual-mode-to-work field 'puwmode'.
persondict = {
    'Household ID': 'hhno',
    'Person Number': 'pno',
    'Transit Pass': 'ptpass',
    'Auto Time to Work': 'pwautime',
    'Auto Distance to Work': 'pwaudist',
    'Worker Type': 'pwtyp',
    'Student Type': 'pstyp',
    'Usual Commute Mode': 'puwmode',
    'Workplace TAZ': 'pwtaz',
    'Age': 'pagey',
    'Person Type': 'pptyp',
    'Expansion Factor': 'psexpfac',
}

In [14]:
# Person records: the scenario file is read with nested=True (first element of
# each record), the base file as flat columns.
person_scen = build_df(scen, 'Person', persondict, nested=True)
person_base = build_df(base, 'Person', persondict, nested=False)

In [15]:
# Build a person key by joining the household ID and person number as text
# and converting the result back to an integer.
# NOTE(review): plain concatenation is not guaranteed unique (household 1 +
# person 11 and household 11 + person 1 both give 111) - confirm ID ranges.
person_scen['personID'] = (
    person_scen['Household ID'].astype('str')
    + person_scen['Person Number'].astype('str')
).astype('int')
person_base['personID'] = (
    person_base['Household ID'].astype('str')
    + person_base['Person Number'].astype('str')
).astype('int')

In [16]:
# Household table: readable column label -> variable name passed to build_df
hhdict = {
    'Household ID': 'hhno',
    'Household Size': 'hhsize',
    'Household Vehicles': 'hhvehs',
    'Household Workers': 'hhwkrs',
    'Household Income': 'hhincome',
    'Household TAZ': 'hhtaz',
    'Expansion Factor': 'hhexpfac',
}

In [17]:
# Household records: the scenario file is read with nested=True (first element
# of each record), the base file as flat columns.
hh_scen = build_df(scen, 'Household', hhdict, nested=True)
hh_base = build_df(base, 'Household', hhdict, nested=False)

In [18]:
# person-day records
# NOTE(review): this is an empty placeholder named `tourdict`, while the
# commented-out PersonDay loader expects `persondaydict` - confirm which
# mapping was intended here.
tourdict = dict()

In [20]:
# persday_scen = build_df(h5file=scen, h5table='PersonDay', var_dict=persondaydict, nested=True)
# persday_base = build_df(h5file=base, h5table='PersonDay', var_dict=persondaydict, nested=False)

In [None]:
# Disabled: persday_scen / persday_base are only created by the PersonDay
# loading cell, which is commented out (and refers to a `persondaydict` that
# is never defined). Left active, this cell raises NameError and stops
# "Run All". Re-enable it together with the PersonDay loading cell.
# # Create unique ID for person by concatenating household ID and person number
# persday_scen['personID'] = (persday_scen['Household ID'].astype('str')+persday_scen['Person Number'].astype('str')).astype('int')
# persday_base['personID'] = (persday_base['Household ID'].astype('str')+persday_base['Person Number'].astype('str')).astype('int')

In [None]:
# # Join household records to person records
# hh_per_scen = pd.merge(left=person_scen, right=hh_scen,on='Household ID',suffixes=('_p','_h'))
# hh_per_base = pd.merge(left=person_base, right=hh_base,on='Household ID',suffixes=('_p','_h'))

In [None]:
# # Join household geography
# taz_geog = pd.read_csv(r'utils/taz_lookup.csv')
# taz_geog.reindex
# hh_per_scen_home_geog = pd.merge(hh_per_scen, taz_geog, left_on='Household TAZ', right_on='TAZ')
# hh_per_base_home_geog = pd.merge(hh_per_base, taz_geog, left_on='Household TAZ', right_on='TAZ')

In [None]:
# # Join workplace geography
# hh_per_scen_work_geog = pd.merge(hh_per_scen, taz_geog, left_on='Workplace TAZ', right_on='TAZ')
# hh_per_base_work_geog = pd.merge(hh_per_base, taz_geog, left_on='Workplace TAZ', right_on='TAZ')

---

## Tour Destinations

---