These notebooks compare a base and a scenario, from expanded surveys or model outputs, in H5 format. To run: from the menu bar above, choose **Cell -> Run All** or run cells individually. Use the toggle button below to hide/show the raw Python code.

## Activity Pattern

*Summaries:*

- Day Activity Pattern (2.1)
- Number of Tours (2.2)

---

In [4]:
from IPython.display import HTML

# Render a show/hide control for the notebook's code cells.
# The embedded script defines code_toggle(), which hides or shows every
# `div.input` element and then flips the `code_show` flag. It is registered to
# run once when the document is ready, so with code_show starting as true the
# input cells are hidden on load. The form below the script renders a button
# that calls code_toggle() again on each click.
# NOTE(review): the script relies on a global `$` (presumably the jQuery
# instance provided by the notebook front end) -- confirm it is available in
# the environment where this notebook is viewed.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [1]:
import os
import numpy as np
import pandas as pd
import h5py
import pylab as P
from IPython.display import display, display_pretty, Javascript, HTML
from pandas_highcharts.core import serialize
from pandas_highcharts.display import display_charts
import matplotlib.pyplot as plt

# Show charts in notebook
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [16]:
# Define data sources.
# Each *_dir is a folder that a later cell concatenates with an H5 file name.
# Each *_format flag is passed to build_df(survey_file=...): True means every
# record is indexed with [0] when read (nested layout), False means values are
# used as stored.
#
# NOTE(review): the flags are grouped here as 2006 -> base_format and
# 2014 -> scen_format, but the read cells open the 2006 file as `scen` and the
# 2014 file as `base`, so scen_format is applied to the 2006 file and
# base_format to the 2014 file. Confirm each flag matches the file it is
# actually used with.

# 2006 survey
survey06_dir = r'R:\SoundCast\releases\TransportationFutures2010\scripts\summarize'
base_format = False   # is the h5 data nested (one-element records)?

# 2014 survey
survey14_dir = r'D:\travel-studies\2014\estimation'
scen_format = True     # is the h5 data nested (one-element records)?

In [3]:
# Read scenario data (here: the 2006 survey H5 file).
# Opened read-only ('r'): the cells that use `scen` only read from it, and
# 'r+' would require write permission on the source file and leave it open to
# accidental modification.
scen = h5py.File(survey06_dir + r'/survey.h5', 'r')
scen_name = '2006 Survey'

In [4]:
# Read base data (here: the 2014 survey H5 file).
base_file = r'/survey14.h5'

# Opened read-only ('r'): the cells that use `base` only read from it, and
# 'r+' would require write permission on the source file and leave it open to
# accidental modification.
base = h5py.File(survey14_dir + base_file, 'r')
base_name = '2014 Survey'

In [5]:
def build_df(h5file, h5table, var_dict, survey_file=False):
    ''' Convert an H5 table into a pandas DataFrame.

    Parameters
    ----------
    h5file : object indexed as h5file[h5table][var][:], e.g. an open h5py.File
    h5table : name of the table/group to read, e.g. 'Trip'
    var_dict : dict mapping output column name -> field name inside h5table
    survey_file : if True, each record is indexed with [0] before being stored
        (nested layout); if False, values are used as stored.

    Returns
    -------
    pandas.DataFrame with one column per key of var_dict.
    '''
    data = {}
    # .items() works on both Python 2 and Python 3; iteritems() is Python 2 only
    for col_name, var in var_dict.items():
        values = h5file[h5table][var][:]
        if survey_file:
            # nested layout: take the first element of every record
            data[col_name] = [record[0] for record in values]
        else:
            data[col_name] = list(values)

    return pd.DataFrame(data)

In [6]:
# Trip table: output column name -> H5 field name
tripdict = {
    'Household ID': 'hhno',
    'Person Number': 'pno',
    'Travel Time': 'travtime',
    'Travel Cost': 'travcost',
    'Travel Distance': 'travdist',
    'Mode': 'mode',
    'Purpose': 'dpurp',
    'Departure Time': 'deptm',
    'Expansion Factor': 'trexpfac',
}

In [18]:
# Load the Trip table from each source using the shared column mapping.
trip_scen = build_df(
    h5file=scen,
    h5table='Trip',
    var_dict=tripdict,
    survey_file=scen_format
)
trip_base = build_df(
    h5file=base,
    h5table='Trip',
    var_dict=tripdict,
    survey_file=base_format
)

In [19]:
# Person table: output column name -> H5 field name
persondict = {
    'Household ID': 'hhno',
    'Person Number': 'pno',
    'Transit Pass': 'ptpass',
    'Auto Time to Work': 'pwautime',
    'Auto Distance to Work': 'pwaudist',
    'Worker Type': 'pwtyp',
    'Student Type': 'pstyp',
    # usual mode to work; previously mapped to 'pwtaz', which made this column
    # a duplicate of 'Workplace TAZ'
    'Usual Commute Mode': 'puwmode',
    'Workplace TAZ': 'pwtaz',
    'Age': 'pagey',
    'Person Type': 'pptyp',
    'Expansion Factor': 'psexpfac',
}

In [20]:
# Load the Person table from each source using the shared column mapping.
person_scen = build_df(
    h5file=scen,
    h5table='Person',
    var_dict=persondict,
    survey_file=scen_format
)
person_base = build_df(
    h5file=base,
    h5table='Person',
    var_dict=persondict,
    survey_file=base_format
)

In [21]:
# Build a per-person key: household ID and person number are joined as text
# and the result is read back as an integer.
# NOTE(review): the parts are not zero-padded, so e.g. (household 1, person 11)
# and (household 11, person 1) both yield 111 -- confirm IDs cannot collide
# in these data.
person_scen['personID'] = (
    person_scen['Household ID'].astype('str')
    + person_scen['Person Number'].astype('str')
).astype('int')
person_base['personID'] = (
    person_base['Household ID'].astype('str')
    + person_base['Person Number'].astype('str')
).astype('int')

In [22]:
# Household table: output column name -> H5 field name
hhdict = {
    'Household ID': 'hhno',
    'Household Size': 'hhsize',
    'Household Vehicles': 'hhvehs',
    'Household Workers': 'hhwkrs',
    'Household Income': 'hhincome',
    'Household TAZ': 'hhtaz',
    'Expansion Factor': 'hhexpfac',
}

In [23]:
# Load the Household table from each source using the shared column mapping.
hh_scen = build_df(
    h5file=scen,
    h5table='Household',
    var_dict=hhdict,
    survey_file=scen_format
)
hh_base = build_df(
    h5file=base,
    h5table='Household',
    var_dict=hhdict,
    survey_file=base_format
)

In [24]:
# Person-day table: output column name -> H5 field name
persondaydict = {
    # record keys
    'Household ID': 'hhno',
    'Person Number': 'pno',
    # tour counts
    'Home-Based Tours': 'hbtours',
    'Work-Based Tours': 'wbtours',
    'Tours to Usual Workplace': 'uwtours',
    'Work Tours': 'wktours',
    'School Tours': 'sctours',
    'Escort Tours': 'estours',
    'Personal Business Tours': 'pbtours',
    'Shopping Tours': 'shtours',
    'Meal Tours': 'mltours',
    'Social Tours': 'sotours',
    'Recreation Tours': 'retours',
    'Medical Tours': 'metours',
    # stop counts
    'Work Stops': 'wkstops',
    'School Stops': 'scstops',
    'Escort Stops': 'esstops',
    'Personal Business Stops': 'pbstops',
    'Shopping Stops': 'shstops',
    'Meal Stops': 'mlstops',
    'Social Stops': 'sostops',
    'Recreation Stops': 'restops',
    'Medical Stops': 'mestops',
    # other
    'Time Worked at Home': 'wkathome',
    'Expansion Factor': 'pdexpfac',
}

In [25]:
# Load the PersonDay table from each source using the shared column mapping.
persday_scen = build_df(
    h5file=scen,
    h5table='PersonDay',
    var_dict=persondaydict,
    survey_file=scen_format
)
persday_base = build_df(
    h5file=base,
    h5table='PersonDay',
    var_dict=persondaydict,
    survey_file=base_format
)

In [26]:
# Build a per-person key on the person-day records: household ID and person
# number are joined as text and the result is read back as an integer.
# NOTE(review): the parts are not zero-padded, so e.g. (household 1, person 11)
# and (household 11, person 1) both yield 111 -- confirm IDs cannot collide
# in these data.
persday_scen['personID'] = (
    persday_scen['Household ID'].astype('str')
    + persday_scen['Person Number'].astype('str')
).astype('int')
persday_base['personID'] = (
    persday_base['Household ID'].astype('str')
    + persday_base['Person Number'].astype('str')
).astype('int')

In [27]:
# # Join household records to person records
# hh_per_scen = pd.merge(left=person_scen, right=hh_scen,on='Household ID',suffixes=('_p','_h'))
# hh_per_base = pd.merge(left=person_base, right=hh_base,on='Household ID',suffixes=('_p','_h'))

In [28]:
# # Join household geography
# taz_geog = pd.read_csv(r'utils/taz_lookup.csv')
# taz_geog.reindex
# hh_per_scen_home_geog = pd.merge(hh_per_scen, taz_geog, left_on='Household TAZ', right_on='TAZ')
# hh_per_base_home_geog = pd.merge(hh_per_base, taz_geog, left_on='Household TAZ', right_on='TAZ')

In [29]:
# # Join workplace geography
# hh_per_scen_work_geog = pd.merge(hh_per_scen, taz_geog, left_on='Workplace TAZ', right_on='TAZ')
# hh_per_base_work_geog = pd.merge(hh_per_base, taz_geog, left_on='Workplace TAZ', right_on='TAZ')

---

## Tours Taken

In [30]:
# Person-day columns that hold tour counts, shown for the scenario records.
tour_cols = [
    u'Escort Tours',
    u'Home-Based Tours',
    u'Meal Tours',
    u'Medical Tours',
    u'Personal Business Tours',
    u'Recreation Tours',
    u'School Tours',
    u'Shopping Tours',
    u'Social Tours',
    u'Tours to Usual Workplace',
    u'Work Tours',
    u'Work-Based Tours',
]
persday_scen[tour_cols]

Unnamed: 0,Escort Tours,Home-Based Tours,Meal Tours,Medical Tours,Personal Business Tours,Recreation Tours,School Tours,Shopping Tours,Social Tours,Tours to Usual Workplace,Work Tours,Work-Based Tours
0,0,1,0,0,0,0,0,0,0,1,1,0
1,0,1,0,0,0,0,0,0,0,1,1,0
2,0,0,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,1,0,0,0,0,0,0,0
4,0,1,0,0,1,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,0,0
6,0,1,0,1,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,0,0


---