In [1]:
import h5py
import pandas as pd
import numpy as np

In [2]:
# Open the Daysim outputs read-only: this notebook only reads from the file,
# and mode 'r' fails immediately if the file is missing, whereas append mode
# 'a' would silently create an empty file and allow accidental modification.
daysim = h5py.File('daysim_outputs.h5', 'r')

In [3]:
# Load the origin-purpose and destination-purpose fields of the Trip group
# into single-column DataFrames so trips can be filtered by purpose code.
trip_table = daysim['Trip']
opurp = pd.DataFrame(trip_table['opurp'][:], columns=['opurp'])
dpurp = pd.DataFrame(trip_table['dpurp'][:], columns=['dpurp'])

In [4]:
# Row labels of trips whose origin purpose (opurp) / destination purpose
# (dpurp) code is NOT 1. Code 1 is presumably "work" -- TODO confirm against
# the Daysim purpose coding -- so these arrays select the non-work trip ends.
origin_not_work = opurp['opurp'] != 1
destination_not_work = dpurp['dpurp'] != 1
opurp_index = np.array(opurp.index[origin_not_work.values])
dpurp_index = np.array(dpurp.index[destination_not_work.values])

In [5]:
# Keep only the row labels present in both arrays, i.e. trips that pass both
# the origin filter and the destination filter. np.intersect1d returns the
# sorted, unique common values.
# NOTE(review): despite its name, this is built from the `!= 1` filters, so it
# appears to index the trips that are kept (non-work), not work trips -- confirm.
work_index = np.intersect1d(opurp_index,dpurp_index)

In [6]:
# Number of trip rows selected by the combined origin/destination filter
len(work_index)

11548020

In [12]:
# List the top-level groups stored in the Daysim output file
daysim.keys()

[u'Household', u'HouseholdDay', u'Person', u'PersonDay', u'Trip']

In [14]:
# List the fields stored under the Trip group
daysim['Trip'].keys()

[u'arrtm',
 u'dadtyp',
 u'day',
 u'deptm',
 u'dorp',
 u'dpcl',
 u'dpurp',
 u'dtaz',
 u'endacttm',
 u'half',
 u'hhno',
 u'id',
 u'mode',
 u'oadtyp',
 u'opcl',
 u'opurp',
 u'otaz',
 u'pathtype',
 u'pno',
 u'tour',
 u'tour_id',
 u'travcost',
 u'travdist',
 u'travtime',
 u'trexpfac',
 u'tseg',
 u'tsvid',
 u'vot']

In [20]:
# Assemble the Household group into a DataFrame with one column per field,
# keeping the columns in the order the file lists them.
household_table = daysim['Household']
household_fields = list(household_table.keys())
hh_df = pd.DataFrame(
    {name: household_table[name][:] for name in household_fields},
    columns=household_fields,
)

In [21]:
# Preview only the first rows instead of rendering the whole household table
hh_df.head(10)

Unnamed: 0,fraction_with_jobs_outside,hh515,hhcu5,hhexpfac,hhftw,hhhsc,hhincome,hhno,hhoad,hhparcel,...,hhret,hhsize,hhtaz,hhuni,hhvehs,hhwkrs,hownrent,hrestype,samptype,zone_id
0,0.0,-1,-1,1.0,-1,-1,70371,682138,-1,1066413,...,-1,2,2246,-1,3,-1,1,1,-1,2245
1,0.0,-1,-1,1.0,-1,-1,50400,365348,-1,559114,...,-1,1,177,-1,0,-1,2,2,-1,176
2,0.0,-1,-1,1.0,-1,-1,113904,1415445,-1,787198,...,-1,2,2714,-1,2,-1,1,1,-1,2713
3,0.0,-1,-1,1.0,-1,-1,42373,424160,-1,710333,...,-1,1,3331,-1,1,-1,1,1,-1,3330
4,0.0,-1,-1,1.0,-1,-1,53424,483970,-1,1157351,...,-1,2,2248,-1,2,-1,1,1,-1,2247
5,0.0,-1,-1,1.0,-1,-1,48447,365349,-1,532834,...,-1,1,178,-1,4,-1,1,2,-1,177
6,0.0,-1,-1,1.0,-1,-1,65520,682139,-1,981643,...,-1,2,2247,-1,1,-1,1,1,-1,2246
7,0.0,-1,-1,1.0,-1,-1,5040,1,-1,302391,...,-1,1,19,-1,0,-1,1,1,-1,18
8,0.0,-1,-1,1.0,-1,-1,200214,1415446,-1,748621,...,-1,2,3287,-1,2,-1,1,1,-1,3286
9,0.0,-1,-1,1.0,-1,-1,139986,950917,-1,625544,...,-1,2,3638,-1,3,-1,2,1,-1,3637


In [7]:
# Create a new h5 container to store the results.
# Mode "w" creates the file and truncates any existing file with this name.
my_store = h5py.File('cleaned_daysim.h5', "w")

In [8]:
# Add Trip records, keeping only the rows selected by work_index
my_store.create_group("Trip")

for field in daysim['Trip'].keys():
    # Single-argument print() behaves identically on Python 2 and Python 3
    print('adding: ' + str(field))

    # Read the full column into memory, then keep only the selected rows
    matrix_value = daysim['Trip'][field][:][work_index]

    # Keep the source dtype rather than forcing float32: float32 represents
    # integers exactly only up to 2**24 (16,777,216), so large identifiers
    # (trip/tour/parcel ids) could be silently rounded by the cast.
    my_store['Trip'].create_dataset(name=field, data=matrix_value, compression='gzip')

adding: arrtm
adding: dadtyp
adding: day
adding: deptm
adding: dorp
adding: dpcl
adding: dpurp
adding: dtaz
adding: endacttm
adding: half
adding: hhno
adding: id
adding: mode
adding: oadtyp
adding: opcl
adding: opurp
adding: otaz
adding: pathtype
adding: pno
adding: tour
adding: tour_id
adding: travcost
adding: travdist
adding: travtime
adding: trexpfac
adding: tseg
adding: tsvid
adding: vot


In [9]:
# Add Household records (all rows, all fields)
my_store.create_group("Household")

for field in daysim['Household'].keys():
    # Single-argument print() behaves identically on Python 2 and Python 3
    print('adding: ' + str(field))

    matrix_value = daysim['Household'][field][:]

    # Keep the source dtype rather than forcing float32: float32 represents
    # integers exactly only up to 2**24 (16,777,216), so large identifiers
    # could be silently rounded by the cast.
    my_store['Household'].create_dataset(name=field, data=matrix_value, compression='gzip')

adding: fraction_with_jobs_outside
adding: hh515
adding: hhcu5
adding: hhexpfac
adding: hhftw
adding: hhhsc
adding: hhincome
adding: hhno
adding: hhoad
adding: hhparcel
adding: hhptw
adding: hhret
adding: hhsize
adding: hhtaz
adding: hhuni
adding: hhvehs
adding: hhwkrs
adding: hownrent
adding: hrestype
adding: samptype
adding: zone_id


In [10]:
# Add Person records (all rows), leaving out the usual-work-location fields
my_store.create_group("Person")

for field in daysim['Person'].keys():

    # do not include usual work taz or usual work parcel
    if field in ('pwtaz', 'pwpcl'):
        continue

    # Single-argument print() behaves identically on Python 2 and Python 3
    print('adding: ' + str(field))

    matrix_value = daysim['Person'][field][:]

    # Keep the source dtype rather than forcing float32: float32 represents
    # integers exactly only up to 2**24 (16,777,216), so large identifiers
    # could be silently rounded by the cast.
    my_store['Person'].create_dataset(name=field, data=matrix_value, compression='gzip')


adding: hhno
adding: id
adding: pagey
adding: pdiary
adding: pgend
adding: pno
adding: ppaidprk
adding: pproxy
adding: pptyp
adding: psaudist
adding: psautime
adding: psexpfac
adding: pspcl
adding: pstaz
adding: pstyp
adding: ptpass
adding: puwarrp
adding: puwdepp
adding: puwmode
adding: pwtyp


In [11]:
# Flush and close the output file, then release the handle on the input file
# as well so it is not left open for the lifetime of the kernel.
my_store.close()
daysim.close()