In [None]:
# Extract skim values based on Daysim attributes

In [1]:
import pandas as pd
import numpy as np
import h5py

In [2]:
# Load daysim trip records

In [3]:
# Open the survey HDF5 store read-only. h5py releases before 3.0 default to
# append mode when no mode is given, which can create or modify the file.
daysim = h5py.File(r'R:\SoundCast\estimation\2014\P5\survey14.h5', 'r')

In [4]:
def build_df(h5file, h5table, var_dict, survey_file=False):
    ''' Convert one table of an H5 file into a dataframe.

    h5file      -- open h5py.File (or any nested mapping indexed the same way)
    h5table     -- name of the table/group to read, e.g. 'Trip'
    var_dict    -- {output column name: field name inside h5table}
    survey_file -- True when every record is wrapped in a sequence whose first
                   element is the value (nested survey layout); False when the
                   field values are stored flat (daysim_outputs layout)

    Returns a pandas DataFrame with one column per entry of var_dict.
    '''
    data = {}
    # .items() rather than .iteritems() so this runs on both Python 2 and 3
    for col_name, var in var_dict.items():
        values = h5file[h5table][var][:]
        if survey_file:
            # survey h5 have nested data structure, different than daysim_outputs
            data[col_name] = [record[0] for record in values]
        else:
            data[col_name] = list(values)

    return pd.DataFrame(data)

In [5]:
# List of fields to extract from trip records
# Maps the output column name to the field name in the 'Trip' table.
# ('Departure Time' was previously listed twice; the duplicate key is removed.)
# NOTE(review): a later cell selects a 'Value of Time' column from the merged
# trip/household frame, but no such field is extracted here -- confirm the
# HDF5 field name and add it if that cell is meant to run.
tripdict={'Household ID': 'hhno',
            'Person Number': 'pno',
            'Travel Time':'travtime',
            'Travel Cost': 'travcost',
            'Travel Distance': 'travdist',
            'Mode': 'mode',
            'Purpose':'dpurp',
            'Departure Time': 'deptm',
            'Origin TAZ': 'otaz',
            'Destination TAZ': 'dtaz',
            'Expansion Factor': 'trexpfac'}

In [6]:
# Trip table as a DataFrame; columns are named by the keys of tripdict
trip = build_df(daysim, 'Trip', tripdict, survey_file=False)

In [7]:
# Household fields to extract: (output column name, field name in 'Household')
hhdict = dict([
    ('Household ID', 'hhno'),
    ('Household Size', 'hhsize'),
    ('Household Vehicles', 'hhvehs'),
    ('Household Workers', 'hhwkrs'),
    ('Household Income', 'hhincome'),
    ('Household TAZ', 'hhtaz'),
    ('Expansion Factor', 'hhexpfac'),
])

In [8]:
# Household table as a DataFrame; columns are named by the keys of hhdict
hh = build_df(daysim, 'Household', hhdict, survey_file=False)

In [9]:
# Join household to trip data to get income
# (default inner join on the shared 'Household ID' column)
trip_hh = trip.merge(hh, on='Household ID')

In [10]:
# Build a lookup variable to find skim value

In [11]:
def text_to_dictionary(input_filename):
    ''' Read a text file of "key: value" lines into a dictionary.

    Each non-blank line is split on ':'; the left part is evaluated as a
    Python expression and used as the key, the right part is stripped of
    surrounding whitespace and stored as a string value.

    input_filename -- path of the text file to read

    Returns the resulting dict. Raises ValueError if a non-blank line does
    not contain exactly one ':'.
    '''
    my_dictionary = {}

    # "with" guarantees the file is closed, even if a line fails to parse
    with open(input_filename) as my_file:
        for line in my_file:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            k, v = line.split(':')
            # SECURITY NOTE: eval() executes whatever expression the file
            # contains; only use this on trusted input files.
            my_dictionary[eval(k)] = v.strip()

    return(my_dictionary)

In [21]:
# Parse the demand-matrix parameter file into a dict; the values are the matrix
# names (see the set of unique values computed in the next cell).
matrix_dict  = text_to_dictionary(r'D:\tmp\soundcast-master\inputs\skim_params\demand_matrix_dictionary.json')

In [22]:
# Distinct matrix names that appear as values in matrix_dict
uniqueMatrices = {matrix_name for matrix_name in matrix_dict.values()}
uniqueMatrices

{'bike',
 'h2nt1',
 'h2nt2',
 'h2nt3',
 'h2tl1',
 'h2tl2',
 'h2tl3',
 'h3nt1',
 'h3nt2',
 'h3nt3',
 'h3tl1',
 'h3tl2',
 'h3tl3',
 'hvtrk',
 'lttrk',
 'metrk',
 'svnt1',
 'svnt2',
 'svnt3',
 'svtl1',
 'svtl2',
 'svtl3',
 'trnst',
 'walk'}

In [23]:
# Add a field for skims based on mode, vot, and tollpath
# tollpath is always set to 1?

testvot = 2

# NOTE(review): as recorded in this cell's output, trip_hh has no
# 'Value of Time' column, so this selection raises KeyError until that field
# is extracted into the trip records.
# .copy() gives df its own data, so the column assignments here and in the
# following cells write to df itself rather than to a slice of trip_hh.
df = trip_hh[['Mode','Value of Time', 'Departure Time','Origin TAZ', 'Destination TAZ']].copy()
df['Toll Class'] = np.ones(len(df))

KeyError: "['Value of Time'] not in index"

In [None]:
# Smallest value in the 'VOT Bin' column.
# NOTE(review): 'VOT Bin' is only created by the pd.cut cell that follows this
# one, so running the notebook top to bottom on a fresh kernel fails here.
min(df['VOT Bin'])

In [None]:
# Bin value of time into three classes labelled 1, 2, 3:
# [0, 15] -> 1, (15, 25] -> 2, (25, 99999] -> 3
df['VOT Bin'] = pd.cut(
    df['Value of Time'],
    bins=[0, 15, 25, 99999],
    labels=[1, 2, 3],
    include_lowest=True,
)

In [None]:
# Store the bin labels as plain integers
df['VOT Bin'] = df['VOT Bin'].astype(int)

In [None]:
# Convert departure time in min after 3 am to hours past midnight

In [None]:
# Shift the departure time by 180 minutes, then divide by 60 and cast to int
# to get a whole-hour value.
# NOTE(review): assumes 'Departure Time' is non-negative; the result for
# negative values depends on the division semantics of the Python version.
df['min after midnight'] = df['Departure Time'] + 180
df['hr after midnight'] = (df['min after midnight']/60).astype('int')

In [None]:
# Note that hours midnight to 3 am will be recorded as 24-26
# print() call form is valid on both Python 2 and Python 3
print(max(df['hr after midnight']))
print(min(df['hr after midnight']))

In [None]:
# lookup for departure time to skim times
# Built from (skim period name, hours that fall in it); hours run 3..26 because
# midnight to 3 am is recorded as 24-26.
tod_dict = {
    hour: period
    for period, period_hours in (
        ('20to5', (3, 4, 20, 21, 22, 23, 24, 25, 26)),
        ('5to6', (5,)),
        ('6to7', (6,)),
        ('7to8', (7,)),
        ('8to9', (8,)),
        ('9to10', (9,)),
        ('10to14', (10, 11, 12, 13)),
        ('14to15', (14,)),
        ('15to16', (15,)),
        ('16to17', (16,)),
        ('17to18', (17,)),
        ('18to20', (18, 19)),
    )
    for hour in period_hours
}

In [None]:
# Departure hours as a numpy array, used for the per-record skim-period lookup
hours=np.asarray(df['hr after midnight'])

In [None]:
# Skim time-period name for every record. Iterates the hours directly instead
# of indexing through xrange, which does not exist on Python 3.
df['dephr'] = [tod_dict[hour] for hour in hours]

In [None]:
# df['dephr']

In [None]:
# Create an ID to match skim naming method
# Mode codes 1..9 map, in order, to the skim-name prefixes listed below.
mode_dict = dict(enumerate(
    ['walk', 'bike', 'sv', 'h2', 'h3', 'tr', 'ot', 'ot', 'ot'],
    1,
))

In [None]:
# Mode codes as a numpy array, used for the per-record mode-name lookup
modes=np.asarray(df['Mode'])

In [None]:
# Skim-name prefix for every record. Iterates the codes directly instead of
# indexing through xrange, which does not exist on Python 3.
df['mode code'] = [mode_dict[mode_number] for mode_number in modes]

In [None]:
# Concatenate for an id
# skim_id = <mode code> + 'tl' + <VOT bin>
toll_vot_suffix = 'tl' + df['VOT Bin'].astype(str)
df['skim_id'] = df['mode code'] + toll_vot_suffix

In [None]:
# Open up skims by TOD
# Distinct skim time-period names used in the departure-hour lookup
tods = {period_name for period_name in tod_dict.values()}

In [None]:
# Open one skim file read-only for inspection. Without an explicit mode,
# h5py releases before 3.0 open in append mode and can modify the file.
test=h5py.File(r'R:\SoundCast\releases\TransportationFutures2010\inputs\7to8.h5', 'r')

In [None]:
# Open one skim file per time period, keyed by the period name
skim_dict = {}
working_dir = r'R:\SoundCast\releases\TransportationFutures2010\inputs'
for tod in tods:
    # read-only: the skims are only looked up here, and h5py releases before
    # 3.0 default to append mode when no mode is given
    contents = h5py.File(working_dir + r'/'+ tod + '.h5', 'r')
    skim_dict[tod] = contents

In [None]:
# Look up skim values by tod
# Take the first record and display the skim file opened for its period
testrecord = df.iloc[0]
skim_dict[testrecord['dephr']]

In [None]:
# Rebuild skim_id per mode: walk and bike use the bare mode code, every other
# mode uses <mode code> + 'tl' + <VOT bin>. Rows end up grouped by mode code
# in the order returned by np.unique.
mode_frames = []
for mode in np.unique(df['mode code']):
    print("processing skim lookup ID: " + mode)
    # .copy() so the skim_id assignment writes to tempdf itself, not to a
    # slice of df
    tempdf = df[df['mode code'] == mode].copy()
    mylen = len(tempdf)
    if mode not in ['walk','bike']:
        tempdf['skim_id'] = tempdf['mode code'] + 'tl' + tempdf['VOT Bin'].astype('str')
    else:
        tempdf['skim_id'] = tempdf['mode code']
    mode_frames.append(tempdf)
    # report this mode's own count (previously the running total was printed)
    print('number of ' + mode + ' trips: ' + str(mylen))

# Concatenate once instead of DataFrame.append inside the loop, which copies
# the accumulated frame on every pass and no longer exists in pandas 2.0.
final_df = pd.concat(mode_frames) if mode_frames else pd.DataFrame()

In [None]:
# Continue with the regrouped frame under the name df and drop the temporary name
df = final_df
del final_df

In [None]:
# Display one record by position (hard-coded row 367577; raises IndexError if
# df has fewer rows than that)
df.iloc[367577]

In [None]:
# Open a project to acquire zone numbers
# NOTE(review): wildcard import kept as-is; only EmmeProject is used in this cell.
from EmmeProject import *
my_project = EmmeProject(r'R:\SoundCast\releases\TransportationFutures2010\projects\7to8\7to8.emp')
zones=my_project.current_scenario.zone_numbers
# Map each zone number to its position in the zone list
dictZoneLookup = {zone_number: position for position, zone_number in enumerate(zones)}

In [None]:
# Get the travel time skims
tod = '7to8'
# skim name to test with: skim_id of the first record plus the 't' suffix
test_val = df['skim_id'].iloc[0] + 't'

my_matrix = skim_dict[tod]['Skims'][test_val]

# Skim matrix is 0-based
# need to lookup zone id based on array index
my_matrix[dictZoneLookup[4]][dictZoneLookup[5]]

In [None]:
# Skim value from zone 4 to zone 5; zone numbers are translated to array
# positions through dictZoneLookup
my_matrix[dictZoneLookup[4]][dictZoneLookup[5]]

In [None]:
# Scratch check of list membership; 6 is not in the list, so nothing prints.
# print() call form is valid on both Python 2 and Python 3.
if 6 in [4,5]:
    print('yeah')

In [None]:
# loop through each record
# For every record, look up travel time ('t'), cost ('c') and distance ('d')
# skim values between its origin and destination TAZ.
bikewalk_tod = '5to6'   # bike and walk are only assigned in 5to6
distance_skim_tod = '7to8'
# Numeric marker for a skim value that could not be looked up; kept numeric so
# it compares equal to -99 without relying on a round trip through CSV.
missing_value = -99

output_array = []
missing_field=[]
# NOTE(review): hard-coded sample of 100 record positions; df.iloc raises
# IndexError if df has fewer rows than this range requires.
for i in range(8000000,8000100):
    rowresults = {}
    rowdata = df.iloc[i]
    otaz = rowdata['Origin TAZ']
    dtaz = rowdata['Destination TAZ']
    print(i)  # progress: one line per record

    # loop through skim types
    for skim_type in ['t','c','d']:

        # if it's a bike or walk trip, use 5to6.
        # 'Mode' holds integer codes, so the test is made on the text
        # 'mode code' column ('bike' / 'walk').
        if rowdata['mode code'] in ['bike','walk']:
            tod = bikewalk_tod
        elif skim_type == 'd':
            tod = distance_skim_tod
        else:
            tod = rowdata['dephr']

        skim_name = rowdata['skim_id'] + skim_type
        try:
            my_matrix = skim_dict[tod]['Skims'][skim_name]
            skim_value = my_matrix[dictZoneLookup[otaz]][dictZoneLookup[dtaz]]
            rowresults[skim_type] = skim_value
        except Exception:
            # Best effort: a missing skim matrix or unknown zone is recorded
            # and flagged with the missing-value marker. Catching Exception
            # (not a bare except) lets KeyboardInterrupt stop the loop.
            missing_field.append(skim_name)
            rowresults[skim_type] = missing_value
    # append rowresults to master array
    output_array.append(rowresults)

In [None]:
# pd.DataFrame(output_array).to_csv('skim_travel_time_output.csv')

In [4]:
# load the skim results csv
# (path is relative to the notebook's working directory)
skimresults = pd.read_csv('skim_travel_time_output.csv')

In [7]:
# Read in the trip DAT file (formatted to Daysim by Mark Bradley)
# The file is parsed as tab-delimited text.
trip = pd.read_csv(
    r'R:\SoundCast\estimation\2014\P5\tripP5.dat',
    delimiter='\t',
)

In [26]:
# Join the skimresults to the original DAT file (trip)
# Inner join: trip.tripid matched against skimresults.tripID
joined_data = trip.merge(skimresults, left_on='tripid', right_on='tripID')

In [27]:
# Replace the original field names with values from the joined data and export in standard format
# (Daysim field, skim result column) pairs: cost <- c, distance <- d, time <- t
for daysim_field, skim_field in (('travcost', 'c'), ('travdist', 'd'), ('travtime', 't')):
    joined_data[daysim_field] = joined_data[skim_field]

In [28]:
# Replace -99 (missing data) with -1 to match 2006 survey
# .loc with a boolean row mask replaces the .ix indexer, which was deprecated
# and later removed from pandas.
for field in ['travcost','travdist','travtime']:
    joined_data.loc[joined_data[field] == -99, field] = -1

In [30]:
# Keep only the columns of the original trip file, in their original order
trip_modified = joined_data[trip.columns]

In [31]:
# Write the trip records, with skim-based time/cost/distance, to a new DAT file
save_loc = r'R:\SoundCast\estimation\2014\P5\skims_attached'

# index=False: without it to_csv writes the row index as an extra unnamed
# leading column, so the output would not have the same columns as the input.
# NOTE(review): the input file was read as tab-delimited but this writes a
# space-delimited file -- confirm which delimiter the consumer expects.
trip_modified.to_csv(save_loc + r'\tripP5_with_skims.dat', sep=' ', index=False)