In [168]:
# Convert Household, Person, and Trip files to Daysim format for estimation and calibration

In [5]:
import pandas as pd

In [1]:
# Survey column names are kept in one place so they can be updated
# when a future survey renames its fields.

# Household table columns
hhno = 'hhid'
hhtaz = 'final_home_taz2010'
hhexpfac = 'hh_wt_revised'
hhincome = 'hhincome_detailed'
hownrent = 'rent_own'
hrestype = 'res_type'
hhwkrs = 'numworkers'
hhvehs = 'vehicle_count'

# Person table columns
pno = 'pernum'

# Household

In [2]:
def total_persons_to_hh(hh, person, daysim_field, filter_field,
                        filter_field_list, hhid_col=hhno, wt_col=hhexpfac):
    """Add a per-household count of persons that match a filter.

    Example: the number of full-time workers in each household.

    Parameters
    ----------
    hh : pandas.DataFrame
        Household records; must contain ``hhid_col``.
    person : pandas.DataFrame
        Person records; must contain ``hhid_col`` and ``filter_field``.
    daysim_field : str
        Name of the count column added to ``hh``.
    filter_field : str
        Person column tested against ``filter_field_list``.
    filter_field_list : list
        Values of ``filter_field`` that qualify a person to be counted.
    hhid_col : str
        Household id column shared by both tables.
    wt_col : str
        Retained for backward compatibility only. Persons are now counted
        by row, so a missing weight no longer drops a person from the count.

    Returns
    -------
    pandas.DataFrame
        ``hh`` left-joined with the new ``daysim_field`` column; households
        with no matching person get 0.
    """
    # Drop a previous result so re-running a notebook cell does not make the
    # merge emit suffixed duplicates (daysim_field_x / daysim_field_y).
    if daysim_field in hh.columns:
        hh = hh.drop(columns=[daysim_field])

    matches = person[person[filter_field].isin(filter_field_list)]

    # size() counts rows per household regardless of nulls in other columns
    counts = (matches.groupby(hhid_col)
                     .size()
                     .rename(daysim_field)
                     .reset_index())

    # Join to households
    hh = pd.merge(hh, counts, how='left', on=hhid_col)

    # Assign the filled column back: an in-place fillna on a column selection
    # does not reliably update the parent frame in current pandas.
    hh[daysim_field] = hh[daysim_field].fillna(0)

    return hh

In [3]:
# Lookup tables that recode survey categories into DaySim categories.
# Each entry reads  survey code: DaySim code  # survey label -> DaySim label

# Housing tenure
hownrent_map = {
    1: 1,  # own -> own
    2: 2,  # rent -> rent
    3: 3,  # provided by job/military -> other
    4: 3,  # other -> other
    5: 3,  # prefer not to answer -> other
}

# Residence type
hhrestype_map = {
    1: 1,  # SFH -> SFH
    2: 2,  # townhouse (attached house) -> duplex/triplex/rowhouse
    3: 2,  # building with 3 or fewer apartments/condos -> duplex/triplex/rowhouse
    4: 3,  # building with 4 or more apartments/condos -> apartment/condo
    5: 4,  # mobile home/trailer -> mobile home/trailer
    6: 5,  # dorm or institutional housing -> dorm room/rented room
    7: 6,  # other -> other
}

# Household income: DaySim uses actual values, so each survey range
# is replaced by the midpoint of that range.
income_map = {
    1: 5000, 2: 17500, 3: 30000, 4: 42500, 5: 62500,
    6: 87500, 7: 125000, 8: 175000, 9: 225000, 10: 250000,
    11: -1,
}

In [6]:
# Load the household and person survey tables; the first row of each sheet is skipped.
hh = pd.read_excel(
    r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Export\Version 2\Restricted\In-house\2017-internal-v2-R-1-household.xlsx',
    skiprows=1,
)
person = pd.read_excel(
    r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Export\Version 2\Restricted\In-house\2017-internal-v2-R-2-person.xlsx',
    skiprows=1,
)

In [7]:
# Up-front data prep (may need to change for new data sets).
# Flag high school students, starting from the school name.
# This will not catch every student, but it gives a first set.
person['high_school'] = 0
# Assign the filled column back rather than filling in place on a column selection,
# which does not reliably update the parent frame in current pandas.
person['school_loc_name'] = person['school_loc_name'].fillna(' ')
# .loc replaces the removed .ix indexer
person.loc[(person['school_loc_name'].str.contains("High", na=False)) &
           (person['schooltype'].isin([3,4])), "high_school"] = 1

# Students not flagged above are assumed to be high school students
# if they're in age group 16-17 or 18-24 and are in K12 (public or private): schooltype 3, 4
# This is probably excluding some in the 12-15 year group, should try to sort this out better in the future
# The filter must select rows that are still 0; the earlier `!= 0` test only
# re-selected rows that were already flagged, so this rule never added anyone.
person.loc[(person['high_school'] == 0) &
           (person['age'].isin([4,5])) &
           (person['schooltype'].isin([3,4])), 'high_school'] = 1

In [8]:
# Persons in age codes 2 and 3, used for the "k12 age 5-15" count below
age5_12 = person[person['age'].isin([2,3])]

# Household composition counts, one total_persons_to_hh call per row:
# (DaySim field, person frame to count from, filter column, qualifying codes)
for _field, _persons, _filter_col, _codes in [
    ('hhftw', person,  'employment',  [1]),        # Full-time workers
    ('hhptw', person,  'employment',  [2]),        # Part-time workers
    ('hhret', person,  'employment',  [6]),        # Retirees
    ('hhoad', person,  'employment',  [3,4,5,7]),  # Other adults
    ('hhuni', person,  'schooltype',  [6]),        # University students
    ('hhhsc', person,  'high_school', [1]),        # High school students
    ('hh515', age5_12, 'schooltype',  [3,4]),      # k12 age 5-15
    ('hhcu5', person,  'age',         [1]),        # age under 5
]:
    hh = total_persons_to_hh(hh, _persons, daysim_field=_field,
                             filter_field=_filter_col, filter_field_list=_codes)

# Categorical fields recoded through the lookup maps
for _field, _survey_col, _lookup in [
    ('hownrent', hownrent, hownrent_map),
    ('hrestype', hrestype, hhrestype_map),
    ('hhincome', hhincome, income_map),
]:
    hh[_field] = hh[_survey_col].map(_lookup)

# Fields copied unchanged from the survey columns
for _field, _survey_col in [
    ('hhtaz', hhtaz),
    ('hhexpfac', hhexpfac),
    ('hhwkrs', hhwkrs),
    ('hhno', hhno),
    ('hhvehs', hhvehs),
]:
    hh[_field] = hh[_survey_col]

In [175]:
# Need the parcel ID as well!
# Use geopandas to find nearest parcel node?
import geopandas as gpd

In [11]:
# Full DaySim household field list, for reference once the parcel field is available:
# daysim_fields = ['hhno','hhsize','hhvehs','hhwkrs','hhftw','hhptw','hhret','hhoad','hhuni','hhhsc','hh515',
#                  'hhcu5','hhincome','hownrent','hrestype','hhparcel','hhtaz','hhexpfac','samptype']

# Field list currently in use (without the parcel field)
daysim_fields = [
    'hhno', 'hhsize', 'hhvehs', 'hhwkrs',
    'hhftw', 'hhptw', 'hhret', 'hhoad', 'hhuni', 'hhhsc', 'hh515', 'hhcu5',
    'hhincome', 'hownrent', 'hrestype', 'hhtaz', 'hhexpfac',
]

# Person

In [100]:
# Re-read the person table so the person conversion starts from unmodified data
person = pd.read_excel(
    r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Export\Version 2\Restricted\In-house\2017-internal-v2-R-2-person.xlsx',
    skiprows=1,
)

In [109]:
# Person Type (pptyp)
# The rules are applied in order, so a later match overwrites an earlier one.
# .loc replaces the removed .ix indexer throughout.

# Full time worker
person.loc[person['employment'] == 1, 'pptyp'] = 1

# Part-time worker
person.loc[person['employment'] == 2, 'pptyp'] = 2

# Non-working adult age 65+
# NOTE(review): the filter only excludes full-time workers, so part-time workers
# in these age groups are re-coded from 2 to 3 -- confirm this is intended.
person.loc[(person['employment'] != 1) & (person['age'].isin([10,11,12])), 'pptyp'] = 3

# High school student age 16+
person.loc[(person['age'] >= 4) & (person['schooltype'].isin([3,4,5])), 'pptyp'] = 6

# university student (full-time)
person.loc[(person['schooltype'].isin([6,7])) & (person['student'] == 3), 'pptyp'] = 5

# Child age 5-15
person.loc[person['schooltype'].isin([2,3]), 'pptyp'] = 7

# child under 5
person.loc[person['schooltype'].isin([1]), 'pptyp'] = 8

# Everyone not classified above gets type 4
# (presumably non-working adult under 65, since 65+ is type 3 -- TODO confirm)
person.loc[person['pptyp'].isnull(), 'pptyp'] = 4

# Person worker type
person.loc[person['employment'].isin([1]), 'pwtyp'] = 1
person.loc[person['employment'].isin([2]), 'pwtyp'] = 2
person.loc[person['employment'].isin([3,4,5,6,7]), 'pwtyp'] = 0
# Assign the filled column back; an in-place fillna on a column selection
# does not reliably update the parent frame in current pandas.
person['pwtyp'] = person['pwtyp'].fillna(0)

# Transit pass availability
# Care about people that have subsidized/free passes
# people that perceive transit cost as 0
person['ptpass'] = 0
person.loc[(person['tran_pass_12'].isin([1,2])) | (person['benefits_3'].isin([2,3])), 'ptpass'] = 1

# Paid parking at work (any subsidization counts as 'paid')
person['ppaidprk'] = 0
person.loc[person['workpass'].isin([3,4]), 'ppaidprk'] = 1

# Survey age category -> single age in years (median age taken for each category)
age_map = {
    1: 2, 2: 8, 3: 14, 4: 17,
    5: 21, 6: 30, 7: 40, 8: 50,
    9: 60, 10: 70, 11: 80, 12: 85,
}

# Survey gender code -> DaySim gender code
gender_map = {
    1: 1,  # male -> male
    2: 2,  # female -> female
    3: 9,  # another -> missing
    4: 9,  # prefer not to answer -> missing
}

# Survey student code -> DaySim student type
pstyp_map = {1: 0, 2: 1, 3: 2}

# Derive the remaining DaySim person fields from the survey columns.
person['age'] = person['age'].astype('int')
# Age in years comes from age_map; this previously used hownrent_map (the
# housing-tenure lookup), which left pagey wrong or missing for every person.
person['pagey'] = person['age'].map(age_map)
person['pgend'] = person['gender'].map(gender_map)
# Student codes absent from pstyp_map default to 0. The filled result is assigned
# directly, since an in-place fillna on a column selection does not reliably
# update the parent frame in current pandas.
person['pstyp'] = person['student'].map(pstyp_map).fillna(0)
person['hhno'] = person['hhid']
person['pno'] = person['pernum']
person['psexpfac'] = person['hh_wt_revised']

# Need:
# pwpcl
# pwtaz
# pwautime
# pwaudist
# pspcl
# psautime
# psaudist
# puwmode
# puwarrp
# puwdepp

In [103]:
# Full DaySim person column list, for reference once the remaining fields are derived:
# daysim_cols = ['hhno', 'pno', 'pptyp', 'pagey', 'pgend', 'pwtyp', 'pwpcl', 'pwtaz', 'pwautime',
#                'pwaudist', 'pstyp', 'pspcl', 'pstaz', 'psautime', 'psaudist', 'puwmode', 'puwarrp', 
#                'puwdepp', 'ptpass', 'ppaidprk', 'pdiary', 'pproxy', 'psexpfac']

# Columns currently available (includes the survey 'age' code)
daysim_cols = [
    'hhno', 'pno',
    'pptyp', 'pagey', 'pgend', 'pwtyp', 'pstyp',
    'ptpass', 'ppaidprk', 'psexpfac', 'age',
]

# Trips

In [110]:
# Load the trip survey table; the first row of the sheet is skipped.
trip = pd.read_excel(
    r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Export\Version 2\Restricted\In-house\2017-internal-v2-R-5-trip.xlsx',
    skiprows=1,
)

In [150]:
# Household and person identifiers are copied straight from the survey columns
for _daysim_col, _survey_col in [('hhno', 'hhid'), ('pno', 'pernum')]:
    trip[_daysim_col] = trip[_survey_col]

# Survey day number, stored as an integer
trip['day'] = trip['daynum'].astype(int)

# Still needed:
#   tour
#   half
#   tseg

# Survey trip number
trip['tsvid'] = trip['tripnum']

# Survey trip purpose code -> DaySim purpose code
purpose_map = {
    1: 0,     # home
    6: 2,     # school
    9: 3,     # escort
    10: 1,    # work
    11: 1,    # work-related
    14: 1,    # work-related
    30: 5,    # grocery -> shop
    32: 5,    # other shopping -> shop
    33: 4,
    34: 9,    # medical
    50: 6,    # restaurant -> meal
    51: 8,
    52: 7,
    53: 8,
    54: 7,    # religious/community/volunteer -> social
    56: 7,    # family activity -> social
    60: 10,   # change mode
    62: 7,    # other social
    97: -1,   # other
}

# Lookup used to build the DaySim 'dorp' field
dorp_map = {1: 1, 2: 2, 3: 9}

# Recode origin and destination purposes with purpose_map.
# Survey codes that are not keys of purpose_map become NaN.
trip['opurp'] = trip['origin_purpose'].map(purpose_map)
trip['dpurp'] = trip['dest_purpose'].map(purpose_map)

# FIXME(review): 'dorp' is built from 'dest_purpose', the same column used for
# 'dpurp' above, but recoded with dorp_map (keys 1-3 only). This looks like a
# copy-paste slip; presumably it should read the survey's driver/passenger
# column instead -- confirm the column name against the trip table.
trip['dorp'] = trip['dest_purpose'].map(dorp_map)

# origin and destination TAZ
trip['otaz'] = trip['o_taz2010']
trip['dtaz'] = trip['d_taz2010']

# Time fields are not converted yet (draft code kept below for reference).
# Survey reports time in minutes after midnight
# # convert to minutes after 3 am
# trip['deptm'] = trip['depart_time_mam']+(60*3)
# trip['arrtm'] = trip['arrival_time_mam']

# calculate activity duration 
# trip['a_dur'] = trip['activity_duration']
# trip['a_dur'].fillna(0, inplace=True)
# trip['endacttm'] = trip['a_dur'].astype(int)

# TODO: path type

# TODO: toll

# Trip expansion factor copied from the survey's revised trip weight
trip['trexpfac'] = trip['trip_weight_revised']