In [168]:
# Convert Household, Person, and Trip files to Daysim format for estimation and calibration

In [169]:
import pandas as pd

# Household

In [170]:
def total_persons_to_hh(hh, person, daysim_field, filter_field, 
                        filter_field_list, hhid_col='hhid', wt_col='hh_wt_revised'):
    """Add a per-household count of persons matching a filter to the household table.

    e.g., total number of full-time workers in each household.

    Parameters
    ----------
    hh : DataFrame
        Household records; must contain `hhid_col`. Not modified in place.
    person : DataFrame
        Person records; must contain `hhid_col`, `filter_field` and `wt_col`.
    daysim_field : str
        Name of the count column added to the returned household table.
    filter_field : str
        Person column used to select the persons to count.
    filter_field_list : list
        Values of `filter_field` that qualify a person for the count.
    hhid_col : str
        Household id column shared by both tables.
    wt_col : str
        Person column whose non-null entries are counted within each household.

    Returns
    -------
    DataFrame
        `hh` left-joined with the new `daysim_field` column; households with no
        matching person get 0.
    """

    selected = person[person[filter_field].isin(filter_field_list)]

    # Count only the one column we need instead of aggregating every person column
    counts = selected.groupby(hhid_col)[wt_col].count().reset_index(name=daysim_field)

    # Drop a stale copy of the field so that re-running a notebook cell does not
    # produce suffixed "_x"/"_y" columns and a KeyError on the lookup below
    if daysim_field in hh.columns:
        hh = hh.drop(daysim_field, axis=1)

    # Join to households
    hh = pd.merge(hh, counts, how='left', on=hhid_col)

    # Assign the filled column back: an in-place fillna on a column selection is a
    # chained assignment and is not guaranteed to update the frame
    hh[daysim_field] = hh[daysim_field].fillna(0)

    return hh

In [171]:
# Recode tables: survey response code -> DaySim code

# Housing tenure
hownrent_map = {
    1: 1,  # Own -> own
    2: 2,  # Rent -> rent
    3: 3,  # provided by job/military -> other
    4: 3,  # other -> other
    5: 3,  # prefer not to answer -> other
}

# Residence type
hhrestype_map = {
    1: 1,  # SFH -> SFH
    2: 2,  # Townhouse (attached house) -> duplex/triplex/rowhouse
    3: 2,  # Building with 3 or fewer apartments/condos -> duplex/triplex/rowhouse
    4: 3,  # Building with 4 or more apartments/condos -> apartment/condo
    5: 4,  # Mobile home/trailer -> Mobile home/trailer
    6: 5,  # Dorm or institutional housing -> Dorm room/rented room
    7: 6,  # other -> other
}

# Household income: DaySim uses actual values, so each survey income code
# (1 through 11, in order) is replaced by the midpoint of the range provided
income_map = dict(zip(
    range(1, 12),
    [5000, 17500, 30000, 42500, 62500, 87500, 125000, 175000, 225000, 250000, -1],
))

In [197]:
# Survey exports for the household and person tables
hh_file = r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Export\Version 2\Restricted\In-house\2017-internal-v2-R-1-household.xlsx'
person_file = r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Export\Version 2\Restricted\In-house\2017-internal-v2-R-2-person.xlsx'

# The first spreadsheet row is skipped in both files
hh = pd.read_excel(hh_file, skiprows=1)
person = pd.read_excel(person_file, skiprows=1)

In [198]:
# Do some up-front data prep
# Identify high school students based on their school name
# This will not include all students, but we can start with these students
person['high_school'] = 0
# Assign the filled column back rather than filling a column selection in place
person['school_loc_name'] = person['school_loc_name'].fillna(' ')
# .loc replaces the removed .ix indexer; with a boolean mask and a column label
# the two select the same cells
person.loc[(person['school_loc_name'].str.contains("High", na=False)) &
           (person['schooltype'].isin([3,4])), 'high_school'] = 1

# Students not in this group will be assumed as high school students
# if they're in age group 16-17, and 18-24 and are in K12 (public or private) 3, 4
# This is probably excluding some in the 12-15 year group, should try to sort this out better in the future
# The rule targets persons NOT yet flagged (high_school == 0); testing for != 0
# would only revisit persons already flagged and never add anyone
person.loc[(person['high_school'] == 0) &
           (person['age'].isin([4,5])) &
           (person['schooltype'].isin([3,4])), 'high_school'] = 1

In [199]:
# Subset of persons used for the k12 age 5-15 count
# NOTE(review): the variable name says 5-12 while the original comment says 5-15 - confirm the age codes
age5_12 = person[person['age'].isin([2,3])]

# Person-type counts per household, one entry per DaySim field:
# (DaySim field, person table to count from, person column to filter on, accepted codes)
person_type_specs = [
    ('hhftw', person,  'employment',  [1]),        # Full-time workers
    ('hhptw', person,  'employment',  [2]),        # Part-time workers
    ('hhret', person,  'employment',  [6]),        # Retirees
    ('hhoad', person,  'employment',  [3,4,5,7]),  # Other Adults
    ('hhuni', person,  'schooltype',  [6]),        # University Students
    ('hhhsc', person,  'high_school', [1]),        # High school students
    ('hh515', age5_12, 'schooltype',  [3,4]),      # k12 age 5-15
    ('hhcu5', person,  'age',         [1]),        # age under 5
]
for field, persons, column, codes in person_type_specs:
    hh = total_persons_to_hh(hh, persons, daysim_field=field,
                             filter_field=column, filter_field_list=codes)

# Survey columns recoded through the lookup maps
recoded_fields = [
    ('hownrent', 'rent_own', hownrent_map),
    ('hrestype', 'res_type', hhrestype_map),
    ('hhincome', 'hhincome_detailed', income_map),
]
for field, survey_col, lookup in recoded_fields:
    hh[field] = hh[survey_col].map(lookup)

# Survey columns copied unchanged under their DaySim names
copied_fields = [
    ('hhtaz', 'final_home_taz2010'),
    ('hhexpfac', 'hh_wt_revised'),
    ('hhwkrs', 'numworkers'),
    ('hhno', 'hhid'),
    ('hhvehs', 'vehicle_count'),
]
for field, survey_col in copied_fields:
    hh[field] = hh[survey_col]

In [175]:
# Need the parcel ID as well!
# Use geopandas to find nearest parcel node?
import geopandas as gpd

In [200]:
# Household columns to keep, in output order.
# 'hhparcel' (between 'hrestype' and 'hhtaz') and 'samptype' (last) belong to the
# fuller field list but are left out here for now.
daysim_fields = [
    'hhno', 'hhsize', 'hhvehs', 'hhwkrs',
    'hhftw', 'hhptw', 'hhret', 'hhoad', 'hhuni', 'hhhsc', 'hh515', 'hhcu5',
    'hhincome', 'hownrent', 'hrestype',
    'hhtaz', 'hhexpfac',
]

In [201]:
# Preview only the first rows of the DaySim household columns instead of
# rendering the whole household table
hh[daysim_fields].head(10)

Unnamed: 0,hhno,hhsize,hhvehs,hhwkrs,hhftw,hhptw,hhret,hhoad,hhuni,hhhsc,hh515,hhcu5,hhincome,hownrent,hrestype,hhtaz,hhexpfac
0,17100005,2,2,1,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,87500,1,1,1709,24.441709
1,17100024,3,1,2,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,87500,1,2,557,26.224981
2,17100052,1,0,1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,17500,2,1,426,25.692826
3,17100059,1,0,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,125000,2,3,568,47.768728
4,17100060,1,1,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,17500,2,2,3347,278.147225
5,17100102,1,1,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,87500,2,3,553,318.861344
6,17100108,2,1,2,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,225000,2,3,458,24.899462
7,17100111,4,3,2,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,225000,1,1,184,167.400000
8,17100137,1,1,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,17500,1,3,574,142.036552
9,17100149,4,2,2,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,125000,1,3,1465,561.492027
