# Add 4k Trip Purposes to 2014 Travel Survey
For the Public Release survey with standard household weights

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load household records to prepare data and generate cross-classification.
# The sheet name is passed positionally: the keyword was renamed from
# `sheetname` to `sheet_name` across pandas releases, while the positional
# form is accepted by both.
hh = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2014\Data\Final database\Release 4\2014-pr3-M-hhsurvey-households.xlsx',
                   'Data')

# Reclassify household columns for cross-classification.
# `.loc` is used for the masked assignments because the old `.ix` indexer
# no longer exists in current pandas; with a boolean row mask and a column
# label the two behave the same.

# Number of Workers 0 -> 3+ (everything at or above 3 is top-coded to 3)
hh['numworkers_crossclass'] = hh['numworkers']
hh.loc[hh['numworkers'] >= 3, 'numworkers_crossclass'] = 3
hh['numworkers_crossclass'] = hh['numworkers_crossclass'].astype('int')

# Household size (everything at or above 4 is top-coded to 4)
hh['hhsize_crossclass'] = hh['hhsize']
hh.loc[hh['hhsize'] >= 4, 'hhsize_crossclass'] = 4
hh['hhsize_crossclass'] = hh['hhsize_crossclass'].astype('int')

# Household income
# Note that exact ranges from 2014 do not match 2006
# old ranges: 30, 60, 90+
# new ranges: 35, 75, 100+
# Every mask tests the original survey code, not the column being written,
# so the order of these assignments does not matter.
income_code = hh['hh_income_detailed_imp']
hh['income_crossclass'] = income_code
hh.loc[income_code <= 3, 'income_crossclass'] = 1                          # < $35k
hh.loc[(income_code > 3) & (income_code <= 5), 'income_crossclass'] = 2    # $35-75k
hh.loc[(income_code > 5) & (income_code <= 6), 'income_crossclass'] = 3    # $75-100k
hh.loc[income_code >= 7, 'income_crossclass'] = 4                          # >$100k

In [3]:
# Data needs to be in format of each household on a row and columns for trips by purpose

# Load trip data and compute 4k purposes

######
# NOTE: different bins will be created based on which trip file is used
# For now, we are using the GPS trip weights to create the bins
# and using this one throughout
# NOTE(review): the file loaded below is labelled "Non GPS weighted", which
# appears to contradict the statement above -- confirm which file is intended.
######


def classify_4k_purpose(trips):
    """Label each trip with a 4k trip purpose from its Daysim purpose codes.

    Parameters
    ----------
    trips : pandas.DataFrame
        Must contain the `opurp` and `dpurp` columns. The rules below treat
        code 0 as home, 1 as work, 2 as school, 5 as shopping, and
        3, 4, 6, 7, 8, 9, 10 as "other".

    Returns
    -------
    pandas.Series
        Object-dtype labels aligned to `trips.index`. Home-based rules are
        applied first; the two non-home-based rules only fill trips that are
        still unlabeled. Trips matching no rule (for example both ends coded
        0) are left as NaN.
    """
    opurp = trips['opurp']
    dpurp = trips['dpurp']
    other_codes = [3, 4, 6, 7, 8, 9, 10]
    purpose = pd.Series(index=trips.index, dtype='object')

    # Home-Based Work (HBW) Trips, directly from home-to-work and work-to-home
    purpose.loc[((opurp == 0) & (dpurp == 1)) | ((opurp == 1) & (dpurp == 0))] = 'HBW'

    # Home-Based shopping
    purpose.loc[((opurp == 0) & (dpurp == 5)) | ((opurp == 5) & (dpurp == 0))] = 'Home-Based Shopping'

    # Home-Based School
    purpose.loc[((opurp == 0) & (dpurp == 2)) | ((opurp == 2) & (dpurp == 0))] = 'School'

    # Home-Based Other
    purpose.loc[((opurp == 0) & dpurp.isin(other_codes))
                | (opurp.isin(other_codes) & (dpurp == 0))] = 'Home-Based Other'

    # NHB Work-to-Other: one end is work, and the trip has no label yet
    work_end = ((opurp == 1) & (dpurp != 0)) | ((opurp != 0) & (dpurp == 1))
    purpose.loc[work_end & purpose.isnull()] = 'NHB WtO'

    # NHB Other-to-Other (Destination and Origins are neither work nor home)
    neither_end = (opurp != 1) & (dpurp != 0) & (opurp != 0) & (dpurp != 1)
    purpose.loc[neither_end & purpose.isnull()] = 'NHB OtO'

    return purpose


# Non GPS weighted (in Daysim format)
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
trip = pd.read_csv(r'R:\SoundCast\estimation\2014\Surveys\2014 Survey\P5\tripP5.dat', sep=r'\s+')

# Separate college student trips from regional survey trips
college_trips = trip[trip['hhno'] < 14000000]
# .copy() gives `trip` its own data, so the columns added to it afterwards are
# not written into a slice of the full table (SettingWithCopyWarning).
trip = trip[trip['hhno'] >= 14000000].copy()

trip['4k_purp'] = classify_4k_purpose(trip)




In [5]:
# Join the 4k_purp field to the non-Daysim formatted trip file.
# The sheet name is passed positionally: the keyword was renamed from
# `sheetname` to `sheet_name` across pandas releases, while the positional
# form is accepted by both.
trip_standard = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2014\Data\Final database\Release 4\2014-pr3-M-hhsurvey-trips-LINKED.xlsx',
                              'Linked trips')

In [42]:
# Store the survey trip ID as text so it can be compared with a string key.
trip_standard['tripID'] = trip_standard['tripID'].astype(str)

In [43]:
# Rebuild the survey-style trip ID from the Daysim fields by concatenating:
#   hhno   -> first 8 characters
#   pno    -> next 2 characters (zero-padded)
#   tsvid  -> last 2 characters (zero-padded)
# Vectorised string methods replace the element-wise apply(lambda ...) calls.
trip['original_trip_id'] = (
    trip['hhno'].astype(str)
    + trip['pno'].astype(str).str.zfill(2)
    + trip['tsvid'].astype(str).str.zfill(2)
)

### Notice:
Some trips in the `trip_standard` dataset are missing from the Daysim-formatted records.
The join below therefore keeps only the trips that appear in the Daysim records.

In [51]:
# Attach the 4k purpose to the survey trip records by matching the rebuilt
# Daysim key against the survey's tripID. An inner join (the merge default)
# keeps only trips present in both tables.
purpose_by_trip = trip[['original_trip_id', '4k_purp']]
df = purpose_by_trip.merge(
    trip_standard,
    how='inner',
    left_on='original_trip_id',
    right_on='tripID',
)

In [53]:
# Row count of the joined table
df.shape[0]

46080

In [54]:
# Row count of the Daysim-formatted trip records
trip.shape[0]

46080

In [55]:
# Row count of the linked-trip survey table
trip_standard.shape[0]

47918

In [56]:
# Save the trips with their 4k purposes; index=False leaves the DataFrame
# index out of the worksheet.
output_path = r'J:\Projects\Surveys\HHTravel\Survey2014\Data\Final database\Release 4\4k Purposes\2014-pr3-M-hhsurvey-trips-LINKED-4k.xlsx'
df.to_excel(output_path, index=False)