In [1]:
import pandas as pd
import numpy as np

In [2]:
# Trip rate calculation using 2014 survey results
# Household and person files from standard survey records
# Trip files from GPS-weighted and Daysim-formatted records

In [3]:
# Load person and household records from standard survey records
person = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2014\Data\Final database\Release 4\2014-pr3-M-hhsurvey-persons.xlsx',
                  sheetname='Data1')
hh = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2014\Data\Final database\Release 4\2014-pr3-M-hhsurvey-households.xlsx',
                  sheetname='Data')

In [4]:
# GPS Weighted trips (in Daysim format)
trip = pd.read_csv(r'survey-14-gps-weighted\tripP14_w.dat', delim_whitespace=True)

# Non GPS weighted
# trip = pd.read_csv(r'R:\SoundCast\estimation\2014\Surveys\2014 Survey\P5\tripP5.dat', delim_whitespace=True)

In [4]:
# Separate college student trips from regional survey trips
college_trips = trip[trip['hhno'] < 14000000]
trip = trip[trip['hhno'] >= 14000000]

In [9]:
person['expwt_final'].sum()

3464535.644400008

In [6]:
# Reclassify household columns for cross-classification

# Number of Workers 0 -> 3+
hh['numworkers_crossclass'] = hh['numworkers']
hh.ix[hh['numworkers'] >= 3, 'numworkers_crossclass'] = '3+'
hh['numworkers_crossclass'] = hh['numworkers_crossclass'].astype('str')

# Household size
hh['hhsize_crossclass'] = hh['hhsize']
hh.ix[hh['hhsize'] >= 4, 'hhsize_crossclass'] = '4+'
hh['hhsize_crossclass'] = hh['hhsize_crossclass'].astype('str')

# Household income
# Note that exact ranges from 2014 do not match 2006
# old ranges: 30, 60, 90+
# new ranges: 35, 75, 100+
hh['income_crossclass'] = hh['hh_income_detailed_imp']
hh.ix[hh['hh_income_detailed_imp'] <= 3, 'income_crossclass'] = '1'    # $35k  
hh.ix[(hh['hh_income_detailed_imp'] > 3) & (hh['hh_income_detailed_imp'] <= 5),    # $35-75k
           'income_crossclass'] = '2' 
hh.ix[(hh['hh_income_detailed_imp'] > 5) & (hh['hh_income_detailed_imp'] <= 6),    # $75-100k
           'income_crossclass'] = '3' 
hh.ix[hh['hh_income_detailed_imp'] >= 7, 'income_crossclass'] = '4'    # >$100k

# # Income bins
# # Note that the income bins are provided as medians that don't actually match these values 
# # but do fall within the ranges as specified
# hh.ix[hh['hhincome'] < 30000, 'hhincome'] = 1    # <$30
# hh.ix[(hh['hhincome'] >= 30000) & (hh['hhincome'] < 60000), 'hhincome'] = 2    # $30,000 - $59,999
# hh.ix[(hh['hhincome'] >= 60000) & (hh['hhincome'] < 90000), 'hhincome'] = 3    # $60,000 - $89,9999
# hh.ix[hh['hhincome'] >= 90000, 'hhincome'] = 4    # >= $90,0000

In [7]:
# Reclassify trip purpose for 4k purposes
# Use Daysim User's Guide for data dictionary for trip records

# Home-Based Work (HBW) Trips, directly from home-to-work and work-to-home
trip.ix[(trip['opurp'] == 0) & (trip['dpurp'] == 1), '4k_purp'] = 'HBW'
trip.ix[(trip['opurp'] == 1) & (trip['dpurp'] == 0), '4k_purp'] = 'HBW'

# Home-Based shopping
trip.ix[(trip['opurp'] == 0) & (trip['dpurp'] == 5), '4k_purp'] = 'Home-Based Shopping'
trip.ix[(trip['opurp'] == 5) & (trip['dpurp'] == 0), '4k_purp'] = 'Home-Based Shopping'

# Home-Based Other
trip.ix[(trip['opurp'] == 0) & (trip['dpurp'].isin([3,4,6,7,8,9,10])), '4k_purp'] = 'Home-Based Other'
trip.ix[(trip['opurp'].isin([3,4,6,7,8,9,10]) & (trip['dpurp'] == 0)), '4k_purp'] = 'Home-Based Other'

# NHB Work-to-Other
trip.ix[(trip['opurp'] == 1) & (trip['dpurp'] != 0) & (trip['4k_purp'].isnull()), '4k_purp'] = 'NHB WtO'
trip.ix[(trip['opurp'] != 0) & (trip['dpurp'] == 1) & (trip['4k_purp'].isnull()), '4k_purp'] = 'NHB WtO'

# # NHB Other-to-Other (Destination and Origins are neither work nor home)
trip.ix[(trip['opurp'] != 1) & (trip['dpurp'] != 0) & (trip['opurp'] != 0) 
        & (trip['dpurp'] != 1) & (trip['4k_purp'].isnull()), '4k_purp'] = 'NHB OtO'

In [8]:
# Merge HH record info to trip records to create cross-class tables
trip_hh = pd.merge(trip, hh, left_on='hhno', right_on='hhid', how='left')

# Total households by cross-classification group (hhsize, numworkers, and income)
total_hh = hh.groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).sum()[['expwt_2']]

total_hh.to_clipboard()

In [412]:
# Compare the number of households by income class (sample sizes)
hh.groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).count()[['expwt_2']].to_clipboard()

In [534]:
# Trip rates for HBW trips
purp = 'HBW'
# trips_hbw = trip_hh[trip_hh['4k_purp'] == purp].groupby(['hhsize','hhwkrs','hhincome']).sum()[['trexpfac']]
trips_hbw = trip_hh[trip_hh['4k_purp'] == purp].groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).sum()[['trexpfac']]

df_hbw = pd.DataFrame(trips_hbw['trexpfac']/total_hh['expwt_2'])
df_hbw.columns = ['trip_rates']

trips_hbw.to_clipboard()

In [535]:
# Trip rates for HB Shopping
purp = 'Home-Based Shopping'
trips_hbs = trip_hh[trip_hh['4k_purp'] == purp].groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).sum()[['trexpfac']]

df_hbs = pd.DataFrame(trips_hbs['trexpfac']/total_hh['expwt_2'])
df_hbs.columns = ['trip_rates']

In [536]:
# Trip rates for HBO trips
purp = 'Home-Based Other'
trips_hbo = trip_hh[trip_hh['4k_purp'] == purp].groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).sum()[['trexpfac']]

df_hbo = pd.DataFrame(trips_hbo['trexpfac']/total_hh['expwt_2'])
df_hbo.columns = ['trip_rates']

In [537]:
trips_hbo.to_clipboard()

In [420]:
# Non Home-Based Trips
# Work to Other (Other to Work)
purp = 'NHB WtO'
trips_nhb_wto = trip_hh[trip_hh['4k_purp'] == purp].groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).sum()[['trexpfac']]

df_trips_nhb_wto = pd.DataFrame(trips_nhb_wto['trexpfac']/total_hh['expwt_2'])
df_trips_nhb_wto.columns = ['trip_rates']

trips_nhb_wto.to_clipboard()

In [421]:
# Non Home-Based Trips
# Other to Other
purp = 'NHB OtO'
trips_nhb_oto = trip_hh[trip_hh['4k_purp'] == purp].groupby(['hhsize_crossclass','numworkers_crossclass','income_crossclass']).sum()[['trexpfac']]

df_trips_nhb_oto = pd.DataFrame(trips_nhb_oto['trexpfac']/total_hh['expwt_2'])
df_trips_nhb_oto.columns = ['trip_rates']

trips_nhb_oto.to_clipboard()

# Home Based School Trips

In [437]:
trip.ix[(trip['opurp'] == 0) & (trip['dpurp'] == 2), '4k_purp'] = 'School'
trip.ix[(trip['opurp'] == 2) & (trip['dpurp'] == 0), '4k_purp'] = 'School'

In [438]:
trip[trip['4k_purp'] == 'School'].sum()['trexpfac']

1354571.3491578493

In [440]:
# Group by number of school aged people in house
person['school_age'] = np.nan
person.ix[person['age'].isin([2,3,4]), 'school_age'] = 'school_age'

In [441]:
school_age_df = pd.DataFrame(person.groupby(['hhid','school_age']).count()['expwt_final'])
school_age_df.columns = ['school_age_people']
school_age_df = school_age_df.reset_index()

# Replace the <3 with 3+
school_age_df.ix[school_age_df['school_age_people'] >= 3, 'school_age_people'] = 3

# Join to household file
hh_school = pd.merge(hh, school_age_df, on='hhid', how='left')

# Mark households with zero children that were on left side of merge (no corresponding records from school_age_df)
hh_school.ix[hh_school.school_age_people.isnull(), 'school_age_people'] = 0

# Group total households by income and number of school age people
hh_school_tot = hh_school.groupby(['school_age_people','income_crossclass']).sum()[['expwt_2']]

hh_school_tot.to_clipboard()

In [443]:
# Join household categories to trip records to create cross-class totals
trip_hh = pd.merge(trip, hh_school, left_on='hhno', right_on='hhid', how='left')

# home-based school trips
hbs_tot = trip_hh[trip_hh['4k_purp']=='School'].groupby(['school_age_people','income_crossclass']).sum()[['trexpfac']]

hbs_tot.to_clipboard()


# College Trips

### Using standard trip dataset

In [590]:
# All trips from home to college (or college to home
# Using the regular households for this, not college trips
# because we have no household information for college survey respondents

# Group by number of school aged people in house
person['college_age'] = np.nan
person.ix[person['age'] == 5, 'college_age'] = 'college_age'

college_age_df = pd.DataFrame(person.groupby(['hhid','college_age']).count()['expwt_final'])
college_age_df.columns = ['college_age_people']
college_age_df = college_age_df.reset_index()

college_age_df.groupby('college_age_people').sum()

# Replace the <3 with 3+
college_age_df.ix[college_age_df['college_age_people'] >= 2, 'college_age_people'] = 2

# Join to household file
hh_college = pd.merge(hh, college_age_df, on='hhid', how='left')

# Mark households with zero college-age students that were on left side of merge
hh_college.ix[hh_college.college_age_people.isnull(), 'college_age_people'] = 0

# Group total households by income and number of school age people
hh_college_tot = hh_college.groupby(['college_age_people','income_crossclass']).sum()[['expwt_2']]

hh_college_tot.to_clipboard()

In [580]:
# Join household categories to trip records to create cross-class totals
trip_hh = pd.merge(trip, hh_college, left_on='hhno', right_on='hhid', how='left')

# trip_hh.college_age

In [589]:
# Join household categories to trip records to create cross-class totals
trip_hh = pd.merge(trip, hh_college, left_on='hhno', right_on='hhid', how='left')

# Create field to signify college trip
# Join person fiels to trip to get age and mark as college trip for school trips
trip_hh = pd.merge(trip_hh, person[['hhid','pernum','age']], 
                   left_on=['hhno','pno'], right_on=['hhid','pernum'], how='left')

# From 4k docs, this is confusing
# 'College trips are made up of home-based college trips and student trips made from
# dormitories, represented by trips from group quarters. Since the college trip purpose is not
# exclusively comprised of home-based trips, we have identified this trip purpose as “college”
# instead of “home-based college.” '

# Think that means we only want trips that end at a school location, for people 18-24?
trip_hh.ix[(trip_hh['dpurp'] == 2) & (trip_hh['college_age'] == 'college_age'), 'college'] = 'college'
# trip_hh.ix[(trip_hh['opurp'] == 2) & (trip_hh['college_age'] == 'college_age'), 'college'] = 'college'

# home-based college trips
hbc_tot = trip_hh[trip_hh['college']== 'college'].groupby(['income_crossclass','college_age_people']).sum()[['trexpfac']]

hbc_tot.to_clipboard()


In [583]:
hbc_tot

Unnamed: 0_level_0,Unnamed: 1_level_0,trexpfac
college_age_people,income_crossclass,Unnamed: 2_level_1
1.0,1,1404.180408
1.0,2,917.598345
1.0,3,872.00678
1.0,4,8160.20903
2.0,1,138.999753
2.0,2,300.564697


In [555]:
hbc_tot.sum().values

array([ 4272870.21243122])

### Using college trips and person records only

In [549]:
# college person data
# college_person = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2014\Data\College database\1_PSRC_College_PersonDataset_2015-1-23_V1-3.xlsx',
#                               sheetname='PersonData')

# PUMS household totals
For comparison


In [None]:
# Load PUMS data to see if cross classification proportions match up
df = pd.read_csv(r'R:\4K\Code\TripGen\Inputs\TripGeneration\2010\pums\pumshhxc_income-size-workers.in',
                 delim_whitespace=True, skiprows=5)
df = df.reset_index()
df.columns = ['puma','column','households']
df['column'] = df['column'].astype('str').apply(lambda row: row.split(':')[0]).astype('int')

_df = df.groupby('column').sum()[['households']]
_df.to_clipboard()

In [None]:
# Load PUMS data to see if cross classification proportions match up
df = pd.read_csv(r'R:\4K\Code\TripGen\Inputs\TripGeneration\2010\pums\pumshhxc_income-k12students.in',
                 delim_whitespace=True, skiprows=5)
df = df.reset_index()
df.columns = ['puma','column','households']
df['column'] = df['column'].astype('str').apply(lambda row: row.split(':')[0]).astype('int')

_df = df.groupby('column').sum()[['households']]
_df.to_clipboard()

# Export to Emme Input