In [1]:
import numpy as np
import pandas as pd
import csv
import json
import math
import pyreadstat
# Show floats in fixed-point notation with three decimals instead of
# scientific notation (affects how pandas displays values, not the stored data).
pd.set_option('display.float_format', '{:.3f}'.format)

For age, 

- we are going to use the fixed year of birth (YOB) groups and convert them (survey year – YOB) into age ranges. 
- For each survey year (1995-2017), we will set the minimum age at 16, which will be indicated in the final results. 
- The imputed age (R_AGE_IMP in 2017 and R_AGE in 1995, 2001, and 2009) will be the specific age variable used in the final tables. 
- We will not include any years of birth other than those indicated in your prior email: 1946-1964, 1965-1980, 1981-1996, 1997-2012

For the trip purpose, we will use variable WHYTRP90 with the following categories:
1.	Work (01–To/From Work + 02–Work-Related Business)
2.	School/church (05–School/Church)
3.	Other social/recreational (10–Other Social/Recreational)
4.	Shopping and family/personal business (03–Shopping + 04–Other Family/Personal Business)
5.	Visit friends/relatives (08–Visit Friends/Relatives)

For each survey year, you will see four tables:
1.	Annualized weighted sum for person trips (PT) by age group (YOB based) by trip purpose for all 7 days.
2.	Sample size for person trips (PT) by age group (YOB based) by trip purpose for all 7 days.
3.	Annualized weighted sum for person trips (PT) by age group (YOB based) by trip purpose for weekdays (M-F) only.
4.	Sample size for person trips (PT) by age group (YOB based) by trip purpose for weekdays (M-F) only.

Hence, across the four survey years (1995, 2001, 2009, and 2017), 16 tables will be generated in total.


#### 2017 NHTS

In [2]:
# Read the 2017 trip public-use file (trippub.csv) from local disk into a DataFrame.
# NOTE(review): the path is an absolute location on the author's machine;
# point f_trippub at your own copy of the file before running.
f_trippub = r'E:\GIS_Data\Layla2019_NHTS_backup\2017csv_v1.2\trippub.csv'
trip = pd.read_csv(filepath_or_buffer=f_trippub)

In [4]:
# Recode the trip purpose variable
def whytrp90_re(row):
    """Collapse a trip's WHYTRP90 code into one of five reporting categories.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must support ``row['WHYTRP90']``.

    Returns
    -------
    str or None
        The category label, or ``None`` when the code is not one of
        1, 2, 3, 4, 5, 8 or 10 (such trips get no category).
    """
    # (codes, label) pairs, checked in order; first match wins.
    categories = (
        ((1, 2), '01 - Work'),
        ((5,), '02 - School/church'),
        ((10,), '03 - Other social/recreational'),
        ((3, 4), '04 - Shopping and family/personal business'),
        ((8,), '05 - Visit friends/relatives'),
    )
    code = row['WHYTRP90']
    for codes, label in categories:
        if code in codes:
            return label
    return None

In [6]:
# Recode the R_AGE_IMP variable
# *** convert year of birth (YOB) to age: Survey year 2017 - YOB
# *** 1946-1964 --> 53-71
# *** 1965-1980 --> 37-52
# *** 1981-1996 --> 21-36
# *** 1997-2001 (instead of 2012 to keep minimum age at 16) --> 16-20

def r_age_imp_re(row):
    """Assign a trip to a year-of-birth cohort based on the traveller's age.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must support ``row['R_AGE_IMP']``.

    Returns
    -------
    str or None
        The cohort label whose inclusive age range contains the age, or
        ``None`` when the age falls in none of the ranges (below 16,
        above 71, or missing).
    """
    # (youngest age, oldest age, label); both bounds are inclusive.
    cohorts = (
        (53, 71, '01 - 1946-1964 (age 53-71)'),
        (37, 52, '02 - 1965-1980 (age 37-52)'),
        (21, 36, '03 - 1981-1996 (age 21-36)'),
        (16, 20, '04 - 1997-2001 (age 16-20)'),
    )
    years_old = row['R_AGE_IMP']
    for youngest, oldest, label in cohorts:
        if youngest <= years_old <= oldest:
            return label
    return None

In [7]:
# Add the recoded trip-purpose and birth-cohort labels by running the two
# recode functions once per row of the trip table.
trip['WHYTRP90_re'] = trip.apply(whytrp90_re, axis='columns')
trip['R_AGE_IMP_re'] = trip.apply(r_age_imp_re, axis='columns')
# Keep only the columns the output tables need
trip_1 = trip[[
    'R_AGE_IMP',
    'R_AGE_IMP_re',
    'WHYTRP90',
    'WHYTRP90_re',
    'TRAVDAY',
    'WTTRDFIN',
]]
# Quick preview
trip_1.head()

Unnamed: 0,R_AGE_IMP,R_AGE_IMP_re,WHYTRP90,WHYTRP90_re,TRAVDAY,WTTRDFIN
0,67,01 - 1946-1964 (age 53-71),5,02 - School/church,2,75441.906
1,67,01 - 1946-1964 (age 53-71),5,02 - School/church,2,75441.906
2,66,01 - 1946-1964 (age 53-71),1,01 - Work,2,71932.646
3,66,01 - 1946-1964 (age 53-71),1,01 - Work,2,71932.646
4,28,03 - 1981-1996 (age 21-36),5,02 - School/church,2,80122.687


In [12]:
def _cohort_by_purpose_table(trips, value_col, aggfunc):
    """Cross-tabulate birth cohort (rows) against trip purpose (columns).

    Parameters
    ----------
    trips : pandas.DataFrame
        Must contain ``R_AGE_IMP_re``, ``WHYTRP90_re`` and ``value_col``.
    value_col : str
        Column whose values are aggregated within each cell.
    aggfunc : str or callable
        Aggregation applied to ``value_col`` (e.g. ``'sum'`` or ``'count'``).

    Returns
    -------
    pandas.DataFrame
        The crosstab with an extra ``All`` row and column (``margins=True``).
    """
    return pd.crosstab([trips.R_AGE_IMP_re], [trips.WHYTRP90_re],
                       values=trips[value_col], aggfunc=aggfunc,
                       rownames=['YOB'],
                       colnames=['Trip purpose'],
                       dropna=False,
                       margins=True)


# For all days in a week
## weighted sum (sum of the trip weight WTTRDFIN in each cell)
all7_sum = _cohort_by_purpose_table(trip_1, 'WTTRDFIN', 'sum')

## sample size (number of trip records with a non-missing recoded purpose)
all7_sample = _cohort_by_purpose_table(trip_1, 'WHYTRP90_re', 'count')

# For weekdays only (M-F)
# Weekend in TRAVDAY: 1 = Sunday and 7 = Saturday
# Weekdays in TRAVDAY: 2-6 as M-F

weekdays = [2, 3, 4, 5, 6]
trip_1_wkdays = trip_1[trip_1['TRAVDAY'].isin(weekdays)]

## weighted sum
weekdays_sum = _cohort_by_purpose_table(trip_1_wkdays, 'WTTRDFIN', 'sum')

## sample size
weekdays_sample = _cohort_by_purpose_table(trip_1_wkdays, 'WHYTRP90_re', 'count')

# Export all four tables into one Excel workbook, one sheet per table.
# NOTE(review): absolute path on the author's machine; adjust before running.
savetofile = r'E:\GIS_Data\Layla2019_NHTS_backup\NHTS_Layla\03_UserSupport\M03_2021\Q03008_RBlair\NHTS2017_tbs.xlsx'

# Use the writer as a context manager rather than calling writer.save():
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0, and the
# `with` block also closes the workbook if one of the to_excel calls fails.
with pd.ExcelWriter(savetofile, engine='xlsxwriter') as writer:
    all7_sum.to_excel(writer, sheet_name='All7_sum')
    all7_sample.to_excel(writer, sheet_name='All7_sample')
    weekdays_sum.to_excel(writer, sheet_name='WD5_sum')
    weekdays_sample.to_excel(writer, sheet_name='WD5_sample')

# Further formatting and combining of the sheets is done by hand in the
# exported Excel workbook.