In [1]:
import numpy as np
import pandas as pd
import csv
import json
import math

# Show floats with three fixed decimals instead of scientific notation in pandas output
pd.set_option('display.float_format', '{:.3f}'.format)

Download CSV files from *2017 NHTS Downloads*: https://nhts.ornl.gov/downloads.

In [2]:
# Read the 2017 trip file (trippub.csv) into a DataFrame.
# Point the path below at the location of the downloaded CSV.
path_to_trip2017 = r'E:\Demo\NHTS2017_csv\trippub.csv'
trip17 = pd.read_csv(filepath_or_buffer=path_to_trip2017)

Please read section *7.11 Weighting the Data* in [2017 Users' Guide](https://nhts.ornl.gov/assets/NHTS2017_UsersGuide_04232019_1.pdf) first.

**Note:** The process of applying the weights is specific to Python. Check the SPSS and/or Stata documentation for instructions on how to weight and then summarize weighted data if using other software packages.

### Vehicle Trips (VT) by Trip Purpose Summary on Weekdays

For the filter conditions used for VT and VMT, refer to sections *7.5 Vehicle Trips* and *7.6 Vehicle Miles of Travel (VMT)*
in the 2017 Users' Guide.

In [3]:
# Rows used for the VT calculation: (TRPTRANS in pov17) AND (DRVR_FLG == 1)
pov17 = [3, 4, 5, 6, 8, 9, 18]
weekdays = [2, 3, 4, 5, 6]

# One boolean mask per criterion:
#    1. VT selection  -> TRPTRANS is in pov17 and DRVR_FLG equals 1
#    2. Weekday selection -> TRAVDAY is in weekdays
is_pov_mode = trip17['TRPTRANS'].isin(pov17)
is_driver = trip17['DRVR_FLG'] == 1
is_weekday = trip17['TRAVDAY'].isin(weekdays)

# Keep rows satisfying all three masks; .copy() detaches the result from trip17
trip17_vt = trip17[is_pov_mode & is_driver & is_weekday].copy()

#### One-way table: VT by trip purpose summary (weighted sum and pct) 

In [4]:
# Unweighted number of VT records in each WHYTRP1S category (sample size).
#
# Note: Always check sample size.
# Any cell with a value below 30 is considered small sample size,
# so proceed with caution in interpreting those results
trip17_vt.groupby('WHYTRP1S').size()

WHYTRP1S
1     167534
10     86356
20     10791
30     11144
40    105887
50     38880
70     39775
80     33693
97      8512
dtype: int64

In [5]:
# VT by trip purpose summary (annual weighted sum and percent)

# sum the ***trip weight (WTTRDFIN)*** based on categories in WHYTRP1S;
# as_index=False keeps WHYTRP1S as a regular column of the result
tb1 = trip17_vt.groupby(['WHYTRP1S'], as_index=False).agg({'WTTRDFIN': 'sum'})
# add column percent: each category's share of the overall weighted total.
# The total is computed once and divided column-wise, rather than being
# recomputed inside a row-by-row apply.
tb1['pct'] = tb1['WTTRDFIN'] / tb1['WTTRDFIN'].sum()
# check result table
tb1

Unnamed: 0,WHYTRP1S,WTTRDFIN,pct
0,1,56773803926.193,0.339
1,10,32984557026.416,0.197
2,20,4082993913.223,0.024
3,30,3111807572.194,0.019
4,40,30030044513.503,0.179
5,50,11915530691.404,0.071
6,70,15900921288.691,0.095
7,80,10365304374.367,0.062
8,97,2322212901.146,0.014


In [6]:
# Give the summary table descriptive column names:
#   'WHYTRP1S' -> 'trip_purp_summary'
#   'WTTRDFIN' -> 'annual_total_VT'
tb1_column_names = {
    'WHYTRP1S': 'trip_purp_summary',
    'WTTRDFIN': 'annual_total_VT',
}

tb1 = tb1.rename(columns=tb1_column_names)
tb1

Unnamed: 0,trip_purp_summary,annual_total_VT,pct
0,1,56773803926.193,0.339
1,10,32984557026.416,0.197
2,20,4082993913.223,0.024
3,30,3111807572.194,0.019
4,40,30030044513.503,0.179
5,50,11915530691.404,0.071
6,70,15900921288.691,0.095
7,80,10365304374.367,0.062
8,97,2322212901.146,0.014


DE Table Reference: https://nhts.ornl.gov/tables09/ae/work/Job146738.html. The "All" column shows the annual_total_VT.

#### Two-way table: VT by trip purpose summary by weekday (weighted sum) 

In [7]:
# create cross-tabulation for weighted annual sum
#
# Note:
# For percentages, please save (or copy paste) the output table below into Excel
# and manually compute the desired percentages.

# Rows are WHYTRP1S, columns are TRAVDAY, and each cell is the sum of the trip
# weight WTTRDFIN; margins=True appends the "All" row and column.
# The aggregation is named by the string 'sum' rather than the Python builtin
# `sum`, which recent pandas releases warn about when passed as a callable.
pd.crosstab(
    index=trip17_vt['WHYTRP1S'],
    columns=trip17_vt['TRAVDAY'],
    values=trip17_vt['WTTRDFIN'],
    aggfunc='sum',
    dropna=False,
    margins=True,
)

TRAVDAY,2,3,4,5,6,All
WHYTRP1S,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,11183417328.193,11254512246.526,11688116905.686,11290546801.581,11357210644.207,56773803926.193
10,6279719900.301,6714556629.077,6725758520.499,6956538920.293,6307983056.246,32984557026.416
20,738014657.122,848311319.994,1035374508.322,856774546.873,604518880.912,4082993913.223
30,614716506.445,698677659.642,670552695.003,623973188.946,503887522.157,3111807572.194
40,6192729175.945,5683569757.132,5896917889.554,5791780977.482,6465046713.391,30030044513.503
50,2363516119.763,2123443047.902,2319860921.099,2456577691.406,2652132911.233,11915530691.404
70,2934112615.612,3201510573.695,3431222364.171,3201243089.409,3132832645.805,15900921288.691
80,1792707038.736,1967992754.804,2017080541.28,2057225720.294,2530298319.253,10365304374.367
97,352610323.693,515694570.763,515672232.2,510684262.983,427551511.506,2322212901.146
All,32451543665.811,33008268559.534,34300556577.814,33745345199.267,33981462204.709,167487176207.148


In [8]:
# create cross-tabulation for sample size
#
# Note: Always check sample size.
# Any cell with a value below 30 is considered small sample size,
# so proceed with caution in interpreting those results

# Rows are WHYTRP1S, columns are TRAVDAY, and each cell is the number of
# records with a non-missing WTTRDFIN; margins=True appends the "All" totals.
# The named 'count' aggregation replaces a hand-written lambda calling
# .count(), so pandas can use its built-in implementation.
pd.crosstab(
    index=trip17_vt['WHYTRP1S'],
    columns=trip17_vt['TRAVDAY'],
    values=trip17_vt['WTTRDFIN'],
    aggfunc='count',
    dropna=False,
    margins=True,
)

TRAVDAY,2,3,4,5,6,All
WHYTRP1S,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,32021.0,34065.0,33849.0,34034.0,33565.0,167534.0
10,16017.0,18088.0,17881.0,18185.0,16185.0,86356.0
20,1767.0,2450.0,2621.0,2303.0,1650.0,10791.0
30,2268.0,2369.0,2363.0,2359.0,1785.0,11144.0
40,20139.0,21273.0,20585.0,20813.0,23077.0,105887.0
50,7357.0,7395.0,7546.0,8025.0,8557.0,38880.0
70,7116.0,8111.0,8269.0,8346.0,7933.0,39775.0
80,5660.0,6370.0,6738.0,6795.0,8130.0,33693.0
97,1474.0,1918.0,1679.0,1858.0,1583.0,8512.0
All,93819.0,102039.0,101531.0,102718.0,102465.0,502572.0


DE Table Reference: https://nhts.ornl.gov/tables09/ae/work/Job146741.html.