# TNC to Transit
How many people in the NHTS use TNCs to get to transit? How many in San Francisco?  

## Setup

In [1]:
import sys, os, re
import numpy as np
import pandas as pd
import geopandas as gpd

%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
INDIR = r'Q:\Data\Surveys\HouseholdSurveys\NHTS\2017'
SURVEY = r'survey'
WEIGHTS = r'weights'
TOURS = r'tripchain'

HHLDS = 'hhpub.csv'
PERS = 'perpub.csv'
TRIPS = 'trippub.csv'
VEHS = 'vehpub.csv'

HHWGT = 'hhwgt.csv'
PSWGT = 'perwgt.csv'

In [3]:
mode_num_to_name = {-9:'Not ascertained',
                    -8:'I don\'t know',
                    -7:'I prefer not to answer',
                    1:'Walk', 2:'Bicycle', 3:'Car',
                    4:'SUV', 5:'Van', 6:'Pickup truck',
                    7:'Golf cart / Segway', 8:'Motorcycle / Moped',
                    9:'RV (motor home, ATV, snowmobile)', 
                    10:'School bus', 11:'Public or commuter bus',
                    12:'Paratransit / Dial-a-ride',
                    13:'Private / Charter / Tour / Shuttle bus',
                    14:'City-to-city bus (Greyhound, Megabus)',
                    15:'Amtrak / Commuter rail',
                    16:'Subway / elevated / light rail / street car',
                    17:'Taxi / limo (including Uber / Lyft)',
                    18:'Rental car (Including Zipcar / Car2Go)',
                    19:'Airplane',
                    20:'Boat / ferry / water taxi',
                    97:'Something Else'
                    }
purp_num_to_name = {-9:'Not ascertained',-8:'I don\'t know',-7:'I prefer not to answer',
                    1:'Regular home activities (chores, sleep)',
                    2:'Work from home (paid)',3:'Work',4:'Work-related meeting / trip',
                    5:'Volunteer activities (not paid)',6:'Drop off /pick up someone',
                    7:'Change type of transportation',8:'Attend school as a student',
                    9:'Attend child care',10:'Attend adult care',
                    11:'Buy goods (groceries, clothes, appliances, gas)',
                    12:'Buy services (dry cleaners, banking, service a car, pet care)',
                    13:'Buy meals (go out for a meal, snack, carry-out)',
                    14:'Other general errands (post office, library)',
                    15:'Recreational activities (visit parks, movies, bars, museums)',
                    16:'Exercise (go for a jog, walk, walk the dog, go to the gym)',
                    17:'Visit friends or relatives',
                    18:'Health care visit (medical, dental, therapy)',
                    19:'Religious or other community activities',
                    97:'Something else'}

## Read files

In [4]:
hh = pd.read_csv(os.path.join(INDIR,SURVEY,HHLDS))
ps = pd.read_csv(os.path.join(INDIR,SURVEY,PERS))
trips = pd.read_csv(os.path.join(INDIR,SURVEY,TRIPS))

## Create and populate next trip and last trip fields

In [5]:
# get the next trip mode
trips['nextTripSamePerson'] = trips['HOUSEID'].eq(trips['HOUSEID'].shift(-1)) & trips['PERSONID'].eq(trips['PERSONID'].shift(-1))
trips['NEXTTRPTRANS'] = trips['TRPTRANS'].shift(-1)
trips.loc[~trips['nextTripSamePerson'], 'NEXTTRPTRANS'] = np.nan

# get the last trip mode
trips['lastTripSamePerson'] = trips['HOUSEID'].eq(trips['HOUSEID'].shift(1)) & trips['PERSONID'].eq(trips['PERSONID'].shift(1))
trips['LASTTRPTRANS'] = trips['TRPTRANS'].shift(1)
trips.loc[~trips['lastTripSamePerson'], 'LASTTRPTRANS'] = np.nan

## National

In [6]:
modes = pd.DataFrame(index=list(mode_num_to_name.keys()), data=list(mode_num_to_name.values()), columns=['name'])
modes['tnc_trips_acc_to'] = trips.loc[trips['WHYTO'].eq(7) & trips['TRPTRANS'].eq(17)].groupby('NEXTTRPTRANS').size()
modes['tnc_trips_egr_from'] = trips.loc[trips['WHYFROM'].eq(7) & trips['TRPTRANS'].eq(17)].groupby('LASTTRPTRANS').size()
modes.fillna(0, inplace=True)

### National TNC Access or Egress to Transit Totals

In [7]:
modes.loc[[11, 12, 13, 14, 15, 16, 20]]

Unnamed: 0,name,tnc_trips_acc_to,tnc_trips_egr_from
11,Public or commuter bus,0.0,1.0
12,Paratransit / Dial-a-ride,1.0,0.0
13,Private / Charter / Tour / Shuttle bus,2.0,3.0
14,"City-to-city bus (Greyhound, Megabus)",1.0,0.0
15,Amtrak / Commuter rail,4.0,7.0
16,Subway / elevated / light rail / street car,0.0,1.0
20,Boat / ferry / water taxi,1.0,2.0


In [8]:
modes.loc[[11, 12, 13, 14, 15, 16, 20],['tnc_trips_acc_to','tnc_trips_egr_from']].sum()

tnc_trips_acc_to       9.0
tnc_trips_egr_from    14.0
dtype: float64

## California

In [9]:
hhca = hh.loc[hh['HHSTATE'].eq('CA')].set_index('HOUSEID')
psca = ps.set_index('HOUSEID')
psca = psca.loc[hhca.index]
psca = psca.reset_index().set_index(['HOUSEID','PERSONID'])
tripsca = trips.set_index(['HOUSEID','PERSONID'])
tripsca = tripsca.loc[psca.index]

hhca.reset_index(inplace=True)
psca.reset_index(inplace=True)
tripsca.reset_index(inplace=True)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  


In [10]:
modesca = pd.DataFrame(index=list(mode_num_to_name.keys()), data=list(mode_num_to_name.values()), columns=['name'])
modesca['tnc_trips_acc_to'] = tripsca.loc[tripsca['WHYTO'].eq(7) & tripsca['TRPTRANS'].eq(17)].groupby('NEXTTRPTRANS').size()
modesca['tnc_trips_egr_from'] = tripsca.loc[tripsca['WHYFROM'].eq(7) & tripsca['TRPTRANS'].eq(17)].groupby('LASTTRPTRANS').size()
modesca.fillna(0, inplace=True)

### California TNC Access or Egress to Transit Totals

In [11]:
modesca.loc[[11, 12, 13, 14, 15, 16, 20]]

Unnamed: 0,name,tnc_trips_acc_to,tnc_trips_egr_from
11,Public or commuter bus,0.0,0.0
12,Paratransit / Dial-a-ride,0.0,0.0
13,Private / Charter / Tour / Shuttle bus,0.0,2.0
14,"City-to-city bus (Greyhound, Megabus)",0.0,0.0
15,Amtrak / Commuter rail,0.0,0.0
16,Subway / elevated / light rail / street car,0.0,0.0
20,Boat / ferry / water taxi,0.0,1.0


In [12]:
modesca.loc[[11, 12, 13, 14, 15, 16, 20],['tnc_trips_acc_to','tnc_trips_egr_from']].sum()

tnc_trips_acc_to      0.0
tnc_trips_egr_from    3.0
dtype: float64

## San Francisco

In [13]:
tripssf = trips.loc[trips['HH_CBSA'].eq('41860')]

In [14]:
modessf = pd.DataFrame(index=list(mode_num_to_name.keys()), data=list(mode_num_to_name.values()), columns=['name'])
modessf['tnc_trips_acc_to'] = tripssf.loc[tripssf['WHYTO'].eq(7) & tripssf['TRPTRANS'].eq(17)].groupby('NEXTTRPTRANS').size()
modessf['tnc_trips_egr_from'] = tripssf.loc[tripssf['WHYFROM'].eq(7) & tripssf['TRPTRANS'].eq(17)].groupby('LASTTRPTRANS').size()
modessf.fillna(0, inplace=True)

### San Francisco TNC Access or Egress to Transit Totals

In [15]:
modessf.loc[[11, 12, 13, 14, 15, 16, 20]]

Unnamed: 0,name,tnc_trips_acc_to,tnc_trips_egr_from
11,Public or commuter bus,0.0,0.0
12,Paratransit / Dial-a-ride,0.0,0.0
13,Private / Charter / Tour / Shuttle bus,0.0,2.0
14,"City-to-city bus (Greyhound, Megabus)",0.0,0.0
15,Amtrak / Commuter rail,0.0,0.0
16,Subway / elevated / light rail / street car,0.0,0.0
20,Boat / ferry / water taxi,0.0,0.0


In [16]:
modessf.loc[[11, 12, 13, 14, 15, 16, 20],['tnc_trips_acc_to','tnc_trips_egr_from']].sum()

tnc_trips_acc_to      0.0
tnc_trips_egr_from    2.0
dtype: float64