-
Notifications
You must be signed in to change notification settings - Fork 1
/
TransitCenter_match_agencies_to_msas.R
85 lines (71 loc) · 3.09 KB
/
TransitCenter_match_agencies_to_msas.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
###########################################################################
##
## Match agencies to MSAs
##
## The objective of this script is to match each agency in the ntd dataset
## to the MSA that it is in and then to extract just the agencies that are
## in the top 55 largest MSAs in the country. It outputs a csv with a row
## for each agency that falls in one of the selected msas. Each transit
## agency will have it's four digit unique identifier as well as the unique
## id for the msa that it falls in. This dataset will be joined to ntd
## datasets in order to filter out agencies that are not relevant to the
## analysis.
##
## Steps:
## 1. Generate unique set of city state combinations from ntd dataste
## 2. Geocode all city-state pairs and create spatial points object
## 3. Load MSA spatial dataset and filter for just the top 55 msas
## 4. Spatially join geocoded cities msas
## 5. Join cities-with-msas back to agency dataset creating lookup table
##
###########################################################################
library(readxl)
library(tidyverse)
library(ggmap)
library(sf)
options(stringsAsFactors = FALSE)
# Step 1 ------------------------------------------------------------------
agencies_to_geocode <- read_excel("data/input/TS2.1TimeSeriesOpExpSvcModeTOS_6.xls",
"OpExp Total") %>%
select(ntdid = `4 Digit NTDID`, city = City, state = State) %>%
mutate(ntdid = str_pad(ntdid, 4, 'left', '0'),
geocode = paste0(city, ', ', state, ' USA')) %>%
filter(state != 'PR') %>%
na.omit
searches <- unique(agencies_to_geocode$geocode)
# Step 2 ------------------------------------------------------------------
geocode_one <- function(search) {
v = c(search, geocode(search))
names(v)[1] <- 'geocode'
data.frame(v)
}
# all_cities_geocoded <- map_df(searches, geocode_one)
# write.csv(all_cities_geocoded, "data/input/all_cities_geocoded.csv", row.names = FALSE)
# The previous step will take ~15 minutes, do avoid running it, download
# the already-geocoded cities
all_cities_geocoded <- read.csv("data/input/all_cities_geocoded.csv")
agencies <- st_as_sf(na.omit(all_cities_geocoded),
coords = c('lon', 'lat'),
crs = 4326)
# Step 3 ------------------------------------------------------------------
selected_msas <- read.csv("data/input/selected_msas.csv") %>%
mutate_all(as.character)
csas <- st_read("data/input/msas/tl_2015_us_cbsa.shp") %>%
select(GEOID.msa = GEOID) %>%
left_join(selected_msas) %>%
na.omit %>%
st_transform(4326)
# Step 4 ------------------------------------------------------------------
agencies_in_msas_sp <- st_join(agencies, csas) %>%
na.omit
agencies_in_msas <- agencies_in_msas_sp %>%
as.data.frame %>%
select(-geometry)
# Step 5 ------------------------------------------------------------------
ag <- agencies_to_geocode %>%
left_join(agencies_in_msas) %>%
select(-city, -state, -geocode) %>%
na.omit %>%
distinct
write.csv(ag, "data/input/agency_msa_lookup_table.csv", row.names = FALSE)
save(ag, file = "data/input/agency_msa_lookup_table.Rdata")