In [1]:
library(readr)
library(tidyr)
library(stringr)
library(dplyr)
library(USAboundaries)
library(sf)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Linking to GEOS 3.5.2, GDAL 2.4.2, PROJ 4.8.0



In [2]:
fp      <- '/pool001/mfzhao/'
keys    <- read_csv(str_c(fp, 'PROCESSED_DATA/keys.csv'))
policy  <- read_csv(str_c(fp, 'PROCESSED_DATA/policy.csv'))
weather <- read_csv(str_c(fp, 'PROCESSED_DATA/region_weather_2020.csv'))
df      <- read_csv(str_c(fp, 'safegraph/dyadic2020.csv'))

Parsed with column specification:
cols(
  key = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  key = [31mcol_character()[39m,
  date = [34mcol_date(format = "")[39m,
  cluster = [31mcol_character()[39m,
  reopening = [32mcol_double()[39m,
  sdPolicy = [32mcol_double()[39m,
  stayHome = [32mcol_double()[39m
)

Parsed with column specification:
cols(
  key = [31mcol_character()[39m,
  DATE = [34mcol_date(format = "")[39m,
  PRCP = [32mcol_double()[39m,
  TMAX = [32mcol_double()[39m
)

Parsed with column specification:
cols(
  date = [34mcol_date(format = "")[39m,
  origin_county = [32mcol_double()[39m,
  destination_county = [32mcol_double()[39m,
  n = [32mcol_double()[39m
)



In [3]:
county_sf <- us_counties()

as.data.frame(county_sf) %>%
    select(-geometry) %>%
    mutate(L3 = row_number(),
           key = str_c(statefp, countyfp)) %>%
    left_join(as.data.frame(st_coordinates(county_sf))) %>%
    select(key, X, Y) -> coords

coords %>%
    inner_join(coords, by = c('X', 'Y')) %>%
    filter(key.x != key.y) %>%
    select(origin_county = key.x,
           destination_county = key.y) %>%
    mutate(bordering = 1) %>%
    arrange(origin_county, destination_county) -> bordering

Joining, by = "L3"



In [4]:
df %>%
    filter(date >= as.Date('2020-03-01'), date < as.Date('2020-07-01')) %>%
    mutate(origin_county      = str_pad(origin_county, 5, pad = '0'),
           destination_county = str_pad(destination_county, 5, pad = '0')) %>%
    inner_join(keys, by = c('origin_county' = 'key')) %>%
    inner_join(keys, by = c('destination_county' = 'key')) %>%
    left_join(bordering) %>%
    mutate(bordering = replace_na(bordering, 0)) -> df

weather %>%
    rename(date = DATE) %>%
    filter(date >= as.Date('2020-03-01'), date < as.Date('2020-07-01')) %>%
    inner_join(keys) -> weather

df %>%
    group_by(origin_county, destination_county) %>%
    summarize(n = sum(n), days = n()) -> dyad_stats

Joining, by = c("origin_county", "destination_county")

Joining, by = "key"



In [5]:
dyad_stats %>%
    group_by(days) %>%
    summarize(n = sum(n)) %>%
    arrange(desc(n)) %>%
    mutate(ecdf = cumsum(n)/sum(n)) %>%
    head()

days,n,ecdf
<int>,<dbl>,<dbl>
122,3769828171,0.5504554
610,379611045,0.6058847
488,379228196,0.6612581
366,324377223,0.7086224
732,311580576,0.7541181
854,283291358,0.7954832


In [6]:
dyad_stats %>%
    filter(origin_county != destination_county) %>%
    group_by(days) %>%
    summarize(n = sum(n)) %>%
    arrange(desc(n)) %>%
    mutate(ecdf = cumsum(n)/sum(n)) %>%
    head()

days,n,ecdf
<int>,<dbl>,<dbl>
610,379611045,0.1152388
488,379228196,0.2303613
366,324377223,0.3288327
732,311580576,0.4234195
854,283291358,0.5094184
122,215391521,0.5748049


In [8]:
df %>%
    filter(origin_county != destination_county) %>%
    group_by(date, key = destination_county) %>%
    summarize(nocd = sum(n), nocdb = sum(n * bordering)) -> nocd


write_csv(nocd, str_c(fp, 'PROCESSED_DATA/n_outside_county_devices.csv'))

In [11]:
df %>%
    inner_join(dyad_stats %>% filter(days == max(days)) %>% select(origin_county, destination_county)) %>%
    inner_join(policy %>% 
               select(date,
                      origin_county = key,
                      o.reopening   = reopening,
                      o.sdPolicy    = sdPolicy,
                      o.stayHome    = stayHome)) %>%
    inner_join(policy %>% 
               select(date,
                      destination_county = key,
                      d.reopening        = reopening,
                      d.sdPolicy         = sdPolicy,
                      d.stayHome         = stayHome)) %>% 
    filter(origin_county != destination_county) -> df

Joining, by = c("origin_county", "destination_county")

Joining, by = c("date", "origin_county")

Joining, by = c("date", "destination_county")



In [13]:
df %>%
    left_join(bordering) %>%
    mutate(bordering = replace_na(bordering, 0)) %>%
    mutate(D_o = ifelse(o.sdPolicy == 1, 'o.sdp',
                        ifelse(o.stayHome == 1, 'o.shp', 
                               ifelse(o.reopening == 1, 'o.rop', 'o.npp'))),
           D_d = ifelse(d.sdPolicy == 1, 'd.sdp',
                        ifelse(d.stayHome == 1, 'd.shp', 
                               ifelse(d.reopening == 1, 'd.rop', 'd.npp'))),
           dyad = str_c(origin_county, '->', destination_county),
           origin_cluster = str_sub(origin_county, 1, 2), 
           destination_cluster = str_sub(destination_county, 1, 2)) %>%
    select(-matches('reopening'),
           -matches('sdPolicy'),
           -matches('stayHome')) %>%
    left_join(weather %>%
              rename(origin_county = key,
                     o.PRCP = PRCP,
                     o.TMAX = TMAX)) %>%
    left_join(weather %>%
              rename(destination_county = key,
                     d.PRCP = PRCP,
                     d.TMAX = TMAX)) -> df

Joining, by = c("origin_county", "destination_county", "bordering")

Joining, by = c("date", "origin_county")

Joining, by = c("date", "destination_county")



In [None]:
write_csv(df, str_c(fp, 'PROCESSED_DATA/dyadic_mvmt_2020.csv'))