In [1]:
# Import necessary packages and specify processed data directory
library(dplyr)
library(stringr)
library(readr)
library(tidyr)
library(lfe)
library(lubridate)
library(ggplot2)
library(urbnmapr)
library(stargazer)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix


Attaching package: ‘Matrix’


The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack



Attaching package: ‘lubridate’


The following objects are masked from ‘package:dplyr’:

    intersect, setdiff, union


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union



Please cite as: 


 Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

 R package version 5.2.2. https://CRAN.R-project.org/package=stargazer 




In [2]:
# Read in the datasets needed to do this analysis. This includes:
# 1) Information on county intervention timing (county_policy_long.csv)
# 2) The standard safegraph dataset (to get the number of devices in a given county on a given day)
# 3) The dyadic mobility/movement data (which is located in safegraph_dyad_mobility.Rdata)
# note: safegraph_dyad_mobility.Rdata loads a dataframe called mobility_df. It is very big.

# Root of the project data tree. The processed-data directory is derived from it so the
# location is only spelled out once; both values are the same strings as before.
fp <- '/nfs/sloanlab004/projects/covid_mobility_proj/data/'
processed_data_directory <- str_c(fp, 'PROCESSED_DATA/')

counties_long <- read_csv(str_c(processed_data_directory, 'county_policy_long.csv'))
safegraph <- read_csv(str_c(processed_data_directory,
                            'safegraph_social_distancing_aggregate_county.csv'))

# load() creates its objects as a side effect, so fail here (rather than several cells
# later) if the .Rdata file did not provide the expected dataframe.
load(str_c(processed_data_directory, 'safegraph_dyad_mobility.Rdata'))
stopifnot(exists('mobility_df'))

Parsed with column specification:
cols(
  fips = col_double(),
  name = col_character(),
  state_abbv = col_character(),
  type = col_character(),
  dt = col_date(format = "")
)

Parsed with column specification:
cols(
  county_fips = col_double(),
  device_count = col_double(),
  completely_home_device_count = col_double(),
  part_time_work_behavior_devices = col_double(),
  full_time_work_behavior_devices = col_double(),
  ds = col_date(format = "")
)



In [3]:
# Reshape the long policy table (one row per county / policy type / date) into a wide
# table that can be joined onto the mobility data: each policy type becomes its own
# column, set to 1 on the date recorded in dt and NA where that county/date has no
# entry for the policy.
countyPolicy <- counties_long %>%
  mutate(value = 1) %>%
  spread(type, value)

In [4]:
# Build the dyad-level analysis panel from the large mobility dataframe:
#   1) restrict to the analysis window,
#   2) attach policy indicators for the origin and then the destination county,
#   3) attach the origin county's safegraph aggregates.
# NOTE: the result overwrites mobility_df, so this cell is not idempotent. Re-run the
# data-loading cell before re-running it; otherwise the policy columns get joined twice.
mobility_df <- mobility_df %>%
  # Filter our data down to our analysis dates (2020-03-01 through 2020-04-18, inclusive)
  filter(as.Date(ds) >= '2020-03-01') %>%
  filter(as.Date(ds) <= '2020-04-18') %>%
  # Join in the origin county policy interventions. The join matches on the exact date,
  # so only rows dated dt receive a value; all other days are NA at this point.
  left_join(countyPolicy, by = c('ds' = 'dt', 'origin_county' = 'fips')) %>%
  # Fill in the policy intervention data for origin counties, so that days after
  # the implementation date are 1, and days before are 0. fill() carries the last
  # non-missing value forward within each origin county (rows are date-sorted first);
  # anything still NA afterwards precedes every recorded date and is set to 0.
  group_by(origin_county) %>%
  arrange(origin_county, ds, destination_county) %>%
    fill(gatherings50, gatherings500, gyms_movies, restaurants, schools, stay_home) %>%
    replace_na(list(gatherings50 = 0, gatherings500 = 0,
                    gyms_movies = 0, restaurants = 0,
                    schools = 0, stay_home = 0)) %>%
  ungroup() %>%
  # Join in the destination county policy interventions. The policy column names now
  # collide, so the origin columns already in the frame get the suffix ".x" and the
  # newly joined destination columns get the suffix ".y".
  left_join(countyPolicy, by = c('ds' = 'dt', 'destination_county' = 'fips')) %>%
  # Fill in the policy intervention data for destination counties, so that days after
  # the implementation date are 1, and days before are 0.
  group_by(destination_county) %>%
  arrange(destination_county, ds, origin_county) %>%
    fill(gatherings50.y, gatherings500.y, gyms_movies.y, restaurants.y, schools.y, stay_home.y) %>%
    replace_na(list(gatherings50.y = 0, gatherings500.y = 0,
                    gyms_movies.y = 0, restaurants.y = 0,
                    schools.y = 0, stay_home.y = 0)) %>%
  ungroup() %>%
  # Make the other policy indicators mutually exclusive with stay-at-home: once a
  # county's stay_home indicator is 1, its remaining indicators are reset to 0.
  # Origin (.x) columns are keyed on stay_home.x, destination (.y) columns on stay_home.y.
  mutate(gatherings50.x = ifelse(stay_home.x == 1, 0, gatherings50.x),
         gatherings500.x = ifelse(stay_home.x == 1, 0, gatherings500.x),
         gyms_movies.x = ifelse(stay_home.x == 1, 0, gyms_movies.x),
         restaurants.x = ifelse(stay_home.x == 1, 0, restaurants.x),
         schools.x = ifelse(stay_home.x == 1, 0, schools.x),
         gatherings50.y = ifelse(stay_home.y == 1, 0, gatherings50.y),
         gatherings500.y = ifelse(stay_home.y == 1, 0, gatherings500.y),
         gyms_movies.y = ifelse(stay_home.y == 1, 0, gyms_movies.y),
         restaurants.y = ifelse(stay_home.y == 1, 0, restaurants.y),
         # Fixed: this previously tested stay_home.x, i.e. the origin county's order.
         schools.y = ifelse(stay_home.y == 1, 0, schools.y)) %>%
  # Join in the primary safegraph DVs (will be used for num_devices in origin county to normalize)
  left_join(safegraph, by=c('ds' = 'ds', 'origin_county' = 'county_fips'))

In [5]:
# Derive the modelling variables from the joined panel and keep only the columns that
# are saved for the dyad models.
mobility_df_clean <- mobility_df %>%
  # Consolidate policies into simplified policies, separately for the origin (_o, from
  # the .x columns) and the destination (_d, from the .y columns). A consolidated
  # indicator is the larger of its two member indicators; missing values count as 0.
  mutate(ban_gatherings_o = pmax(replace_na(gatherings50.x, 0), replace_na(gatherings500.x, 0)),
         ban_gatherings_d = pmax(replace_na(gatherings50.y, 0), replace_na(gatherings500.y, 0)),
         ban_gmr_o = pmax(replace_na(gyms_movies.x, 0), replace_na(restaurants.x, 0)),
         ban_gmr_d = pmax(replace_na(gyms_movies.y, 0), replace_na(restaurants.y, 0)),
         stay_home_o = replace_na(stay_home.x, 0),
         stay_home_d = replace_na(stay_home.y, 0)) %>%
  # One factor for the directed pair of counties, plus one factor per simplified policy
  # holding the matched "<origin value>-<destination value>" combination.
  mutate(pair = as.factor(str_c(origin_county, '-', destination_county)),
         gatherings_pair = as.factor(str_c(ban_gatherings_o, '-', ban_gatherings_d)),
         gmr_pair = as.factor(str_c(ban_gmr_o, '-', ban_gmr_d)),
         stay_home_pair = as.factor(str_c(stay_home_o, '-', stay_home_d))) %>%
  # Extract the state FIPS for origin and destination county. This relies on the county
  # codes still being numeric, so it has to happen before the factor conversion below.
  mutate(origin_state = as.factor(floor(origin_county/1000)),
         destination_state = as.factor(floor(destination_county/1000))) %>%
  # Convert counties to factors
  mutate(origin_county = as.factor(origin_county),
         destination_county = as.factor(destination_county)) %>%
  # Number of destination county CBG visits per origin device. Missing results are
  # replaced with 0; these are generated by 4 entries, for which num_devices is 0, but
  # device count is missing.
  mutate(destination_cbg_visits_per_origin_device = replace_na(num_devices/device_count, 0)) %>%
  # Find out how many devices were the maximum seen in an origin county. Used for weighting.
  group_by(origin_county) %>%
  mutate(max_device_count = max(device_count, na.rm = TRUE)) %>%
  ungroup() %>%
  # Calculate asinh and log(x+1) transforms of outcome variable
  mutate(asinh_dest_cbg_visits_per_or = asinh(destination_cbg_visits_per_origin_device),
         log_dest_cbg_visits_per_or = log(destination_cbg_visits_per_origin_device + 1)) %>%
  # Select out relevant columns
  dplyr::select(ds, device_count, num_devices, destination_cbg_visits_per_origin_device, pair,
                origin_county, destination_county, gatherings_pair, gmr_pair, stay_home_pair,
                max_device_count, asinh_dest_cbg_visits_per_or, log_dest_cbg_visits_per_or,
                origin_state, destination_state)

# Drop the large joined dataframe now that the cleaned version exists.
rm(mobility_df)

In [6]:
# Save the cleaned dyad-level dataframe to the processed-data directory.
# paste0() takes no `sep` argument (a `sep = ''` there is just pasted as an extra empty
# string), so the directory and file name are concatenated directly.
save(mobility_df_clean,
     file = paste0(processed_data_directory, 'geo_dyad_model_data.Rdata'))