In [1]:
library(cowplot)
library(dplyr)
library(stringr)
library(readr)
library(tidyr)
library(lfe)
library(scales)
library(doMC)
registerDoMC(12)


********************************************************

Note: As of version 1.0.0, cowplot does not change the

  default ggplot2 theme anymore. To recover the previous

  behavior, execute:
  theme_set(theme_cowplot())

********************************************************



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix


Attaching package: ‘Matrix’


The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack



Attaching package: ‘scales’


The following object is masked from ‘package:readr’:

    col_factor


Loading required package: foreach

Loading required package: iterators

Loading required package: parallel



In [2]:
# County-by-day panel. The file carries an .RData extension but is read with
# readRDS(), i.e. it is treated as a single serialized object.
panel <- readRDS('/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/data_v2.RData')

# Restore the saved matrix objects into the global environment.
# NOTE(review): presumably this is where `WM` and `gWM` used further down come
# from -- confirm against the saved files.
invisible(lapply(
    c('/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/conley_selector_matrix.Rdata',
      '/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/social_adj_matrix.RData',
      '/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/geo_adjacency_matrix.Rdata'),
    load,
    envir = globalenv()
))

# Project helpers, sourced in the same order as before.
invisible(lapply(
    c('../utils/residualizer+genAlters.R',
      "../utils/labels_and_colors.r",
      '../utils/adjacency_and_cluster_robust_inference.R',
      '../utils/geo_utils.r'),
    source
))

Loaded glmnet 3.0-2



Attaching package: ‘xgboost’


The following object is masked from ‘package:dplyr’:

    slice



Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths


Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric



Attaching package: ‘lmtest’


The following object is masked from ‘package:lfe’:

    waldtest




In [3]:
# Drop every column whose name matches 'iv...' (regex: "iv" followed by any
# three characters), 'alter' or 'shiftshare', plus PRCP and TMAX. The alter
# columns are rebuilt later from the (placebo or observed) policy columns.
panel <- panel %>%
    select(
        -matches('iv...'),
        -matches('alter'),
        -matches('shiftshare'),
        -PRCP,
        -TMAX
    )

In [4]:
# Fit one residualized difference-in-differences specification with felm()
# and return its point estimates.
#
# Args:
#   df   : data frame holding `<DV>.r`, the `.r` regressors of the chosen
#          specification, `county_fips`, `ds`, `state_abbv` and the weight
#          column `n`.
#   DV   : outcome name WITHOUT the '.r' suffix; the suffix is appended here.
#   type : 'noSP'      -> own-policy terms only,
#          'geoOnly'   -> adds the geo_alter_* terms,
#          'geoSocial' -> adds the alter_* and geo_alter_* terms.
#          Any other value is an error (it used to fall through silently to
#          the 'geoSocial' formula and be labelled with the unknown name).
#   i    : replicate index used in the saved model's file name. When NULL
#          (the default) the fitted model is not written to disk.
#
# Returns:
#   data frame with columns DV, type, vars (coefficient name) and
#   coef (point estimate), one row per regressor.
getCoef <- function(df, DV, type = 'noSP', i = NULL) {
    rhs <- switch(
        type,
        noSP      = 'stay_home.r + ban_gmr.r',
        geoOnly   = 'stay_home.r + geo_alter_sh.r + ban_gmr.r + geo_alter_bgmr.r',
        geoSocial = 'stay_home.r + alter_sh.r + geo_alter_sh.r + ban_gmr.r + alter_bgmr.r + geo_alter_bgmr.r',
        stop("Unknown type '", type, "': expected 'noSP', 'geoOnly' or 'geoSocial'",
             call. = FALSE)
    )

    # felm formula parts: outcome ~ regressors | fixed effects | IV (none) | cluster
    form <- str_c(DV, '.r ~ 0 + ', rhs, ' | county_fips + ds | 0 | state_abbv')
    fit <- felm(as.formula(form), df, weights = df$n)

    # Persist the fit only when a replicate index was supplied.
    if (!is.null(i)) {
        fp <- str_c('/nfs/sloanlab004/projects/covid_mobility_proj/models/did_placebo_state/DiD_state_', type, '_', DV, '_', str_pad(i, 4, pad = '0'), '.RDS')
        saveRDS(fit, fp)
    }

    # Spell out `$coefficients`; `$coef` only worked through partial matching.
    data.frame(summary(fit)$coefficients) %>%
        mutate(vars = rownames(.),
               DV = DV,
               type = type) %>%
        select(DV, type, vars, coef = Estimate)
}


In [6]:
# For every day and state: the fraction of panel rows with each policy
# switched on. summarize() drops only the last grouping level, so the result
# is still grouped by `ds`; downstream code calls ungroup() before using it.
frac_state_treated <- panel %>%
    group_by(ds, state_abbv) %>%
    summarize(
        frac_sh   = sum(stay_home) / n(),
        frac_bgmr = sum(ban_gmr) / n()
    )

# Build the placebo policy panel for one state.
#
# Args:
#   df : rows of the swap table for a single state (one row per `ds`), with
#        columns `state_abbv`, `ds`, `frac_sh` and `frac_bgmr`. It is called
#        as `group_by(state_abbv) %>% do(genPolicy(.))`.
#
# Reads the global `panel`.
#
# Returns:
#   the state's panel rows joined with `df`, plus the random county rank
#   `order`, with `stay_home` / `ban_gmr` overwritten: on each day the
#   ceiling(frac * n_county) lowest-ranked counties are marked treated.
genPolicy <- function(df) {
    # One row per county of this state, each given a random rank 1..n_county.
    # De-duplicating `df` before the join avoids multiplying every date row of
    # `df` with every panel row of the state; the set of counties is the same.
    # sample.int(n()) draws the same permutation as sample(1:n(), n()).
    county_order <- df %>%
        distinct(state_abbv) %>%
        left_join(panel, by = "state_abbv") %>%
        distinct(state_abbv, county_fips) %>%
        mutate(order = sample.int(n()))

    # Number of counties is the number of ROWS. length() on a data frame
    # returns the number of columns (always 3 here), which capped the number
    # of treated counties per state at 3.
    n_county <- nrow(county_order)

    panel %>%
        inner_join(df, by = c("ds", "state_abbv")) %>%
        inner_join(county_order, by = c("county_fips", "state_abbv")) %>%
        mutate(stay_home = ifelse(order <= ceiling(frac_sh * n_county), 1, 0),
               ban_gmr   = ifelse(order <= ceiling(frac_bgmr * n_county), 1, 0))
}

In [7]:
# Interactive check of the placebo construction (unseeded, so the draw differs
# between runs): give every state the treated-fraction time series of a
# randomly chosen state (a permutation, no state is reused) ...
swap_df <- frac_state_treated %>%
    ungroup() %>%
    distinct(state_abbv) %>%
    mutate(swap = sample(state_abbv, size = n(), replace = FALSE)) %>%
    left_join(frac_state_treated, by = c('swap' = 'state_abbv'))

# ... and display the county-level placebo panel that genPolicy() builds from it.
swap_df %>%
    group_by(state_abbv) %>%
    do(genPolicy(.)) %>%
    ungroup()

ds,county_fips,sg_nhd,ash_nhd,state_abbv,stay_home,fb_btvrc,fb_rnstu,ash_rnstu,n,⋯,fb_btvrc.r,fb_rnstu.r,ash_rnstu.r,sg_mcbgv.r,log_mcbgv.r,ban_gmr.r,swap,frac_sh,frac_bgmr,order
<date>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>
2020-03-01,2020,0.6619292,0.6211990,AK,0,0.081104925,0.8589098,0.7780113,291538,⋯,0.048413141,0.011186004,0.009195035,0.024433266,0.018261370,0.140539418,NE,0,0,6
2020-03-01,2050,0.6103896,0.5777135,AK,0,-0.006728011,0.8702422,0.7865839,18216,⋯,-0.145258769,-0.028639183,-0.023093306,-0.176961557,-0.107036168,0.016439151,NE,0,0,3
2020-03-01,2090,0.6529996,0.6137376,AK,0,0.068958985,0.8816395,0.7951572,98971,⋯,0.035081356,0.018487181,0.012984846,0.018380261,0.016446009,0.024821174,NE,0,0,9
2020-03-01,2110,0.6432607,0.6055651,AK,0,-0.058495849,0.8666667,0.7838843,32113,⋯,-0.199950286,-0.018088314,-0.014869806,-0.027559461,-0.008191237,0.023276937,NE,0,0,2
2020-03-01,2122,0.6634996,0.6225080,AK,0,0.007026025,0.8479891,0.7697045,58533,⋯,-0.065126978,-0.016837788,-0.012755989,-0.053586813,-0.018119494,0.018090438,NE,0,0,5
2020-03-01,2130,0.5686275,0.5417377,AK,0,-0.027557270,0.8067061,0.7378963,13918,⋯,-0.112368292,-0.025465806,-0.018684072,-0.129862079,-0.066917244,0.023276937,NE,0,0,7
2020-03-01,2150,0.6159251,0.5824325,AK,0,0.079947704,0.8399123,0.7635321,13345,⋯,-0.008809001,-0.018592215,-0.015071985,-0.079614661,-0.043044163,0.023276937,NE,0,0,8
2020-03-01,2170,0.6528206,0.6135877,AK,0,0.048567227,0.8563294,0.7760525,107610,⋯,-0.001758992,0.005154722,0.004134287,-0.039979186,-0.015269961,0.016439151,NE,0,0,4
2020-03-01,2185,0.7622699,0.7029342,AK,0,0.093349270,0.9205882,0.8240919,9872,⋯,0.002987158,0.022493615,0.018123676,-0.088244951,-0.036617306,0.021611702,NE,0,0,1
2020-03-02,2020,0.7389112,0.6842525,AK,0,0.033624622,0.8920996,0.8029830,291538,⋯,0.075467770,0.021130313,0.015211915,0.097333047,0.059300679,0.147270153,NE,0,0,6


In [8]:
# Run one placebo replicate.
#
# Args:
#   i : replicate index; used as the RNG seed and passed on to getCoef(),
#       which uses it in the saved model's file name.
#
# Reads the globals `frac_state_treated`, `WM`, `gWM` and (via genPolicy)
# `panel`.
#
# Returns:
#   the row-bound getCoef() tables for every outcome x specification pair.
placebo <- function(i) {
    # Seeding with the replicate index makes each draw reproducible.
    set.seed(i)

    # Permute the treated-fraction time series across states.
    state_swaps <- frac_state_treated %>%
        ungroup() %>%
        distinct(state_abbv) %>%
        mutate(swap = sample(state_abbv, size = n(), replace = FALSE)) %>%
        left_join(frac_state_treated, by = c('swap' = 'state_abbv'))

    # County-level placebo policies, state by state.
    placebo_panel <- state_swaps %>%
        group_by(state_abbv) %>%
        do(genPolicy(.)) %>%
        ungroup()

    # Alter (spillover) exposure columns rebuilt from the placebo policies.
    # NOTE(review): weightedAlters() comes from the sourced utils; presumably
    # WM is the social and gWM the geographic weight matrix -- confirm there.
    with_alters <- placebo_panel %>%
        mutate(alter_bgmr     = weightedAlters(., WM, ban_gmr),
               alter_sh       = weightedAlters(., WM, stay_home),
               geo_alter_bgmr = weightedAlters(., gWM, ban_gmr),
               geo_alter_sh   = weightedAlters(., gWM, stay_home))

    # Residualized (.r) versions of every regressor used by getCoef().
    placebo_df <- with_alters %>%
        mutate(ban_gmr.r        = panel_residualizer(ban_gmr),
               stay_home.r      = panel_residualizer(stay_home),
               alter_bgmr.r     = panel_residualizer(alter_bgmr),
               alter_sh.r       = panel_residualizer(alter_sh),
               geo_alter_bgmr.r = panel_residualizer(geo_alter_bgmr),
               geo_alter_sh.r   = panel_residualizer(geo_alter_sh))

    dep_vars   <- c('ash_rnstu', 'ash_nhd', 'fb_btvrc', 'log_mcbgv')
    spec_types <- c('noSP', 'geoOnly', 'geoSocial')

    # Outcome x specification grid, fitted in parallel.
    foreach(DV = dep_vars, .combine = rbind) %:%
        foreach(type = spec_types, .combine = rbind) %dopar% {
            getCoef(placebo_df, DV, type, i)
        }
}

In [None]:
# 1000 placebo replicates, run one after another (each replicate parallelises
# its own model fits via %dopar%), stacked into one coefficient table.
placeboResults <- foreach(i = seq_len(1000), .combine = rbind) %do% {
    placebo(i)
}

In [None]:
# Cache the stacked placebo coefficients so the plots can be redone without
# re-running the replicates.
saveRDS(
    object = placeboResults,
    file = '/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/placebo_DiD_state.RDS'
)

In [None]:
# Reload the cached placebo coefficients.
placeboResults <- readRDS('/nfs/sloanlab004/projects/covid_mobility_proj/data/PROCESSED_DATA/placebo_DiD_state.RDS')

# Rebuild the alter columns (dropped from `panel` earlier) from the observed
# policy columns, then add their residualized (.r) versions.
panel <- panel %>%
    mutate(
        alter_bgmr     = weightedAlters(., WM, ban_gmr),
        alter_sh       = weightedAlters(., WM, stay_home),
        geo_alter_bgmr = weightedAlters(., gWM, ban_gmr),
        geo_alter_sh   = weightedAlters(., gWM, stay_home)
    ) %>%
    mutate(
        alter_bgmr.r     = panel_residualizer(alter_bgmr),
        alter_sh.r       = panel_residualizer(alter_sh),
        geo_alter_bgmr.r = panel_residualizer(geo_alter_bgmr),
        geo_alter_sh.r   = panel_residualizer(geo_alter_sh)
    )

In [None]:
# Coefficients under the observed (non-placebo) policy assignment.
#  * DV names are passed WITHOUT '.r': getCoef() appends the suffix itself, so
#    'ash_rnstu.r' would have asked for a non-existent 'ash_rnstu.r.r' column.
#    The bare names also match the DV labels in placeboResults, which the
#    plots facet on.
#  * The plain DiD specification is called 'noSP' inside getCoef(); 'DiD' is
#    not one of its types and would be fitted with the geoSocial formula.
#  * getCoef() uses its 4th argument in the saved model's file name; index 0
#    keeps the observed fit apart from the placebo replicates (1..1000).
originalResults <- foreach(DV = c('ash_rnstu', 'ash_nhd', 'fb_btvrc', 'log_mcbgv'), .combine = rbind) %:%
        foreach(type = c('noSP', 'geoOnly', 'geoSocial'), .combine = rbind) %dopar% getCoef(panel, DV, type, 0)

# Relabel the no-spillover specification as 'DiD', the label the following
# cells filter on.
originalResults <- originalResults %>%
    mutate(type = ifelse(type == 'noSP', 'DiD', as.character(type)))

In [None]:
# Display the observed-policy estimates labelled 'DiD'.
filter(originalResults, type == 'DiD')

In [None]:
# Placebo distribution of the plain DiD coefficients (histogram) against the
# observed estimate (red line), one panel per coefficient x outcome.
# placebo() passes type 'noSP' for this specification, so filtering
# placeboResults on 'DiD' alone leaves nothing to plot; accept both labels on
# both layers.
options(repr.plot.width=16, repr.plot.height=10)
placeboResults %>%
    filter(type %in% c('noSP', 'DiD')) %>%
    ggplot(aes(x = coef)) + 
    geom_histogram(binwidth = .0005) + 
    geom_vline(aes(xintercept = coef), data = originalResults %>% filter(type %in% c('noSP', 'DiD')), color = 'red') +
    facet_grid(vars~DV)

In [None]:
# Placebo distribution of the 'geoOnly' coefficients (histogram) against the
# observed estimate (red line); each panel gets its own axis ranges.
options(repr.plot.width = 16, repr.plot.height = 10)
ggplot(filter(placeboResults, type == 'geoOnly'), aes(x = coef)) +
    geom_histogram(binwidth = .0005) +
    geom_vline(
        data = filter(originalResults, type == 'geoOnly'),
        mapping = aes(xintercept = coef),
        color = 'red'
    ) +
    facet_grid(vars ~ DV, scales = 'free')

In [None]:
# Placebo distribution of the 'geoSocial' coefficients (histogram) against the
# observed estimate (red line); each panel gets its own axis ranges.
options(repr.plot.width = 16, repr.plot.height = 10)
ggplot(filter(placeboResults, type == 'geoSocial'), aes(x = coef)) +
    geom_histogram(binwidth = 0.001) +
    geom_vline(
        data = filter(originalResults, type == 'geoSocial'),
        mapping = aes(xintercept = coef),
        color = 'red'
    ) +
    facet_grid(vars ~ DV, scales = 'free')