In [4]:
library(dplyr)
library(tidyr)
library(ggsci)
library(readr)
library(lfe)
library(stringr)
library(ggplot2)
library(doMC)
registerDoMC(24)

In [7]:
df <- data.table::fread('/pool001/mfzhao/PROCESSED_DATA/dyadic_mvmt_2020.csv')
pp <- read_csv('/pool001/mfzhao/PROCESSED_DATA/policyPeriods.csv')

Parsed with column specification:
cols(
  key = [31mcol_character()[39m,
  p1sdp = [34mcol_date(format = "")[39m,
  p2shp = [34mcol_date(format = "")[39m,
  p3rop = [34mcol_date(format = "")[39m
)



In [8]:
df %>% 
    filter(origin_cluster != destination_cluster) %>%
    mutate(oRain  = as.numeric(oPRCP > 0),
           dRain  = as.numeric(dPRCP > 0),
           nearby = as.numeric(dist < 100)) -> df

In [13]:
pp %>%
    mutate(cluster = as.numeric(str_sub(key, 1, 2))) %>%
    select(-key) %>%
    distinct() %>%
    rename(p1date = p1sdp, 
           p2date = p2shp,
           p3date = p3rop) -> statePolicy

In [22]:
df %>%
    select(-matches('[od]p[123]')) %>%
    left_join(statePolicy %>%
              rename(origin_cluster = cluster,
                     odate1 = p1date,
                     odate2 = p2date,
                     odate3 = p3date)) %>%
    left_join(statePolicy %>%
              rename(destination_cluster = cluster,
                     ddate1 = p1date,
                     ddate2 = p2date,
                     ddate3 = p3date)) %>%
    mutate(op1 = as.numeric(date - odate1 >= 0),
           op2 = as.numeric(date - odate2 >= 0),
           op3 = as.numeric(date - odate3 >= 0),
           dp1 = as.numeric(date - ddate1 >= 0),
           dp2 = as.numeric(date - ddate2 >= 0),
           dp3 = as.numeric(date - ddate3 >= 0)) %>%
    replace_na(list(op2 = 0, dp2 = 0)) %>%
    select(-matches('[od]date'))

Joining, by = "origin_cluster"

Joining, by = "destination_cluster"

“Incompatible methods ("-.IDate", "-.Date") for "-"”
“Incompatible methods ("-.IDate", "-.Date") for "-"”
“Incompatible methods ("-.IDate", "-.Date") for "-"”
“Incompatible methods ("-.IDate", "-.Date") for "-"”
“Incompatible methods ("-.IDate", "-.Date") for "-"”
“Incompatible methods ("-.IDate", "-.Date") for "-"”


date,dyad,origin_cluster,destination_cluster,n,bordering,ndotd,pdotd,dist,oPRCP,⋯,dTMAX,oRain,dRain,nearby,op1,op2,op3,dp1,dp2,dp3
<date>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2020-01-01,01001->13121,1,13,55601,0,88,1.599709e-02,246.28540,0,⋯,13.900000,0,0,0,0,0,0,0,0,0
2020-01-01,01003->04013,1,4,218022,0,26,1.252348e-03,2351.89961,0,⋯,14.877778,0,1,0,0,0,0,0,0,0
2020-01-01,01003->12005,1,12,218022,0,24,1.156014e-03,208.17543,0,⋯,17.200000,0,0,0,0,0,0,0,0,0
2020-01-01,01003->12031,1,12,218022,0,11,5.298396e-04,582.48358,0,⋯,18.200000,0,0,0,0,0,0,0,0,0
2020-01-01,01003->12033,1,12,218022,1,998,4.807090e-02,34.00226,0,⋯,17.200000,0,0,1,0,0,0,0,0,0
2020-01-01,01003->12073,1,12,218022,0,15,7.225085e-04,331.98103,0,⋯,18.050000,0,0,0,0,0,0,0,0,0
2020-01-01,01003->12091,1,12,218022,0,83,3.997881e-03,108.30828,0,⋯,16.833333,0,0,0,0,0,0,0,0,0
2020-01-01,01003->12095,1,12,218022,0,179,8.621935e-03,666.99306,0,⋯,21.700000,0,0,0,0,0,0,0,0,0
2020-01-01,01003->12113,1,12,218022,0,107,5.153894e-03,66.94946,0,⋯,16.333333,0,0,1,0,0,0,0,0,0
2020-01-01,01003->12131,1,12,218022,0,87,4.190550e-03,149.09305,0,⋯,16.800000,0,0,0,0,0,0,0,0,0


In [33]:
fri <- function(df) {
    statePolicy %>%
        mutate(cluster = sample(cluster, n(), replace = F)) -> swap
    
    df %>%
    select(-matches('[od]p[123]')) %>%
    left_join(swap %>%
              rename(origin_cluster = cluster,
                     odate1 = p1date,
                     odate2 = p2date,
                     odate3 = p3date)) %>%
    left_join(swap %>%
              rename(destination_cluster = cluster,
                     ddate1 = p1date,
                     ddate2 = p2date,
                     ddate3 = p3date)) %>%
    mutate(op1 = as.numeric(date - odate1 >= 0),
           op2 = as.numeric(date - odate2 >= 0),
           op3 = as.numeric(date - odate3 >= 0),
           dp1 = as.numeric(date - ddate1 >= 0),
           dp2 = as.numeric(date - ddate2 >= 0),
           dp3 = as.numeric(date - ddate3 >= 0)) %>%
    replace_na(list(op2 = 0, dp2 = 0)) %>%
    select(-matches('[od]date')) -> temp
    
    
    fit1 <- felm(log(ndotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3
             | dyad + date | 0 | origin_cluster + destination_cluster, temp)
    fit2 <- felm(log(pdotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3
             | dyad + date | 0 | origin_cluster + destination_cluster, temp, weights = temp$n)
    
    as.data.frame(summary(fit1)$coef) %>%
    mutate(var  = rownames(.),
            dv   = 'ndotd') %>%
    bind_rows(as.data.frame(summary(fit2)$coef) %>% 
              mutate(var  = rownames(.),
                     dv   = 'pdotd')) %>%
    rename(estimate = Estimate, se = `Cluster s.e.`) -> out
    
    return(out)
}

foreach(i = 1:500, .combine = rbind) %dopar% fri(df) -> fri_results

In [30]:
fit1 <- felm(log(ndotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3
             | dyad + date | 0 | origin_cluster + destination_cluster, df)
fit2 <- felm(log(pdotd) ~ op1 + op2 + op3 + dp1 + dp2 + dp3
             | dyad + date | 0 | origin_cluster + destination_cluster, df, weights = df$n)


In [31]:
as.data.frame(summary(fit1)$coef) %>%
    mutate(var  = rownames(.),
            dv   = 'ndotd') %>%
    bind_rows(as.data.frame(summary(fit2)$coef) %>% 
              mutate(var  = rownames(.),
                     dv   = 'pdotd')) %>%
    rename(estimate = Estimate, se = `Cluster s.e.`) -> base

In [39]:
base %>%
    select(var, dv, base = estimate) -> base

In [None]:
vns <- c('dp1'   = "Dest:(ip)",
         'dp2'   = "Dest:(sh)",
         'dp3'   = "Dest:(ro)",
         'op1'   = 'Ori:(ip)',
         'op2'   = 'Ori:(sh)',
         'op3'   = 'Ori:(ro)',
         'pdotd' = 'log_pdotd', 
         'ndotd' = 'log_ndotd')

options(repr.plot.width=15, repr.plot.height=10)
fri_results %>%
    mutate(estimate = .8 * estimate) %>%
    ggplot(aes(x = estimate, fill = dv)) +
    geom_histogram(bins = 40) +
    geom_vline(aes(xintercept = base), data = base) +
    facet_grid(dv ~ var, scales = 'free', labeller = as_labeller(vns)) +
    theme_light() +
    scale_fill_d3() +
    theme(text = element_text(size = 20),
          legend.position = 'bottom') -> p

ggsave('/home/mfzhao/SI_plots/rc3b_fri_dyad.pdf', p, device = 'pdf', width = 6.5, height = 6.5, scale = 2)