In [1]:
library(dplyr)
library(tidyr)
library(readr)
library(doMC)
library(stringr)
# Register the doMC backend used by the %dopar% loops in this notebook.
# The worker count is capped at the number of cores the host reports so the
# script does not oversubscribe smaller machines; 28 stays the upper bound.
# detectCores() can return NA on some platforms, hence na.rm = TRUE.
n_cores <- min(28L, parallel::detectCores(), na.rm = TRUE)
registerDoMC(cores = n_cores)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: foreach

Loading required package: iterators

Loading required package: parallel



In [2]:
# Collect every file two directory levels below the mobility root.
flist <- Sys.glob('/pool001/mfzhao/mobility/*/*')

# TRUE for files whose path contains 'united_states'; those feed us_mobility,
# every other file feeds eu_mobility.
is_us <- str_detect(flist, 'united_states')

# Read each file with read_csv on the registered parallel backend and stack
# the per-file tables row-wise.
eu_mobility <- foreach(path = flist[!is_us], .combine = 'rbind') %dopar% read_csv(path)
us_mobility <- foreach(path = flist[is_us], .combine = 'rbind') %dopar% read_csv(path)

In [3]:
# Old key -> new key for the seven 'UKN0A'..'UKN0G' polygon ids.
# NOTE(review): presumably this aligns the ids with the codes used in the
# population table joined later -- confirm.
ukn_key_map <- c(UKN0A = 'UKN10',
                 UKN0B = 'UKN11',
                 UKN0C = 'UKN12',
                 UKN0D = 'UKN13',
                 UKN0E = 'UKN14',
                 UKN0F = 'UKN15',
                 UKN0G = 'UKN16')

# Tidy the EU mobility table:
#   1. drop rows whose baseline_name is 'limited_february',
#   2. keep four columns under short names (key, ds, btvrc, rstu),
#   3. sort by key then ds,
#   4. rewrite the keys listed in ukn_key_map; all other keys are untouched.
eu_mobility <- eu_mobility %>%
    filter(baseline_name != 'limited_february') %>%
    select(key = external_polygon_id,
           ds,
           btvrc = all_day_bing_tiles_visited_relative_change,
           rstu = all_day_ratio_single_tile_users) %>%
    arrange(key, ds) %>%
    mutate(key = ifelse(key %in% names(ukn_key_map),
                        unname(ukn_key_map[key]),
                        key))

# Tidy the US mobility table:
#   1. drop rows whose baseline_name is 'limited_february',
#   2. left-pad the polygon id with '0' to a width of 5 characters,
#   3. keep four columns under short names (key, ds, btvrc, rstu),
#   4. sort by key then ds.
us_mobility <- us_mobility %>%
    filter(baseline_name != 'limited_february') %>%
    mutate(external_polygon_id = str_pad(external_polygon_id, 5, pad = '0')) %>%
    select(key = external_polygon_id,
           ds,
           btvrc = all_day_bing_tiles_visited_relative_change,
           rstu = all_day_ratio_single_tile_users) %>%
    arrange(key, ds)

In [4]:
# Load the tab-separated European population table. Every column is read as
# character because the year columns mix numbers with textual markers.
eu_pop <- read_delim('/pool001/mfzhao/geo_data/europe_population.tsv', '\t', escape_double = FALSE, trim_ws = TRUE)

# Reduce the table to one population figure (column `2018`) per region key:
#   - split the packed first column into sex / unit / age / key,
#   - keep only the sex == 'T', age == 'TOTAL' rows,
#   - turn the 2018 column into a number.
# Cells may carry a trailing lowercase flag after the value (e.g. '12345 p').
# Stripping only ' p' left every other flagged value to become NA in
# as.numeric(), so any trailing flag token is removed here. A bare ':' is
# mapped to NA explicitly, so a remaining coercion warning signals genuinely
# unexpected content.
# NOTE(review): `unit` is not filtered; this assumes a single unit per key --
# confirm, otherwise the later join on key will duplicate rows.
eu_pop <- eu_pop %>%
    separate('sex,unit,age,geo\\time', into = c('sex', 'unit', 'age', 'key'), sep = ',') %>%
    filter(sex == 'T', age == 'TOTAL') %>%
    select(key, n = `2018`) %>%
    mutate(n = str_replace(n, '\\s+[a-z]+$', ''),
           n = ifelse(n == ':', NA_character_, n),
           n = as.numeric(n))

Parsed with column specification:
cols(
  `sex,unit,age,geo\time` = col_character(),
  `2019` = col_character(),
  `2018` = col_character(),
  `2017` = col_character(),
  `2016` = col_character(),
  `2015` = col_character(),
  `2014` = col_character()
)

“NAs introduced by coercion”


In [5]:
# Load the county population estimates file.
us_pop <- read_csv('/pool001/mfzhao/geo_data/cc-est2018-alldata.csv')

# Build one population total per county key.
# The key is STATE followed by COUNTY with no separator.
# NOTE(review): YEAR == 11 and AGEGRP == 0 presumably select the latest
# estimate and the all-ages row of the file layout -- confirm.
us_pop <- us_pop %>%
    filter(YEAR == 11,
           AGEGRP == 0) %>%
    mutate(key = paste0(STATE, COUNTY)) %>%
    group_by(key) %>%
    summarize(n = sum(TOT_POP))

Parsed with column specification:
cols(
  .default = col_double(),
  SUMLEV = col_character(),
  STATE = col_character(),
  COUNTY = col_character(),
  STNAME = col_character(),
  CTYNAME = col_character()
)

See spec(...) for full column specifications.



In [6]:
# Attach the population column `n` to each mobility table.
# The join column is named explicitly so that a column later shared by both
# tables cannot silently become part of the join condition.
# inner_join keeps only rows whose key is present in both tables, so mobility
# rows without a population entry are dropped.
eu_mobility <- eu_mobility %>%
    inner_join(eu_pop, by = 'key')

us_mobility <- us_mobility %>%
    inner_join(us_pop, by = 'key')

Joining, by = "key"

Joining, by = "key"



In [7]:
# Persist both processed tables as CSV files in the PROCESSED_DATA directory.
eu_mobility %>%
    write_csv('/pool001/mfzhao/PROCESSED_DATA/eu_mobility.csv')
us_mobility %>%
    write_csv('/pool001/mfzhao/PROCESSED_DATA/us_mobility.csv')