In [33]:
# rm(list=ls())
require(data.table)
require(MASS)
require(ggplot2)
require(foreach)
require(gridExtra)
require(doMC)
registerDoMC(cores=16)

## Resizing notebook plot space
options(repr.plot.width=16, repr.plot.height=9)

### Neal's sorting function (ref: https://github.com/nmmarquez/ar.matrix)

In [64]:
rank_time_series <- function(X, corr, ts=F, print=FALSE){
  draws <- nrow(X)
  years <- ncol(X)
  # X <- matrix(runif(draws*years), nrow=years, ncol=draws)
   
  if(ts == F) {
    corr_mat <- matrix(data=corr, nrow= years, ncol= years )
    diag(corr_mat) <- 1
  }
  
  if(ts == T) {
    corr_mat <- corr**abs(outer(0:(years-1), 0:(years-1), "-"))
  }
  
   
  if(print){
    print(corr_mat)
  }


  mvdat <- mvrnorm(n= draws, mu=0 * 1:ncol(X), Sigma=corr_mat, empirical=T)
  ranks <- apply(mvdat, 2, rank, ties.method="first")
  sorted_X <- apply(X, 2, sort)
  sapply(1:ncol(X), function(x) sorted_X[,x][ranks[,x]])
}

#### Simulations: Test with 1-D data first (one "country" and multiply years)

In [73]:
sim_1 <- data.table(t = c(1:25))
for(i in c(1:500)) {
    sim_1[, paste0("var_",i)] = cumsum(rnorm(25,0,1))
}

# head(sim_1)

## Plot:
# ggplot(  melt(sim_1, id.vars=c("t"), value.name = "value") ) +
# geom_line(aes(x = t, y = value, group = variable, color = variable)) + theme(legend.position = "none")

In [72]:
sim_2 <- dcast(melt(sim_1, id.vars=c("t"), value.name = "value"), variable ~ t)

colnames(sim_2) <- c("variable", paste0("year_", c(1:25)))
sim_2 <- sim_2[order(year_1)]

## Correlate and plot across time (start with rho == 1)

corr_RW <- cbind.data.frame(c(paste0("var_", c(1:500))) , rank_time_series(sim_2[,2:26], corr = 0.995, ts =T, print = F))
colnames(corr_RW) <- c("variable", paste0("year_", c(1:25)))

corr_RW <- melt(corr_RW, id.vars = c("variable"), value.name = "data", variable.name = "t")
corr_RW <- dcast(corr_RW, t ~ variable, value.var = "data")

## Plot:
# ggplot(  melt(corr_RW, id.vars=c("t"), value.name = "value") ) +
# geom_line(aes(x = t, y = value, group = variable, color = variable)) + theme(legend.position = "none")

### Bring in the real data:

In [74]:
edu <- fread("/home/j/WORK/01_covariates/02_inputs/education/update_2017/data/output_data/20161121_GBD2016prelim_95+_raked_2/gpr_draws_country_year_collapsed_after_sort.csv", header=T)
ldi <- data.table(fread('/share/forecasting/data/LDIpc/national_LDIpc_prepped_20161207.csv', header=T))
pops <- data.table(fread('/ihme/forecasting/data/pop/20150101_wpp/data.csv', header=T))
tfr <- data.table(fread('/ihme/forecasting/data/tfr/results/tfr_whole.csv', header=T))

Read 17321 rows and 1002 (of 1002) columns from 0.273 GB file in 00:00:05
Read 12090 rows and 1008 (of 1008) columns from 0.112 GB file in 00:00:03
Read 16748 rows and 1004 (of 1004) columns from 0.113 GB file in 00:00:04


### Collapse population to country-year level

In [88]:
pops<- pops[, lapply(.SD, function(x) sum(x)), by=c("location_id", "year_id"), .SDcols="pop" ] 
head(pop)

location_id,year_id,pop
6,1950,544112923
6,1951,558820362
6,1952,570764965
6,1953,580886559
6,1954,589955812
6,1955,598574241


### Set keys

In [90]:
setkeyv(edu, c("location_id", "year_id"))
setkeyv(ldi, c("location_id", "year_id"))
setkeyv(pops, c("location_id", "year_id"))
setkeyv(tfr, c("location_id", "year_id"))

tables()

     NAME    NROW  NCOL  MB
[1,] edu   17,321 1,002 133
[2,] ldi   12,090 1,008  93
[3,] pops  18,595     3   1
[4,] sim_1     25   501   1
[5,] sim_2    500    26   1
[6,] tfr   16,748 1,004 129
     COLS                                                                            
[1,] location_id,year_id,draw0,draw1,draw2,draw3,draw4,draw5,draw6,draw7,draw8,draw9,
[2,] location_id,ihme_loc_id,year_id,age_group_id,sex_id,LDIpc_1,LDIpc_2,LDIpc_3,LDIp
[3,] location_id,year_id,pop                                                         
[4,] t,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var
[5,] variable,year_1,year_2,year_3,year_4,year_5,year_6,year_7,year_8,year_9,year_10,
[6,] age_group_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,
     KEY                
[1,] location_id,year_id
[2,] location_id,year_id
[3,] location_id,year_id
[4,]                    
[5,]                    
[6,] location_id,year_id
Total: 358MB


### Merge them all together!

In [107]:
merged_data <- data.table(expand.grid(location_id = unique(pops[, location_id]), year_id = c(1950:2040)))
key <- c("location_id", "year_id")
setkeyv(merged_data, key)

merged_data <- merge(merged_data, edu, by = key)
merged_data

location_id,year_id,draw0,draw1,draw2,draw3,draw4,draw5,draw6,draw7,⋯,draw990,draw991,draw992,draw993,draw994,draw995,draw996,draw997,draw998,draw999
6,1950,2.645154,2.338911,2.215635,2.356409,2.311006,2.637207,2.598514,2.703716,⋯,2.623432,2.567646,2.953635,2.538061,2.742645,2.654224,2.925377,2.380497,2.988505,2.445686
6,1951,2.733160,2.418230,2.289790,2.430288,2.391773,2.711155,2.679904,2.796497,⋯,2.709355,2.647406,3.030729,2.621269,2.829784,2.740444,3.007706,2.463147,3.073169,2.527796
6,1952,2.821512,2.503471,2.372092,2.510112,2.474177,2.791896,2.767157,2.889029,⋯,2.800404,2.733829,3.107230,2.703144,2.907102,2.829295,3.092795,2.550813,3.155087,2.605103
6,1953,2.916135,2.589439,2.457069,2.588784,2.557861,2.875765,2.853647,2.978906,⋯,2.891438,2.818666,3.198463,2.786409,2.995532,2.918566,3.183755,2.637424,3.236606,2.689630
6,1954,3.009385,2.681167,2.549024,2.675051,2.645965,2.961578,2.942888,3.085022,⋯,2.975036,2.905795,3.285786,2.874933,3.088882,3.014471,3.273646,2.728941,3.329860,2.776312
6,1955,3.099141,2.774245,2.640853,2.757904,2.736391,3.046350,3.030887,3.181736,⋯,3.071211,2.999301,3.383200,2.966008,3.177256,3.107616,3.356422,2.821734,3.422766,2.866393
6,1956,3.190781,2.865815,2.732002,2.847069,2.826489,3.134597,3.125016,3.278072,⋯,3.167582,3.094218,3.475549,3.058824,3.267291,3.203273,3.442767,2.913691,3.507861,2.962856
6,1957,3.292052,2.956309,2.827100,2.938995,2.920284,3.223432,3.217436,3.371086,⋯,3.264400,3.191433,3.566597,3.157251,3.357648,3.299055,3.530320,3.010704,3.596285,3.059647
6,1958,3.389554,3.056520,2.928058,3.033190,3.018030,3.312862,3.316577,3.470046,⋯,3.358220,3.284199,3.662476,3.255497,3.450733,3.396341,3.623037,3.107790,3.680407,3.158855
6,1959,3.490454,3.158316,3.029271,3.135670,3.122357,3.401574,3.415398,3.566577,⋯,3.455479,3.373733,3.755957,3.359112,3.551219,3.494725,3.714589,3.211182,3.759802,3.262200
