In [2]:
# rm(list=ls())
# Attach dependencies with library() rather than require(): library() stops
# with an error when a package is missing, whereas require() only warns and
# returns FALSE, letting the failure surface later in an unrelated cell.
library(data.table)
library(MASS)
library(ggplot2)
# library(foreach)
library(gridExtra)
library(stringr)
library(matrixStats)
library(parallel)
library(abind)
library(rlist)


# Report the number of CPU cores detected on this machine
parallel::detectCores()
## Resizing notebook plot space
options(repr.plot.width = 16, repr.plot.height = 9)

Loading required package: data.table
Loading required package: MASS
Loading required package: ggplot2
Loading required package: gridExtra
Loading required package: stringr
Loading required package: matrixStats
Loading required package: parallel
Loading required package: abind
Loading required package: rlist


In [3]:
# Simulate `draws` time series of length `years` with correlation over time.
#
# Independent Uniform(0, 1) values are generated for every year. Within each
# year they are sorted and then re-ordered to follow the ranks of multivariate
# normal draws whose correlation matrix is corr^|i - j| (correlation decays
# with the distance between years). Each year therefore keeps exactly the
# uniform values that were generated for it; only their order across draws
# changes.
#
# Args:
#   years: number of time points (rows of the result); a single number >= 1.
#   draws: number of draws (columns of the result); a single number >= 2,
#          because one draw cannot be centred and scaled by
#          mvrnorm(empirical = TRUE).
#   corr:  correlation between adjacent time points of the normal draws.
#   print: unused; kept so that existing calls passing it keep working.
#
# Returns:
#   A numeric matrix with `years` rows and `draws` columns.
simulate_time_series <- function(years, draws, corr, print = FALSE) {
  stopifnot(
    is.numeric(years), length(years) == 1L, years >= 1,
    is.numeric(draws), length(draws) == 1L, draws >= 2,
    is.numeric(corr), length(corr) == 1L
  )
  # Draw the uniforms before the normals: keeping this RNG order means a given
  # seed yields the same matrix as the earlier implementation.
  uniform_draws <- matrix(runif(draws * years), nrow = years, ncol = draws)
  lags <- abs(outer(seq_len(years) - 1L, seq_len(years) - 1L, "-"))
  # normal_draws has one row per draw and one column per year
  normal_draws <- mvrnorm(
    n = draws, mu = rep(0, years), Sigma = corr^lags, empirical = TRUE
  )
  reordered <- vapply(
    seq_len(years),
    function(yr) {
      # ties.method = "first" makes the ranks a permutation of 1..draws
      year_ranks <- rank(normal_draws[, yr], ties.method = "first")
      sort(uniform_draws[yr, ])[year_ranks]
    },
    numeric(draws)
  )
  # vapply() stacks years as columns; callers expect years as rows
  t(reordered)
}

In [5]:
## Simulation settings
draws <- 5000    # draws generated per series
years <- 25      # length of each series, in years
corr <- 0.98     # target correlation over time
ages <- 10       # how many age groups there are
age_corr <- 0.75 # target correlation between adjacent age groups

In [6]:
# Simulate draws of time-correlated series once per age group. Every age group
# gets its own call, so the groups are generated independently of each other.
# seq_len() is used so that ages = 0 yields an empty list rather than
# iterating over c(1, 0).
age_simulations <- lapply(
  seq_len(ages),
  function(age_group) simulate_time_series(years, draws, corr)
)

In [12]:
# Stack the per-age matrices into a single 3D array laid out as
# (time, draws, age), then swap the last two axes so the result is indexed
# as [time, age, draw]
Xtad <- aperm(
  array(
    unlist(age_simulations, use.names = FALSE),
    dim = c(years, draws, ages)
  ),
  perm = c(1, 3, 2)
)

In [15]:
# Correlation over time: for every age group (columns), correlate each year
# with the year before it across draws (rows). Only the first rows are shown.
head(vapply(
  1:ages,
  function(age) {
    vapply(
      2:years,
      function(yr) cor(Xtad[yr, age, ], Xtad[yr - 1, age, ]),
      numeric(1)
    )
  },
  numeric(years - 1)
))

# Correlation across ages: for every pair of adjacent age groups (columns),
# correlate them across draws within each year (rows). The age groups were
# simulated independently, so these values are expected to sit near zero.
head(vapply(
  2:ages,
  function(age) {
    vapply(
      1:years,
      function(yr) cor(Xtad[yr, age, ], Xtad[yr, age - 1, ]),
      numeric(1)
    )
  },
  numeric(years)
))

0,1,2,3,4,5,6,7,8,9
0.9777503,0.9776779,0.9776912,0.9778003,0.977894,0.9773128,0.9781345,0.9784732,0.9772212,0.9775751
0.9778401,0.9776624,0.9778496,0.977959,0.9776824,0.9776767,0.9784543,0.9783413,0.9779383,0.9781905
0.9780807,0.9773805,0.9781573,0.9782313,0.9778274,0.9778883,0.9785527,0.9779139,0.9773963,0.9778589
0.9777611,0.9783554,0.9777682,0.977823,0.9781945,0.9775027,0.9783171,0.9778702,0.9770203,0.9783142
0.9776019,0.9781674,0.977291,0.9774607,0.9780133,0.9784573,0.9782896,0.978066,0.9767617,0.9779575
0.9771637,0.9784698,0.9777643,0.9777057,0.9784563,0.9778852,0.9780417,0.9783607,0.9767156,0.9784227


0,1,2,3,4,5,6,7,8
0.005111261,0.01688311,0.007592872,-0.02555783,0.018717447,-0.0008220779,-0.0049466751,-0.011272345,0.02413725
0.005104957,0.01851779,0.010739583,-0.03081211,0.018987108,0.0043247778,-0.0063241614,-0.008208239,0.02937922
0.004651239,0.01349416,0.011623795,-0.01777107,0.016622036,0.0024508918,-0.002841513,-0.010868295,0.0335652
0.004883193,0.0220178,0.013059657,-0.02585608,0.009379476,0.0058217381,-0.0015085907,-0.015962169,0.04222383
-0.005343161,0.02113175,0.017922838,-0.03044461,0.009915876,0.002707429,0.0037397893,-0.019084698,0.03872887
-0.005769102,0.01905747,0.020210584,-0.03531368,0.008573932,-0.0041296401,-0.0004324492,-0.022020747,0.03976576


In [16]:
# Target correlation matrix across age groups. It is constructed by hand here;
# in other applications it would most likely be estimated from data.

## Same decaying structure as the time-series correlation: the entry for age
## groups i and j is age_corr^|i - j|
corr_mat <- toeplitz(age_corr^(0:(ages - 1)))

In [17]:
# Reorder the draws of a 3D array so that its second dimension follows a
# target correlation structure, while leaving the first dimension unchanged.
#
# X is indexed as [dim1, level, draw] (in the test case here: time, age, draw;
# the second dimension could equally be country, cause, ...). For every level
# the draws are reordered as whole [dim1] slices, so the relationship along
# the first dimension inside a draw is untouched. The new order is chosen by:
#   1. summing each draw over the first dimension and ordering the draws of a
#      level by that total, and
#   2. placing those ordered draws according to the ranks of multivariate
#      normal draws generated with correlation matrix `cor_mat`.
#
# Args:
#   X:         3D numeric array [dim1, level, draw] with at least two draws.
#   cor_mat:   level x level correlation matrix to impose across levels.
#   df_return: if TRUE, return a long data.table with columns Var1, Var2, Var3
#              (integer positions along the three dimensions) and value;
#              dimnames of X are not carried into that table. Otherwise return
#              an array shaped like X.
#
# Returns:
#   The reordered array, or its long data.table form when df_return is TRUE.
draw2Dcopula_array <- function(X, cor_mat, df_return = FALSE) {
  stopifnot(is.array(X), length(dim(X)) == 3L)
  n_levels <- dim(X)[2]
  n_draws <- dim(X)[3]
  cor_mat <- as.matrix(cor_mat)
  stopifnot(
    n_levels >= 1L, n_draws >= 2L,
    nrow(cor_mat) == n_levels, ncol(cor_mat) == n_levels
  )

  # level x draw matrix of totals over the first dimension
  level_totals <- apply(X, c(2, 3), sum)
  # draw x level matrix of correlated normal draws and their per-level ranks
  normal_draws <- mvrnorm(
    n = n_draws, mu = rep(0, n_levels), Sigma = cor_mat, empirical = TRUE
  )
  target_ranks <- apply(normal_draws, 2, rank, ties.method = "first")
  # draw x level matrix: column l lists the draws of level l by ascending total
  ascending <- apply(level_totals, 1, order)

  Xcorr <- X
  for (lvl in seq_len(n_levels)) {
    # Sorting by total and then indexing by rank collapses into one lookup:
    # position k receives the draw whose total has rank target_ranks[k, lvl].
    draw_order <- ascending[target_ranks[, lvl], lvl]
    Xcorr[, lvl, ] <- X[, lvl, draw_order]
  }

  if (isTRUE(as.logical(df_return))) {
    # Build the long table directly instead of calling melt() on an array:
    # data.table's melt() only handles data.tables and otherwise redirects to
    # reshape2, which this file does not load.
    positions <- arrayInd(seq_along(Xcorr), dim(Xcorr))
    data.table(
      Var1 = positions[, 1],
      Var2 = positions[, 2],
      Var3 = positions[, 3],
      value = as.vector(Xcorr)
    )
  } else {
    Xcorr
  }
}

In [19]:
# Apply the 2D copula: reorder the draws of Xtad so that the age dimension
# follows the correlation structure given by corr_mat
Xcorr <- draw2Dcopula_array(X = Xtad, cor_mat = corr_mat)

In [21]:
# Correlation over the first dimension (time) is unchanged by the copula step:
# for every age group (columns), each year is correlated with the previous
# year across draws (rows)
head(vapply(
  1:ages,
  function(age) {
    vapply(
      2:years,
      function(yr) cor(Xcorr[yr, age, ], Xcorr[yr - 1, age, ]),
      numeric(1)
    )
  },
  numeric(years - 1)
))

# Correlation over the second dimension (age) now moves toward the desired
# target, without necessarily reaching it in every single year
head(vapply(
  2:ages,
  function(age) {
    vapply(
      1:years,
      function(yr) cor(Xcorr[yr, age, ], Xcorr[yr, age - 1, ]),
      numeric(1)
    )
  },
  numeric(years)
))

0,1,2,3,4,5,6,7,8,9
0.9777503,0.9776779,0.9776912,0.9778003,0.977894,0.9773128,0.9781345,0.9784732,0.9772212,0.9775751
0.9778401,0.9776624,0.9778496,0.977959,0.9776824,0.9776767,0.9784543,0.9783413,0.9779383,0.9781905
0.9780807,0.9773805,0.9781573,0.9782313,0.9778274,0.9778883,0.9785527,0.9779139,0.9773963,0.9778589
0.9777611,0.9783554,0.9777682,0.977823,0.9781945,0.9775027,0.9783171,0.9778702,0.9770203,0.9783142
0.9776019,0.9781674,0.977291,0.9774607,0.9780133,0.9784573,0.9782896,0.978066,0.9767617,0.9779575
0.9771637,0.9784698,0.9777643,0.9777057,0.9784563,0.9778852,0.9780417,0.9783607,0.9767156,0.9784227


0,1,2,3,4,5,6,7,8
0.5396035,0.5466808,0.5203383,0.5344396,0.5439552,0.5457149,0.5454963,0.5256235,0.5338712
0.5617055,0.5640241,0.5395305,0.5553279,0.5612359,0.566842,0.5665981,0.5459108,0.5566549
0.5826635,0.5827614,0.5623521,0.5779705,0.5817825,0.5814974,0.5831465,0.5677506,0.5751876
0.5964457,0.595898,0.5747669,0.5989533,0.5971047,0.5975853,0.5942697,0.5807103,0.5960322
0.6141953,0.6085401,0.5910586,0.6104687,0.6112373,0.6146772,0.6110447,0.6004631,0.6137842
0.6299819,0.6238454,0.6106424,0.6282667,0.6327926,0.62643,0.6308245,0.6136965,0.6285925
