### Functional CCA

This notebook verifies the concept of the initialization method of the operator A 
(1) input: functional data
(2) verifies the algorithm in the empirical domain 

In [1]:
library(pracma)
library(matrixcalc)
library(fields)
library(Matrix)
#load source file
src.path <- "../../src"
source(paste(src.path, "DataGenerationProcess", "synth_basis.R", sep="/"))
source(paste(src.path, "DataGenerationProcess", "synth_data.R", sep="/"))
source(paste(src.path, "DataGenerationProcess", "synth_graph.R", sep="/"))
source(paste(src.path, "DataGenerationProcess", "synth_linearop.R", sep="/"))

source(paste(src.path, "Estimation", "basis_estimation.R", sep="/"))
source(paste(src.path, "Utility", "utility.R", sep="/"))
source(paste(src.path, "Estimation", "cca_estimation.R", sep="/"))
source(paste(src.path, "Utility", "R2python.R", sep="/"))
###specify save path and filename


Loading required package: spam

Loading required package: dotCall64

Loading required package: grid

Spam version 2.7-0 (2021-06-25) is loaded.
Type 'help( Spam)' or 'demo( spam)' for a short introduction 
and overview of this package.
Help for individual functions is also obtained by adding the
suffix '.spam' to the function name, e.g. 'help( chol.spam)'.


Attaching package: 'spam'


The following objects are masked from 'package:base':

    backsolve, forwardsolve


Loading required package: viridis

Loading required package: viridisLite


Try help(fields) to get started.


Attaching package: 'Matrix'


The following object is masked from 'package:spam':

    det


The following objects are masked from 'package:pracma':

    expm, lu, tril, triu


Loading required package: splines

Loading required package: fds

Loading required package: rainbow

Loading required package: MASS

Loading required package: pcaPP

Loading required package: RCurl

Loading required package: deSolve


Atta

In [2]:
cov_name="tridiag3"

###specify parameters
n <- 200
PP <- 150
k.gen <- 9
M <- 2
obs.time <- seq(0,1,1/50)


gendata <- function(N, seed_id){
for (n in c(N)){
    for (p in c(PP)){
        ## be careful for the choice of the number of basis function 
## fourier basis: km must be odd
## bspline basis km>4
km.gen <- c(9,9)

Apinv_list <- list()
A_list <- list()
N_list <- list()
basis.m_list <- list()
true.basis_list <- list()
true.values_list <- list()




#generate data from the graph

if (cov_name == "tridiag1"){
    omega <- synth.omega.tridiag1_v2(p, k.gen)
}
if (cov_name == "tridiag2"){
    omega <- synth.omega.tridiag2_v2(p, k.gen)
}
if (cov_name == "tridiag3"){
    omega <- synth.omega.tridiag3_v2(p, k.gen)
}

#ensure that the diagonal values are all 1

G.true <- matrix(0, p, p) # p by p adjacency matrix
for(i in 1:p){
  for(j in 1:p){
    if(sum(abs(omega[((i-1)*k.gen+1):(i*k.gen), ((j-1)*k.gen+1):(j*k.gen)])) > 0)
      G.true[i,j] <- 1
  }
}

cov <- solve(omega)

for(m in 1:M){
    Am <- synth.linear_op.sparse_orthogonal(k.gen, km.gen[m], 3, scale=m*0.1)
    Am <- t(t(Am) %*% diag(.2*((1:k.gen))+1)) #this is to make the singular values distinct

    A_list[[m]] <- Am
    Apinv_list[[m]] <- solve(Am)
    #noise covariance
    N_list[[m]] <- diag(p*km.gen[m])*.001
    #structured noise
    basis.m_list[[m]] <- synth.fourier.bases.m(obs.time, km.gen[m])
}

#convert to regression B
B_list <- utility.graph2B(omega,p)
# save true graphs


#generate data 
#set.seed(seed_id)
#multivariate_data <- synth.dependent_data(n, p, cov, Apinv_list, N_list)
#set.seed(seed_id)
data <- synth.data_from_graph(n, p, cov, basis.m_list, Apinv_list,N_list, dependent=TRUE, addnoise=FALSE)

#########################
# test node aggragation #
#########################

concate_data <- list()
for(m in 1:2){
        dm <- data[[m]][,1,]
    for(i in 2:p){
        dm <- rbind(dm, data[[m]][,p,])
    }
    concate_data[[m]] <- dm
}
fourier.basis1 <- create.fourier.basis(rangeval=c(0,1), nbasis=km.gen[1])
d1 <-Data2fd(argvals=obs.time, y=t(concate_data[[1]]), basisobj=fourier.basis1)
#data modality 2
fourier.basis2 <- create.fourier.basis(rangeval=c(0,1), nbasis=km.gen[2])
d2 <-Data2fd(argvals=obs.time, y=t(concate_data[[2]]), basisobj=fourier.basis2)

cca.r.est <- estimate.cca.basis_expansion(d1, d2, km.gen[1])

##save data 
est_Ac <- list()
est_Ac[[1]] <- cca.r.est$A1
est_Ac[[2]] <- cca.r.est$A2 


######################
# test single node   #
######################


fourier.basis1 <- create.fourier.basis(rangeval=c(0,1), nbasis=km.gen[1])
d1 <-Data2fd(argvals=obs.time, y=t(data[[1]][,1,]), basisobj=fourier.basis1)
#data modality 2
fourier.basis2 <- create.fourier.basis(rangeval=c(0,1), nbasis=km.gen[2])
d2 <-Data2fd(argvals=obs.time, y=t(data[[2]][,1,]), basisobj=fourier.basis2)

cca.r.est <- estimate.cca.basis_expansion(d1, d2, km.gen[1])

##save data 
est_A <- list()
est_A[[1]] <- cca.r.est$A1
est_A[[2]] <- cca.r.est$A2 


#cca.r.est <- estimate.multivariate.cca(multivariate_data[[1]][,1:9], multivariate_data[[2]][,1:9],9)

#est_Am <- list()
#est_Am[[1]] <- cca.r.est$A1
#est_Am[[2]] <- cca.r.est$A2 

}
}
A <- list()
#A$est_Am <- est_Am
A$est_A  <- est_A
A$A_list <- A_list
A$est_Ac <- est_Ac
return(A)
}

In [3]:
diff_A_list <- list()
diff_Ac_list <- list()

for(iter in 1:10){
    diff_Am <- list()
    diff_A <- list()
    diff_Ac <- list()


    for(n in c(352, 391, 440, 503, 587, 705, 881, 1175, 1762, 3525, 17626)){
        A<-gendata(n, iter)

        A_list <- A$A_list
        est_Am <- A$est_Am
        est_A  <- A$est_A 
        est_Ac <- A$est_Ac
        for(i in 1:9){
            if(norm(est_A[[1]][i,]+A_list[[1]][i,]) < norm(est_A[[1]][i,]-A_list[[1]][i,])){
                est_A[[1]][i,] <- - est_A[[1]][i,] 
            }

        }

        for(i in 1:9){
            
            if(norm(est_A[[2]][i,]+A_list[[2]][i,]) < norm(est_A[[2]][i,]-A_list[[2]][i,])){
                est_A[[2]][i,] <- - est_A[[2]][i,] 
            }
        }

        for(i in 1:9){
            if(norm(est_Am[[1]][i,]+A_list[[1]][i,]) < norm(est_Am[[1]][i,]-A_list[[1]][i,])){
                est_Am[[1]][i,] <- - est_Am[[1]][i,] 
            }

        }

        for(i in 1:9){
            
        if(norm(est_Am[[2]][i,]+A_list[[2]][i,]) < norm(est_Am[[2]][i,]-A_list[[2]][i,])){
                est_Am[[2]][i,] <- - est_Am[[2]][i,] 
            }
        }

        for(i in 1:9){
            if(norm(est_Ac[[1]][i,]+A_list[[1]][i,]) < norm(est_Ac[[1]][i,]-A_list[[1]][i,])){
                est_Ac[[1]][i,] <- - est_Ac[[1]][i,] 
            }

        }

        for(i in 1:9){
            
            if(norm(est_Ac[[2]][i,]+A_list[[2]][i,]) < norm(est_Ac[[2]][i,]-A_list[[2]][i,])){
                est_Ac[[2]][i,] <- - est_Ac[[2]][i,] 
            }
        }

        tempA <- norm(est_A[[1]]-A_list[[1]])**2 + norm(est_A[[2]]-A_list[[2]])**2
        diff_A <- c(diff_A, tempA)

        tempAm <- norm(est_Am[[1]]-A_list[[1]])**2 + norm(est_Am[[2]]-A_list[[2]])**2
        diff_Am <- c(diff_Am, tempAm)


        tempAc <- norm(est_Ac[[1]]-A_list[[1]])**2 + norm(est_Ac[[2]]-A_list[[2]])**2
        diff_Ac <- c(diff_Ac, tempAc)

    }

    diff_A_list[[iter]] <- diff_A
    diff_Ac_list[[iter]] <- diff_Ac
}



[1] "Generating data from modality 1"
[1] "Generating data from modality 2"
[1] "canonical correlation values:"
[1] 0.9999483 0.9999129 0.9998941 0.9998781 0.9998516 0.9997637 0.9997585
[8] 0.9997027 0.9996976
[1] "canonical correlation values:"
[1] 0.9999372 0.9999253 0.9999080 0.9998531 0.9998374 0.9998007 0.9997770
[8] 0.9997226 0.9995604
[1] "Generating data from modality 1"
[1] "Generating data from modality 2"
[1] "canonical correlation values:"
[1] 0.9999380 0.9999279 0.9998895 0.9998500 0.9998001 0.9997789 0.9997537
[8] 0.9997170 0.9996385
[1] "canonical correlation values:"
[1] 0.9999453 0.9999232 0.9999092 0.9998689 0.9998591 0.9997853 0.9997559
[8] 0.9996609 0.9995672
[1] "Generating data from modality 1"
[1] "Generating data from modality 2"
[1] "canonical correlation values:"
[1] 0.9999361 0.9999138 0.9998954 0.9998603 0.9998454 0.9997914 0.9997552
[8] 0.9996762 0.9995508
[1] "canonical correlation values:"
[1] 0.9999401 0.9998995 0.9998805 0.9998525 0.9998311 0.9997947 0.

In [5]:
remove_outlier <- function(data_list, a = 2){
    data.mean <- mean(unlist(data_list))
    data.std  <- std(unlist(data_list))
    return.list <- list()
    for(data in data_list){
        if(data <= data.mean + a*data.std){
            if(data >= data.mean - a*data.std){
                return.list <- append(return.list, data)
            }
        }
        
    }
    return(return.list)
}

In [9]:

Nlist = c(352, 391, 440, 503, 587, 705, 881, 1175, 1762, 3525, 17626)
for(n in 1:11){
    Alist <- list()
    for(iter in 1:10){
        A_list[[iter]] <- diff_Ac_list[[iter]][n]
    }
    print(paste('n=', Nlist[n]," ", mean(unlist(A_list)), '(',std(unlist(A_list)), ')', sep=""))
    A_list_rm <- remove_outlier(A_list)

    
}

print('remove outlier')
for(n in 1:11){
    Alist <- list()
    for(iter in 1:10){
        A_list[[iter]] <- diff_Ac_list[[iter]][n]
    }

    A_list_rm <- remove_outlier(A_list)
    
    print(paste('n=', Nlist[n], " ", mean(unlist(A_list_rm)), '(',std(unlist(A_list_rm)), ')', sep=""))
    
}

[1] "n=352 1.40601014736238(0.546455776514321)"
[1] "n=391 1.28397819752615(0.56147915999103)"
[1] "n=440 1.16344717171908(0.249424759076036)"
[1] "n=503 1.62954523282343(0.788122775910645)"
[1] "n=587 1.2689322453012(0.59955606864558)"
[1] "n=705 1.21213359675747(0.854968665144999)"
[1] "n=881 0.833694562516734(0.269887060074659)"
[1] "n=1175 0.762351866538836(0.192597668206116)"
[1] "n=1762 0.639860920868559(0.269008738042895)"
[1] "n=3525 0.538849608472021(0.14771688088566)"
[1] "n=17626 0.519059664126836(0.103997852371649)"
[1] "remove outlier"
[1] "n=352 1.40601014736238(0.546455776514321)"
[1] "n=391 1.28397819752615(0.56147915999103)"
[1] "n=440 1.16344717171908(0.249424759076036)"
[1] "n=503 1.44951347420037(0.578058011688677)"
[1] "n=587 1.12228293844347(0.403061035527002)"
[1] "n=705 0.953746825787037(0.266928749456391)"
[1] "n=881 0.833694562516734(0.269887060074659)"
[1] "n=1175 0.762351866538836(0.192597668206116)"
[1] "n=1762 0.576640703286918(0.190912178204593)"
[1] "n=3

In [10]:
for(n in 1:11){
    Alist <- list()
    for(iter in 1:10){
        A_list[[iter]] <- diff_A_list[[iter]][n]
    }
    print(paste(mean(unlist(A_list)), std(unlist(A_list))))

}


print('remove outlier')
for(n in 1:11){
    Alist <- list()
    for(iter in 1:10){
        A_list[[iter]] <- diff_A_list[[iter]][n]
    }

    A_list_rm <- remove_outlier(A_list)
    
    print(paste(mean(unlist(A_list_rm)), std(unlist(A_list_rm))))
    
}

[1] "1.3091072646587 0.777910605245543"
[1] "1.36039057733118 0.329127221509846"
[1] "1.2227273529219 0.432402340261286"
[1] "1.15795684646271 0.443674364539738"
[1] "0.993673173614625 0.362240496273784"
[1] "1.03988459734306 0.28983132085036"
[1] "1.01140839307437 0.341473104266605"
[1] "0.792886077887757 0.248742405038068"
[1] "0.61092575654259 0.244165891699361"
[1] "0.590602285716118 0.226684622245258"
[1] "0.499299085155006 0.0816936876994606"
[1] "remove outlier"
[1] "1.11753509348592 0.517604274358638"
[1] "1.36039057733118 0.329127221509846"
[1] "1.2227273529219 0.432402340261286"
[1] "1.15795684646271 0.443674364539738"
[1] "0.993673173614625 0.362240496273784"
[1] "1.03988459734306 0.28983132085036"
[1] "1.01140839307437 0.341473104266605"
[1] "0.736466789838866 0.183838519774203"
[1] "0.61092575654259 0.244165891699361"
[1] "0.590602285716118 0.226684622245258"
[1] "0.480063789485365 0.0578414141582017"
