# Comparing permutation invariance of lp_pmd and R (MultiCCA)
## Permutation of the datasets for lp_pmd

In [31]:
from sparsecca._multicca_lp import lp_pmd
import pandas as pd
import numpy as np

In [14]:
def test_weights(weights_a, weights_b, perm_b, dec=5):
    """ tests whether it is solved for same weights when permuting Xn in input 
        --------
        Parameter:  weights_a: output of lp_pmd 
                    weights_b: output of lp_pmd permuted Xn
                               -> weights are of type np.ndarray in shape (N, f, K)
                                - N: len(Xn) datasets
                                - f: amount of features
                                - K: amount of MCPs
                    perm_b:    permutation of datasets
                    dec:       decimals to which weights should be rounded
                
        returns: boolean: True if rounded weights are th same
                          False if not the same
    """
    
    weights_a_rounded = weights_a.round(decimals=dec)
    weights_b_rounded = weights_b.round(decimals=dec)
    
    weights_b_ordered = []
    for o in perm_b:
        weights_b_ordered.append(weights_b_rounded[o])
        
    return all(x==True for x in (weights_a_rounded==weights_b_ordered).flatten())

In [3]:
# example input: three CSV files, each loaded into its own DataFrame
mcca1, mcca2, mcca3 = (
    pd.read_csv(csv_path, sep=",")
    for csv_path in (
        "../data/multicca1.csv",
        "../data/multicca3.csv",
        "../data/multicca4.csv",
    )
)

In [22]:
# keep only the values of columns 1..6 from each dataset
datasets = [frame.iloc[:, 1:7].values for frame in (mcca1, mcca2, mcca3)]
# same datasets in permuted order: mcca3, mcca1, mcca2
datasets_perm = [frame.iloc[:, 1:7].values for frame in (mcca3, mcca1, mcca2)]

In [23]:
# weights for the datasets in their original order
ws_LA, _ = lp_pmd(
    datasets,
    [1.5, 1.5, 1.5],
    K=1,
    standardize=True,
    mimic_R=True,
)
# weights for the permuted datasets (checked later with perm 1, 2, 0)
ws_LA_perm, _ = lp_pmd(
    datasets_perm,
    [1.5, 1.5, 1.5],
    K=1,
    standardize=True,
    mimic_R=True,
)

Model unknown

  Variables:
    w_i_f : Size=15, Index=w_i_f_index
        Key    : Lower : Value                : Upper : Fixed : Stale : Domain
        (0, 0) :  None :  0.12902013243175234 :  None : False : False :  Reals
        (0, 1) :  None :  -0.7267034167497635 :  None : False : False :  Reals
        (0, 2) :  None :  -0.5514961376549197 :  None : False : False :  Reals
        (0, 3) :  None :   0.3521081788300304 :  None : False : False :  Reals
        (0, 4) :  None :  0.16470519095417985 :  None : False : False :  Reals
        (1, 0) :  None :  0.22484145596723384 :  None : False : False :  Reals
        (1, 1) :  None :  -0.6475990962207132 :  None : False : False :  Reals
        (1, 2) :  None : 0.028845632100235025 :  None : False : False :  Reals
        (1, 3) :  None :   0.2656479398776398 :  None : False : False :  Reals
        (1, 4) :  None :   0.6772450381640168 :  None : False : False :  Reals
        (2, 0) :  None :  0.13210353603984287 :  None : False : 

#### Compare Output for lp_pmd

In [6]:
# weights returned by lp_pmd for the datasets in their original order (mcca1, mcca2, mcca3)
ws_LA


array([[[ 0.12902013],
        [-0.72670342],
        [-0.55149614],
        [ 0.35210818],
        [ 0.16470519]],

       [[ 0.22484146],
        [-0.6475991 ],
        [ 0.02884563],
        [ 0.26564794],
        [ 0.67724504]],

       [[ 0.13210354],
        [-0.65148581],
        [ 0.06037329],
        [ 0.28548346],
        [ 0.68772754]]])

In [7]:
# weights returned by lp_pmd for the permuted input order (mcca3, mcca1, mcca2)
ws_LA_perm

array([[[ 0.13210354],
        [-0.65148581],
        [ 0.06037329],
        [ 0.28548346],
        [ 0.68772754]],

       [[ 0.12902013],
        [-0.72670342],
        [-0.55149614],
        [ 0.35210818],
        [ 0.16470519]],

       [[ 0.22484146],
        [-0.6475991 ],
        [ 0.02884563],
        [ 0.26564794],
        [ 0.67724504]]])

In [24]:
# [1,2,0] selects ws_LA_perm[1], ws_LA_perm[2], ws_LA_perm[0], i.e. the blocks belonging to
# mcca1, mcca2, mcca3, so they line up with the order used for ws_LA
test_weights(ws_LA, ws_LA_perm, [1,2,0])

True

## Permutation of the datasets for the R function (MultiCCA)

In [25]:
from rpy2 import robjects
import rpy2.robjects.packages as rpackages

In [27]:
# Import R's `utils` package and choose CRAN mirror number 1 so that packages can be installed.
utils = rpackages.importr("utils")
utils.chooseCRANmirror(ind=1)

# Install the R package PMA only if it is not installed yet.
if not rpackages.isinstalled("PMA"):
    utils.install_packages("PMA", verbose=True)

# Run an R snippet that loads PMA, reads the three CSV files, and calls MultiCCA twice:
# once on list(data1, data2, data3) and once on the permuted list(data3, data1, data2),
# both with penalty 1.5, one component and standardize = TRUE.
# The snippet ends with `res <- list(res_1$ws, res_2$ws)`; the cells below index the returned
# object as r_pma_ws[0] (original order) and r_pma_ws[1] (permuted order).
# NOTE(review): the R code reads from absolute /workspaces/sparsecca/tests/data/ paths while the
# Python cells read from ../data/ — confirm both locations hold the same files.
# NOTE(review): `cls` is assigned inside the R snippet but not referenced afterwards.
r_pma_ws = robjects.r(
    """
    library("PMA")

    cls <- c(lat = "numeric", lon = "numeric")
    data1 <- read.table("/workspaces/sparsecca/tests/data/multicca1.csv", sep = ",", header = TRUE)
    rownames(data1) <- data1$X
    data1 <- data1[, 2:ncol(data1)]

    data2 <- read.table("/workspaces/sparsecca/tests/data/multicca3.csv", sep = ",", header = TRUE)
    rownames(data2) <- data2$X
    data2 <- data2[, 2:ncol(data2)]

    data3 <- read.table("/workspaces/sparsecca/tests/data/multicca4.csv", sep = ",", header = TRUE)
    rownames(data3) <- data3$X
    data3 <- data3[, 2:ncol(data3)]

    datasets <- list(data1, data2,data3)
    datasets_perm <- list(data3, data1,data2)
    res_1 <- MultiCCA(
        datasets,
        type = "standard",
        penalty = 1.5,
        ncomponents = 1,
        standardize = TRUE
    )
    res_2 <- MultiCCA(
        datasets_perm,
        type = "standard",
        penalty = 1.5,
        ncomponents = 1,
        standardize = TRUE
    )

    res <- list(res_1$ws,res_2$ws)
    """
)


12345671234

#### Compare Output for R (MultiCCA)

In [28]:
# R weights for the datasets in their original order
print("\nR weigth for dataset:", r_pma_ws[0], sep="\n")


R weigth for dataset:
[[1]]
            [,1]
[1,]  0.00000000
[2,] -0.22667345
[3,] -0.92623899
[4,]  0.05012249
[5,] -0.29696501

[[2]]
            [,1]
[1,] -0.58207714
[2,]  0.00000000
[3,] -0.80949488
[4,]  0.05781411
[5,]  0.05061388

[[3]]
           [,1]
[1,] -0.5545402
[2,]  0.0000000
[3,] -0.8231140
[4,]  0.1223458
[5,]  0.0000000




In [29]:
# R weights for the permuted datasets (perm 1,2,0)
print("\nR weigth for dataset_perm:", r_pma_ws[1], sep="\n")



R weigth for dataset_perm:
[[1]]
            [,1]
[1,]  0.00000000
[2,] -0.63653415
[3,]  0.00000000
[4,]  0.09853806
[5,]  0.76492779

[[2]]
           [,1]
[1,]  0.0000000
[2,] -0.8484172
[3,] -0.5100578
[4,]  0.1415250
[5,]  0.0000000

[[3]]
            [,1]
[1,]  0.01983114
[2,] -0.65498980
[3,]  0.00000000
[4,]  0.07337363
[5,]  0.75180543




In [32]:
# same permutation check as for lp_pmd, applied to the two MultiCCA weight lists returned from R
# (each converted to a NumPy array first)
test_weights(np.asarray(r_pma_ws[0]), np.asarray(r_pma_ws[1]), [1,2,0])

False