# **In this notebook we run the R methods WNN and MOFA+ for benchmarking clustering**

All data used for benchmarking can be downloaded from the command line (see commented code below)

In [None]:
# Loom files for simulation data
# Run code below in command line
# wget --content-disposition https://data.caltech.edu/records/mzs5b-72897/files/meK_sim_looms.tar.gz?download=1
# tar -xvf meK_sim_looms.tar.gz

# wget --content-disposition https://data.caltech.edu/records/2tsha-rcz19/files/meK_looms.tar.gz?download=1
# tar -xvf meK_looms.tar.gz

In [1]:
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# BiocManager::install("scater")
# install.packages('Seurat')

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

'getOption("repos")' replaces Bioconductor standard repositories, see
'help("repositories", package = "BiocManager")' for details.
Replacement repositories:
    CRAN: https://cran.r-project.org

Bioconductor version 3.18 (BiocManager 1.30.22), R 4.3.2 (2023-10-31)

Installing package(s) 'BiocVersion', 'scater'

also installing the dependencies ‘zlibbioc’, ‘bitops’, ‘XVector’, ‘sparseMatrixStats’, ‘RCurl’, ‘GenomeInfoDbData’, ‘abind’, ‘lambda.r’, ‘futile.options’, ‘sitmo’, ‘GenomicRanges’, ‘DelayedMatrixStats’, ‘Biobase’, ‘IRanges’, ‘GenomeInfoDb’, ‘S4Arrays’, ‘SparseArray’, ‘matrixStats’, ‘RcppHNSW’, ‘ScaledMatrix’, ‘irlba’, ‘rsvd’, ‘futile.logger’, ‘snow’, ‘BH’, ‘beeswarm’, ‘vipor’, ‘gridExtra’, ‘FNN’, ‘RcppAnnoy’, ‘RcppProgress’, ‘dqrng’, ‘Cairo’, ‘png’, ‘SingleCellExperiment’, ‘scuttle’, ‘BiocGenerics’, ‘S4Vectors’, ‘SummarizedExperiment’, ‘DelayedArray’, ‘MatrixGenerics’, ‘beachmat’, ‘BiocNeighbors’, ‘

In [None]:
# if (!requireNamespace("remotes", quietly = TRUE)) {
#   install.packages("remotes")
# }
# remotes::install_github("mojaveazure/seurat-disk")

In [15]:
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# BiocManager::install("MOFA2")

'getOption("repos")' replaces Bioconductor standard repositories, see
'help("repositories", package = "BiocManager")' for details.
Replacement repositories:
    CRAN: https://cran.r-project.org

Bioconductor version 3.18 (BiocManager 1.30.22), R 4.3.2 (2023-10-31)

Installing package(s) 'MOFA2'

also installing the dependencies ‘filelock’, ‘Rhdf5lib’, ‘rhdf5filters’, ‘dir.expiry’, ‘basilisk.utils’, ‘rhdf5’, ‘HDF5Array’, ‘corrplot’, ‘basilisk’


Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Old packages: 'beachmat', 'BiocManager', 'boot', 'bslib', 'callr', 'codetools',
  'crul', 'curl', 'data.table', 'DBI', 'dbplyr', 'deldir', 'digest', 'fs',
  'future', 'future.apply', 'GenomeInfoDb', 'ggplot2', 'globals', 'gtable',
  'hdf5r', 'htmltools', 'httpuv', 'igraph', 'knitr', 'lattice', 'lava',
  'matrixStats', 'munsell', 'openssl', 'parallelly', 'processx', 'promises',
  'quantmod', 'ragg', 'RcppArmadillo', 'RcppEigen', 'recipes', 'remotes',
  'repr', 'retic

In [46]:
# install.packages('aricode')

In [100]:
library(scater)
library(Seurat)
library(SeuratDisk)
# library(SeuratData)
library(patchwork)
library(aricode)
library(tidyverse)
library(MOFA2)

In [101]:
num_runs <- 3

## **Run on benchmark data**

In [102]:
looms <- c('./hvg_objs_0215/cl5_357hvgs.loom',
 './hvg_objs_0215/cl5_1193all.loom',
 './hvg_objs_0215/allen_b02h01a02_359hvgs.loom',
 './hvg_objs_0215/cl3_466hvgs.loom',
 './hvg_objs_0215/allen_b08_682hvgs.loom',
 './hvg_objs_0215/allen_b02h01a02_2770all.loom',
 './hvg_objs_0215/cl3_1137all.loom',
 './hvg_objs_0215/allen_b08_1948all.loom')

#'allen_b08', 'allen_b02h01', 'cl3', 'cl5', 'brca1', 'pbmc','e11e13'
shorts <- c('cl5','cl5','allen_b02h01a02','cl3','allen_b08','allen_b02h01a02',
           'cl3','allen_b08')

### **MOFA2**

In [77]:
res <- 1
xy <- vector("list", num_runs*2*length(shorts)) # create an empty list into which values are to be filled
ind <- 0

for (o in 1:length(looms)){
    a08 <- Connect(filename = looms[o], mode = "r")
    
    
    # gene list
    n_genes <- a08[['row_attrs']][['gene_name']][['dims']]
    gns <- a08[['row_attrs']][['gene_name']][1:n_genes]
    
    # cell ID list
    n_cells <- a08[['col_attrs']][['barcode']][['dims']]
    cellids <- a08[['col_attrs']][['barcode']][1:n_cells]
    celllabs <- a08[['col_attrs']][['subclass_label']][1:n_cells]
    
    # get raw counts matrix
    raw.cnts <- t(a08[["layers/unspliced"]][,])
    colnames(raw.cnts) <- cellids
    # rownames(raw.cnts) <- gns
    
    
    metadata <- data.frame(
        cellID = cellids
        )
    rownames(metadata) <- cellids
    
    
    
    s_obj <- CreateSeuratObject(counts = raw.cnts,
                                project = "fromLoom",
                                assay = "URNA",
                                meta.data = metadata)
    
    sraw.cnts <- t(a08[["layers/spliced"]][,])
    colnames(sraw.cnts) <- cellids

    a08$close_all()

    num_features <- nrow(s_obj)
    print(looms[o])
    
    for (nr in 1:num_runs){
        # --- Running MOFA2 -----
        MOFAobject <- create_mofa(list(raw.cnts,sraw.cnts))
        data_opts <- get_default_data_options(MOFAobject)
        data_opts$scale_views <- TRUE
        model_opts <- get_default_model_options(MOFAobject)
        train_opts <- get_default_training_options(MOFAobject)
    
        MOFAobject <- prepare_mofa(
          object = MOFAobject,
          data_options = data_opts,
          model_options = model_opts,
          training_options = train_opts
        )
        outfile = file.path(getwd(),"model.hdf5")
        MOFAobject.trained <- run_mofa(MOFAobject, outfile)
        
        model <- load_model(outfile)
    
        factors <- get_factors(model)$group1
    
    
        pca_embeddings_mofa <- factors
        rownames(pca_embeddings_mofa) <- colnames(s_obj)
        colnames(pca_embeddings_mofa) <- colnames(factors)
        s_obj[["test"]] <- CreateDimReducObject(embeddings = as.matrix(pca_embeddings_mofa), key='Factor', assay = 'URNA')
        
        s_obj <- FindNeighbors(s_obj,reduction = 'test',graph.name = 'mofa',dims =1:dim(factors)[2])
    
        
        #Get leiden clusters for different res
        s_obj <- FindClusters(s_obj,graph.name='mofa',  algorithm = 4, resolution = res, verbose = FALSE, cluster.name='leiden')
        ind <- ind+1
        xy[[ind]] <- data.frame(Method = 'MOFA2-Leiden',
                            Clustering = I(list(s_obj[['leiden']]$leiden)),
                            Matrices = 'U,S',
                            Hyperparam = res,
                            HVGs = num_features ,
                            Data = shorts[o],
                            ARI = ARI(celllabs,s_obj[['leiden']]$leiden),
                            AMI = AMI(celllabs,s_obj[['leiden']]$leiden))

        print(ARI(celllabs,s_obj[['leiden']]$leiden))
    
    
    
        k <- length(unique(celllabs))
        d <- cluster_samples(model, k=k)

        ind <- ind+1
        xy[[ind]] <- data.frame(Method = 'MOFA2-K-Means',
                            Clustering = paste(d$cluster,collapse="|"),
                            Matrices = 'U,S',
                            Hyperparam = res,
                            HVGs = num_features ,
                            Data = shorts[o],
                            ARI = ARI(celllabs,d$cluster),
                            AMI = AMI(celllabs,d$cluster))
        print(ARI(celllabs,d$cluster))
    
        # --- Running MOFA2 -----
    }
    

    

}

xy <- do.call(rbind, xy)

xy %>% 
  rowwise() %>% 
  mutate_if(is.list, ~paste(unlist(.), collapse = '|')) %>% 
  write.csv('./fits/Seuratruns_0508/MOFA_bench_results.csv', row.names = FALSE)

head(xy)
#shorts[o]

“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/cl5_357hvgs.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5606682
[1] 0.7344283


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5606682
[1] 0.6284335


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5606682
[1] 0.8961504


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/cl5_1193all.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5452449
[1] 0.6902323


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5452449
[1] 0.7207902


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5452449
[1] 0.8075214


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/allen_b02h01a02_359hvgs.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4263297
[1] 0.3525554


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4263297
[1] 0.2213768


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4263297
[1] 0.2582337


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/cl3_466hvgs.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4419543
[1] 0.6561983


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4419543
[1] 0.6068863


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4419543
[1] 0.6356439


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/allen_b08_682hvgs.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.2973622
[1] 0.231527


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.2973622
[1] 0.2141116


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.2973622
[1] 0.2506011


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/allen_b02h01a02_2770all.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4480626
[1] 0.2006984


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4480626
[1] 0.2140604


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.4480626
[1] 0.263312


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/cl3_1137all.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5136633
[1] 0.9932934


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5136633
[1] 0.4477983


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.5136633
[1] 0.9932934


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "./hvg_objs_0215/allen_b08_1948all.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.3108353
[1] 0.2529561


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.3108353
[1] 0.2048092


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“Keys should 

[1] 0.3108353
[1] 0.217718


Unnamed: 0_level_0,Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
Unnamed: 0_level_1,<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1,MOFA2-Leiden,"1, 1, 10....","U,S",1,357,cl5,0.5606682,0.629609
2,MOFA2-K-Means,4|4|3|4|....,"U,S",1,357,cl5,0.7344283,0.7512999
3,MOFA2-Leiden,"1, 1, 10....","U,S",1,357,cl5,0.5606682,0.629609
4,MOFA2-K-Means,4|4|5|4|....,"U,S",1,357,cl5,0.6284335,0.7499459
5,MOFA2-Leiden,"1, 1, 10....","U,S",1,357,cl5,0.5606682,0.629609
6,MOFA2-K-Means,1|1|2|1|....,"U,S",1,357,cl5,0.8961504,0.861245


In [78]:
xy

Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
MOFA2-Leiden,"1, 1, 10....","U,S",1,357,cl5,0.5606682,0.629609
MOFA2-K-Means,4|4|3|4|....,"U,S",1,357,cl5,0.7344283,0.7512999
MOFA2-Leiden,"1, 1, 10....","U,S",1,357,cl5,0.5606682,0.629609
MOFA2-K-Means,4|4|5|4|....,"U,S",1,357,cl5,0.6284335,0.7499459
MOFA2-Leiden,"1, 1, 10....","U,S",1,357,cl5,0.5606682,0.629609
MOFA2-K-Means,1|1|2|1|....,"U,S",1,357,cl5,0.8961504,0.861245
MOFA2-Leiden,"1, 1, 7,....","U,S",1,1193,cl5,0.5452449,0.63011
MOFA2-K-Means,2|2|1|2|....,"U,S",1,1193,cl5,0.6902323,0.7032125
MOFA2-Leiden,"1, 1, 7,....","U,S",1,1193,cl5,0.5452449,0.63011
MOFA2-K-Means,4|4|3|4|....,"U,S",1,1193,cl5,0.7207902,0.7264055


### **WNN**

In [103]:
res <- c(0.75,1,1.5,2)
xy <- vector("list", num_runs*length(res)*length(shorts)) # create an empty list into which values are to be filled

ind <- 0 
for (o in 1:length(looms)){
    a08 <- Connect(filename = looms[o], mode = "r")
    
    
    # gene list
    n_genes <- a08[['row_attrs']][['gene_name']][['dims']]
    gns <- a08[['row_attrs']][['gene_name']][1:n_genes]
    
    # cell ID list
    n_cells <- a08[['col_attrs']][['barcode']][['dims']]
    cellids <- a08[['col_attrs']][['barcode']][1:n_cells]
    celllabs <- a08[['col_attrs']][['subclass_label']][1:n_cells]
    
    # get raw counts matrix
    raw.cnts <- t(a08[["layers/unspliced"]][,])
    colnames(raw.cnts) <- cellids
    # rownames(raw.cnts) <- gns
    
    
    metadata <- data.frame(
        cellID = cellids
        )
    rownames(metadata) <- cellids
    
    
    
    s_obj <- CreateSeuratObject(counts = raw.cnts,
                                project = "fromLoom",
                                assay = "URNA",
                                meta.data = metadata)
    
    sraw.cnts <- t(a08[["layers/spliced"]][,])
    colnames(sraw.cnts) <- cellids
    
    s_assay <- CreateAssay5Object(counts = sraw.cnts)
    
    s_obj[["SRNA"]] <- s_assay
    
    a08$close_all()
    
    DefaultAssay(s_obj) <- 'URNA'
    s_obj <- NormalizeData(s_obj) 
    # s_obj <- FindVariableFeatures(s_obj) 
    s_obj <- ScaleData(s_obj) 
    s_obj <- RunPCA(s_obj,reduction.name = 'upca', features = Features(s_obj))
    
    DefaultAssay(s_obj) <- 'SRNA'
    s_obj <- NormalizeData(s_obj) 
    # s_obj <- FindVariableFeatures(s_obj)
    s_obj <- ScaleData(s_obj)
    s_obj <- RunPCA(s_obj,reduction.name = 'spca',features = Features(s_obj))
    
    for (nr in 1:num_runs){
        s_obj <- FindMultiModalNeighbors(
          s_obj, reduction.list = list("upca", "spca"), 
          dims.list = list(1:30, 1:30), modality.weight.name = "RNA.weight"
        )
        print(nr)
        #Run Leiden here + save results
        num_features <- nrow(s_obj)
        
        
        for (i in 1:length(res)) {
          ind <- ind + 1
          #Get leiden clusters for different res
          s_obj <- FindClusters(s_obj, graph.name = "wsnn", algorithm = 4, resolution = res[i], verbose = FALSE, cluster.name='leiden')
          #ind <- (o-1)*length(res) + i 
          xy[[ind]] <- data.frame(Method = 'WNN-Leiden',
                                Clustering = I(list(s_obj[['leiden']]$leiden)),
                                Matrices = 'U,S',
                                Hyperparam = res[i],
                                HVGs = num_features ,
                                Data = shorts[o],
                                ARI = ARI(celllabs,s_obj[['leiden']]$leiden),
                                AMI = AMI(celllabs,s_obj[['leiden']]$leiden))
        }
    }

}

xy <- do.call(rbind, xy)

xy %>% 
  rowwise() %>% 
  mutate_if(is.list, ~paste(unlist(.), collapse = '|')) %>% 
  write.csv('./fits/Seuratruns_0508/WNN_bench_results.csv', row.names = FALSE)
  #write.csv('./fits/Seuratruns_0215/WNN_bench_results.csv', row.names = FALSE)


head(xy)
#shorts[o]

“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature120, Feature305, Feature74, Feature170, Feature270, Feature54, Feature57, Feature100, Feature236, Feature86 
	   Feature8, Feature55, Feature317, Feature82, Feature203, Feature240, Feature96, Feature140, Feature278, Feature122 
	   Feature159, Feature216, Feature324, Feature87, Feature251, Feature53, Feature243, Feature237, Feature220, Feature310 
Negative:  Feature334, Feature81, Feature181, Feature303, Feature177, Feature302, Feature212, Feature84, Feature133, Feature304 
	   Feature110, Feature35, Feature91, Feature47, Feature336, Feature138, Feature178, Feature106, Feature254, Feature332 
	   Feature257, Feature46, Feature245, Feature233, Feature226, Feature311, Feature51, Feature356, Feature264, Feature287 
PC_ 2 
Positive:  Feature165, Feature332, Feature45, Feature46, Feature172, Feature334, Fea

[1] 1


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature932, Feature212, Feature269, Feature814, Feature1065, Feature921, Feature1022, Feature280, Feature748, Feature387 
	   Feature333, Feature602, Feature169, Feature88, Feature901, Feature881, Feature168, Feature176, Feature1166, Feature1081 
	   Feature28, Feature184, Feature262, Feature1009, Feature1163, Feature309, Feature863, Feature165, Feature860, Feature859 
Negative:  Feature747, Feature900, Feature1020, Feature1093, Feature214, Feature380, Feature268, Feature644, Feature311, Feature1089 
	   Feature1087, Feature935, Feature2, Feature825, Feature225, Feature849, Feature990, Feature223, Feature55, Feature1103 
	   Feature640, Feature1032, Feature910, Feature425, Feature601, Feature920, Feature460, Feature433, Feature1133, Feature552 
PC_ 2 
Positive:  Feature1115, Feature267, Feature1111, Feature10

[1] 1


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature275, Feature49, Feature338, Feature83, Feature179, Feature165, Feature56, Feature106, Feature28, Feature180 
	   Feature308, Feature276, Feature16, Feature234, Feature230, Feature109, Feature107, Feature161, Feature128, Feature116 
	   Feature99, Feature42, Feature61, Feature186, Feature175, Feature154, Feature204, Feature330, Feature171, Feature242 
Negative:  Feature279, Feature29, Feature148, Feature174, Feature296, Feature266, Feature189, Feature288, Feature125, Feature299 
	   Feature246, Feature67, Feature144, Feature40, Feature340, Feature322, Feature192, Feature64, Feature72, Feature331 
	   Feature4, Feature8, Feature224, Feature219, Feature309, Feature318, Feature191, Feature115, Feature146, Feature71 
PC_ 2 
Positive:  Feature276, Feature227, Feature113, Feature164, Feature47, Feature116, Fe

[1] 1


“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature107, Feature86, Feature353, Feature69, Feature287, Feature60, Feature239, Feature402, Feature337, Feature227 
	   Feature80, Feature155, Feature286, Feature64, Feature362, Feature336, Feature458, Feature236, Feature54, Feature348 
	   Feature127, Feature410, Feature79, Feature323, Feature331, Feature193, Feature432, Feature314, Feature274, Feature222 
Negative:  Feature344, Feature431, Feature53, Feature404, Feature427, Feature14, Feature248, Feature389, Feature283, Feature388 
	   Feature426, Feature220, Feature302, Feature99, Feature154, Feature129, Feature206, Fea

[1] 1


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature136, Feature425, Feature504, Feature209, Feature198, Feature623, Feature519, Feature441, Feature24, Feature584 
	   Feature620, Feature94, Feature117, Feature45, Feature215, Feature434, Feature299, Feature337, Feature565, Feature595 
	   Feature156, Feature38, Feature529, Feature427, Feature61, Feature19, Feature497, Feature612, Feature677, Feature257 
Negative:  Feature77, Feature531, Feature89, Feature541, Feature297, Feature577, Feature243, Feature471, Feature399, Feature192 
	   Feature159, Feature286, Feature527, Feature133, Feature494, Feature18, Feature364, Feature375, Feature95, Feature671 
	   Feature411, Feature414, Feature407, Feature87, Feature173, Feature619, Feature473, Feature134, Feature350, Feature515 
PC_ 2 
Positive:  Feature318, Feature545, Feature355, Feature416, Feature514, Featur

[1] 1


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature969, Feature2142, Feature205, Feature1331, Feature1454, Feature2056, Feature2289, Feature2260, Feature1144, Feature1107 
	   Feature1480, Feature590, Feature644, Feature269, Feature1798, Feature2494, Feature1485, Feature595, Feature1946, Feature1638 
	   Feature2607, Feature573, Feature1602, Feature262, Feature2535, Feature2505, Feature100, Feature1581, Feature57, Feature1567 
Negative:  Feature1187, Feature865, Feature1521, Feature1168, Feature952, Feature461, Feature2665, Feature2275, Feature2112, Feature1375 
	   Feature1202, Feature2356, Feature524, Feature578, Feature220, Feature1287, Feature2493, Feature199, Feature2571, Feature559 
	   Feature1179, Feature153, Feature1737, Feature1858, Feature1820, Feature1879, Feature2619, Feature2008, Feature2648, Feature2576 
PC_ 2 
Positive:  Feature333, Fea

[1] 1


“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“sparse->dense coercion: allocating vector of size 4.1 GiB”
“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature838, Feature123, Feature1062, Feature1131, Feature948, Feature978, Feature1034, Feature29, Feature1033, Feature1037 
	   Feature1038, Feature609, Feature255, Feature949, Feature228, Feature365, Feature935, Feature219, Feature865, Feature1102 
	   Feature456, Feature690, Feature945, Feature821, Feature400, Feature201, Feature866, Feature259, Feature942, Feature295 
Negative:  Feature193, Feature892, Feature235, Feature855, Feature164, Feature696, Feature396, Feature153, Feature556, Feature976 
	   Feature582, Feature819, Feature368, Feature180, Feature695, Feature157,

[1] 1


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature206, Feature1508, Feature224, Feature383, Feature1935, Feature1525, Feature882, Feature1135, Feature475, Feature709 
	   Feature1636, Feature659, Feature1143, Feature262, Feature1348, Feature585, Feature842, Feature171, Feature1400, Feature1052 
	   Feature1082, Feature1914, Feature428, Feature41, Feature1352, Feature1497, Feature1323, Feature242, Feature394, Feature1772 
Negative:  Feature405, Feature1222, Feature1447, Feature1170, Feature621, Feature599, Feature756, Feature1258, Feature1477, Feature57 
	   Feature1674, Feature1777, Feature636, Feature240, Feature313, Feature105, Feature886, Feature1773, Feature1241, Feature1703 
	   Feature1505, Feature1593, Feature47, Feature83, Feature988, Feature753, Feature471, Feature50, Feature163, Feature1264 
PC_ 2 
Positive:  Feature944, Feature1535, Feature

[1] 1


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 2


Calculating cell-specific modality weights

Finding 20 nearest neighbors for each modality.

Calculating kernel bandwidths

“The number of provided modality.weight.name is not equal to the number of modalities. URNA.weight SRNA.weight are used to store the modality weights”
Finding multimodal neighbors

Constructing multimodal KNN graph

Constructing multimodal SNN graph



[1] 3


Unnamed: 0_level_0,Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
Unnamed: 0_level_1,<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1,WNN-Leiden,"3, 3, 4,....","U,S",0.75,357,cl5,0.7630633,0.7244586
2,WNN-Leiden,"2, 2, 4,....","U,S",1.0,357,cl5,0.5834477,0.6410981
3,WNN-Leiden,"6, 6, 3,....","U,S",1.5,357,cl5,0.5008523,0.5927889
4,WNN-Leiden,"5, 5, 9,....","U,S",2.0,357,cl5,0.4045409,0.5501903
5,WNN-Leiden,"3, 3, 4,....","U,S",0.75,357,cl5,0.7630633,0.7244586
6,WNN-Leiden,"2, 2, 4,....","U,S",1.0,357,cl5,0.5834477,0.6410981


In [104]:
xy

Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
WNN-Leiden,"3, 3, 4,....","U,S",0.75,357,cl5,0.7630633,0.7244586
WNN-Leiden,"2, 2, 4,....","U,S",1.00,357,cl5,0.5834477,0.6410981
WNN-Leiden,"6, 6, 3,....","U,S",1.50,357,cl5,0.5008523,0.5927889
WNN-Leiden,"5, 5, 9,....","U,S",2.00,357,cl5,0.4045409,0.5501903
WNN-Leiden,"3, 3, 4,....","U,S",0.75,357,cl5,0.7630633,0.7244586
WNN-Leiden,"2, 2, 4,....","U,S",1.00,357,cl5,0.5834477,0.6410981
WNN-Leiden,"6, 6, 3,....","U,S",1.50,357,cl5,0.5008523,0.5927889
WNN-Leiden,"5, 5, 9,....","U,S",2.00,357,cl5,0.4045409,0.5501903
WNN-Leiden,"3, 3, 4,....","U,S",0.75,357,cl5,0.7630633,0.7244586
WNN-Leiden,"2, 2, 4,....","U,S",1.00,357,cl5,0.5834477,0.6410981


## **Run on simulation data**

In [88]:
looms <- c('./home/tchari/counts/meKMeans_sims/loom/negctrl_sim_k1.loom',
 './home/tchari/counts/meKMeans_sims/loom/b_beta_sim_k8.loom',
 './home/tchari/counts/meKMeans_sims/loom/k_gamma_sim_k8.loom')

#'allen_b08', 'allen_b02h01', 'cl3', 'cl5', 'brca1', 'pbmc','e11e13'
shorts <- c('negctrl_sim_k1','b_beta_sim_k8','k_gamma_sim_k8')

### **MOFA2**

In [92]:
res <- 1
xy <- vector("list", num_runs*2*length(shorts)) # create an empty list into which values are to be filled
ind <- 0

for (o in 1:length(looms)){
    a08 <- Connect(filename = looms[o], mode = "r")
    
    
    # gene list
    n_genes <- a08[['row_attrs']][['gene_name']][['dims']]
    gns <- a08[['row_attrs']][['gene_name']][1:n_genes]
    
    # cell ID list
    n_cells <- a08[['col_attrs']][['barcode']][['dims']]
    cellids <- a08[['col_attrs']][['barcode']][1:n_cells]
    celllabs <- a08[['col_attrs']][['subclass_label']][1:n_cells]
    
    # get raw counts matrix
    raw.cnts <- t(a08[["layers/unspliced"]][,])
    colnames(raw.cnts) <- cellids
    # rownames(raw.cnts) <- gns
    
    
    metadata <- data.frame(
        cellID = cellids
        )
    rownames(metadata) <- cellids
    
    
    
    s_obj <- CreateSeuratObject(counts = raw.cnts,
                                project = "fromLoom",
                                assay = "URNA",
                                meta.data = metadata)
    
    sraw.cnts <- t(a08[["layers/spliced"]][,])
    colnames(sraw.cnts) <- cellids

    a08$close_all()

    num_features <- nrow(s_obj)
    print(looms[o])
    
    for (nr in 1:num_runs){
        # --- Running MOFA2 -----
        MOFAobject <- create_mofa(list(raw.cnts,sraw.cnts))
        data_opts <- get_default_data_options(MOFAobject)
        data_opts$scale_views <- TRUE
        model_opts <- get_default_model_options(MOFAobject)
        train_opts <- get_default_training_options(MOFAobject)
    
        MOFAobject <- prepare_mofa(
          object = MOFAobject,
          data_options = data_opts,
          model_options = model_opts,
          training_options = train_opts
        )
        outfile = file.path(getwd(),"model.hdf5")
        MOFAobject.trained <- run_mofa(MOFAobject, outfile)
        
        model <- load_model(outfile)
    
        factors <- get_factors(model)$group1
    
    
        pca_embeddings_mofa <- factors
        rownames(pca_embeddings_mofa) <- colnames(s_obj)
        colnames(pca_embeddings_mofa) <- colnames(factors)
        s_obj[["test"]] <- CreateDimReducObject(embeddings = as.matrix(pca_embeddings_mofa), key='Factor', assay = 'URNA')
        
        s_obj <- FindNeighbors(s_obj,reduction = 'test',graph.name = 'mofa',dims =1:dim(factors)[2])
    
        
        #Get leiden clusters for different res
        s_obj <- FindClusters(s_obj,graph.name='mofa',  algorithm = 4, resolution = res, verbose = FALSE, cluster.name='leiden')
        ind <- ind+1
        xy[[ind]] <- data.frame(Method = 'MOFA2-Leiden',
                            Clustering = I(list(s_obj[['leiden']]$leiden)),
                            Matrices = 'U,S',
                            Hyperparam = res,
                            HVGs = num_features ,
                            Data = shorts[o],
                            ARI = ARI(celllabs,s_obj[['leiden']]$leiden),
                            AMI = AMI(celllabs,s_obj[['leiden']]$leiden))

        print(ARI(celllabs,s_obj[['leiden']]$leiden))
    
    
        if (grepl('negctrl', looms[o], fixed = TRUE)){
            k <- length(unique(s_obj[['leiden']]$leiden)) 
        }
        else{
            k <- length(unique(celllabs))
        }
        d <- cluster_samples(model, k=k)

        ind <- ind+1
        xy[[ind]] <- data.frame(Method = 'MOFA2-K-Means',
                            Clustering = paste(d$cluster,collapse="|"),
                            Matrices = 'U,S',
                            Hyperparam = k,
                            HVGs = num_features ,
                            Data = shorts[o],
                            ARI = ARI(celllabs,d$cluster),
                            AMI = AMI(celllabs,d$cluster))

        print(ARI(celllabs,d$cluster))
    
        # --- Running MOFA2 -----
    }
    

    

}

xy <- do.call(rbind, xy)

xy %>% 
  rowwise() %>% 
  mutate_if(is.list, ~paste(unlist(.), collapse = '|')) %>% 
  write.csv('./fits/Seuratruns_0508/MOFA_sim_results.csv', row.names = FALSE)

head(xy)
#shorts[o]

“Data is of class matrix. Coercing to dgCMatrix.”


[1] "/home/tchari/counts/meKMeans_sims/loom/negctrl_sim_k1.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“All 20 facto

[1] 0
[1] 0


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“All 20 facto

[1] 0
[1] 0


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
“All 20 facto

[1] 0
[1] 0


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "/home/tchari/counts/meKMeans_sims/loom/b_beta_sim_k8.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
7 factors wer

[1] 1
[1] 0.8611466


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
7 factors wer

[1] 1
[1] 0.9724933


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
7 factors wer

[1] 1
[1] 1


“Data is of class matrix. Coercing to dgCMatrix.”


[1] "/home/tchari/counts/meKMeans_sims/loom/k_gamma_sim_k8.loom"


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
15 factors we

[1] 0.4418632
[1] 1


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
15 factors we

[1] 0.4418632
[1] 0.5670456


Creating MOFA object from a list of matrices (features as rows, sample as columns)...


View names are not specified in the data, using default: view_1, view_2


“Feature names are not specified for view 1, using default: feature1_v1, feature2_v1...”
“Feature names are not specified for view 2, using default: feature1_v2, feature2_v2...”
Checking data options...

Checking training options...

Checking model options...


Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“The latest mofapy2 version is 0.7.0, you are using 0.7.1. Please upgrade with 'pip install mofapy2'”
15 factors we

[1] 0.4418632
[1] 0.8036055


Unnamed: 0_level_0,Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
Unnamed: 0_level_1,<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1,MOFA2-Leiden,"10, 7, 1....","U,S",1,1100,negctrl_sim_k1,0,0
2,MOFA2-K-Means,12|1|12|....,"U,S",21,1100,negctrl_sim_k1,0,0
3,MOFA2-Leiden,"10, 7, 1....","U,S",1,1100,negctrl_sim_k1,0,0
4,MOFA2-K-Means,19|4|5|1....,"U,S",21,1100,negctrl_sim_k1,0,0
5,MOFA2-Leiden,"10, 7, 1....","U,S",1,1100,negctrl_sim_k1,0,0
6,MOFA2-K-Means,1|19|20|....,"U,S",21,1100,negctrl_sim_k1,0,0


In [93]:
xy

Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
MOFA2-Leiden,"10, 7, 1....","U,S",1,1100,negctrl_sim_k1,0.0,0.0
MOFA2-K-Means,12|1|12|....,"U,S",21,1100,negctrl_sim_k1,0.0,0.0
MOFA2-Leiden,"10, 7, 1....","U,S",1,1100,negctrl_sim_k1,0.0,0.0
MOFA2-K-Means,19|4|5|1....,"U,S",21,1100,negctrl_sim_k1,0.0,0.0
MOFA2-Leiden,"10, 7, 1....","U,S",1,1100,negctrl_sim_k1,0.0,0.0
MOFA2-K-Means,1|19|20|....,"U,S",21,1100,negctrl_sim_k1,0.0,0.0
MOFA2-Leiden,"9, 9, 9,....","U,S",1,1100,b_beta_sim_k8,1.0,1.0
MOFA2-K-Means,7|7|7|7|....,"U,S",10,1100,b_beta_sim_k8,0.8611466,0.9167031
MOFA2-Leiden,"9, 9, 9,....","U,S",1,1100,b_beta_sim_k8,1.0,1.0
MOFA2-K-Means,1|1|1|1|....,"U,S",10,1100,b_beta_sim_k8,0.9724933,0.9659187


### **WNN**

In [98]:
res <- c(0.75,1,1.5,2)
xy <- vector("list", num_runs*length(res)*length(shorts)) # create an empty list into which values are to be filled

ind <- 0
for (o in 1:length(looms)){
    a08 <- Connect(filename = looms[o], mode = "r")
    
    
    # gene list
    n_genes <- a08[['row_attrs']][['gene_name']][['dims']]
    gns <- a08[['row_attrs']][['gene_name']][1:n_genes]
    
    # cell ID list
    n_cells <- a08[['col_attrs']][['barcode']][['dims']]
    cellids <- a08[['col_attrs']][['barcode']][1:n_cells]
    celllabs <- a08[['col_attrs']][['subclass_label']][1:n_cells]
    
    # get raw counts matrix
    raw.cnts <- t(a08[["layers/unspliced"]][,])
    colnames(raw.cnts) <- cellids
    # rownames(raw.cnts) <- gns
    
    
    metadata <- data.frame(
        cellID = cellids
        )
    rownames(metadata) <- cellids
    
    
    
    s_obj <- CreateSeuratObject(counts = raw.cnts,
                                project = "fromLoom",
                                assay = "URNA",
                                meta.data = metadata)
    
    sraw.cnts <- t(a08[["layers/spliced"]][,])
    colnames(sraw.cnts) <- cellids
    
    s_assay <- CreateAssay5Object(counts = sraw.cnts)
    
    s_obj[["SRNA"]] <- s_assay
    
    a08$close_all()
    
    DefaultAssay(s_obj) <- 'URNA'
    s_obj <- NormalizeData(s_obj) 
    # s_obj <- FindVariableFeatures(s_obj) 
    s_obj <- ScaleData(s_obj) 
    s_obj <- RunPCA(s_obj,reduction.name = 'upca', features = Features(s_obj))
    
    DefaultAssay(s_obj) <- 'SRNA'
    s_obj <- NormalizeData(s_obj) 
    # s_obj <- FindVariableFeatures(s_obj)
    s_obj <- ScaleData(s_obj)
    s_obj <- RunPCA(s_obj,reduction.name = 'spca',features = Features(s_obj))
    
    for (nr in 1:num_runs){
        s_obj <- FindMultiModalNeighbors(
          s_obj, reduction.list = list("upca", "spca"), 
          dims.list = list(1:30, 1:30), modality.weight.name = "RNA.weight"
        )
        
        #Run Leiden here + save results
        num_features <- nrow(s_obj)
        
        
        for (i in 1:length(res)) {
          #Get leiden clusters for different res
          s_obj <- FindClusters(s_obj, graph.name = "wsnn", algorithm = 4, resolution = res[i], verbose = FALSE, cluster.name='leiden')
          ind <- ind + 1
          xy[[ind]] <- data.frame(Method = 'WNN-Leiden',
                                Clustering = I(list(s_obj[['leiden']]$leiden)),
                                Matrices = 'U,S',
                                Hyperparam = res[i],
                                HVGs = num_features ,
                                Data = shorts[o],
                                ARI = ARI(celllabs,s_obj[['leiden']]$leiden),
                                AMI = AMI(celllabs,s_obj[['leiden']]$leiden))
        }
    }

}

xy <- do.call(rbind, xy)

xy %>% 
  rowwise() %>% 
  mutate_if(is.list, ~paste(unlist(.), collapse = '|')) %>% 
  write.csv('./fits/Seuratruns_0508/WNN_sim_results.csv', row.names = FALSE)
  #write.csv('./fits/Seuratruns_0306/WNN_sim_results.csv', row.names = FALSE)

head(xy)
#shorts[o]

“Data is of class matrix. Coercing to dgCMatrix.”
“Data is of class matrix. Coercing to dgCMatrix.”
Normalizing layer: counts

Centering and scaling data matrix

PC_ 1 
Positive:  Feature523, Feature488, Feature73, Feature1090, Feature6, Feature1075, Feature373, Feature987, Feature1053, Feature376 
	   Feature187, Feature557, Feature930, Feature1015, Feature514, Feature863, Feature399, Feature1007, Feature50, Feature207 
	   Feature483, Feature823, Feature305, Feature267, Feature674, Feature289, Feature610, Feature511, Feature502, Feature255 
Negative:  Feature1024, Feature326, Feature499, Feature538, Feature120, Feature653, Feature847, Feature714, Feature119, Feature370 
	   Feature195, Feature729, Feature703, Feature3, Feature52, Feature965, Feature678, Feature336, Feature84, Feature783 
	   Feature1048, Feature713, Feature513, Feature81, Feature923, Feature34, Feature615, Feature113, Feature311, Feature582 
PC_ 2 
Positive:  Feature55, Feature188, Feature408, Feature871, Feature534,

Unnamed: 0_level_0,Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
Unnamed: 0_level_1,<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
1,WNN-Leiden,"31, 25, ....","U,S",0.75,1100,negctrl_sim_k1,0,0
2,WNN-Leiden,"48, 79, ....","U,S",1.0,1100,negctrl_sim_k1,0,0
3,WNN-Leiden,"81, 161,....","U,S",1.5,1100,negctrl_sim_k1,0,0
4,WNN-Leiden,"193, 130....","U,S",2.0,1100,negctrl_sim_k1,0,0
5,WNN-Leiden,"31, 25, ....","U,S",0.75,1100,negctrl_sim_k1,0,0
6,WNN-Leiden,"48, 79, ....","U,S",1.0,1100,negctrl_sim_k1,0,0


In [99]:
xy

Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
<chr>,<I<list>>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
WNN-Leiden,"31, 25, ....","U,S",0.75,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"48, 79, ....","U,S",1.0,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"81, 161,....","U,S",1.5,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"193, 130....","U,S",2.0,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"31, 25, ....","U,S",0.75,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"48, 79, ....","U,S",1.0,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"81, 161,....","U,S",1.5,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"193, 130....","U,S",2.0,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"31, 25, ....","U,S",0.75,1100,negctrl_sim_k1,0.0,0.0
WNN-Leiden,"48, 79, ....","U,S",1.0,1100,negctrl_sim_k1,0.0,0.0
