In [13]:
# Load the project's shared helpers first, then switch to the project root so
# that the relative paths used below ("configs/...", "result/...", "data/...")
# resolve against it.
# NOTE(review): pj, mkdir, parseTOML and ArgumentParser are presumably provided
# (or attached) by utils.R -- confirm.
source("/root/workspace/code/midas/preprocess/utils.R")
setwd("/root/workspace/code/midas/")
library(RColorBrewer)

# Run options. Only the first element returned by parse_known_args() is kept.
parser <- ArgumentParser()
parser$add_argument("--task", type = "character", default = "bm")
parser$add_argument("--method", type = "character", default = "scmomat")
parser$add_argument("--experiment", type = "character", default = "e0")
parser$add_argument("--model", type = "character", default = "default")
parser$add_argument("--init_model", type = "character", default = "sp_00001899")
o <- parser$parse_known_args()[[1]]

# The dataset config is looked up under the base task name: a trailing
# "_transfer" or "_ref_<anything>" suffix is stripped from o$task first.
task_key <- gsub("_transfer$|_ref_.*$", "", o$task)
config <- parseTOML("configs/data.toml")[[task_key]]
subset_names <- basename(config$raw_data_dirs)
# Zero-based subset ids as strings ("0", "1", ...). vapply() guarantees a
# character vector even when there are no subsets (sapply() would give list()).
subset_ids <- vapply(seq_along(subset_names) - 1, toString, character(1))
input_dirs <- pj("result", o$task, o$experiment, o$model, "predict", o$init_model, paste0("subset_", subset_ids))
pp_dir <- pj("data", "processed", o$task)
output_dir <- pj("result", "comparison", o$task, o$method)
mkdir(output_dir, remove_old = FALSE)
# One label file path per raw data directory.
label_paths <- pj(config$raw_data_dirs, "label_seurat", "l1.csv")

# dim_c of the "default" model config; used later as the number of embedding
# dimensions passed to UMAP.
K <- parseTOML("configs/model.toml")[["default"]]$dim_c
l <- 7.5  # figure size
L <- 10   # figure size
m <- 0.5  # legend margin

## Load preprocessed data

In [14]:
# Per-subset containers, each keyed by subset name.
z_list <- list()
cell_name_list <- list()
label_list <- list()
is_label <- TRUE  # flipped to FALSE as soon as one subset has no label file
subset_name_list <- list()
S <- length(subset_names)
for (i in seq_along(subset_names)) {
    subset_name <- subset_names[i]
    z_dir <- pj(input_dirs[i], "z", "joint")
    # `pattern` is a regular expression: escape the dot so that only names
    # ending in a literal ".csv" are picked up.
    fnames <- dir(path = z_dir, pattern = "\\.csv$")
    fnames <- str_sort(fnames, decreasing = FALSE)

    # Read every embedding chunk of this subset, then stack them row-wise.
    N <- length(fnames)
    z_subset_list <- vector("list", N)
    for (n in seq_along(fnames)) {
        message(paste0("Loading Subset ", i, "/", S, ", File ", n, "/", N))
        z_subset_list[[n]] <- read.csv(pj(z_dir, fnames[n]), header = FALSE)
    }
    z_list[[subset_name]] <- bind_rows(z_subset_list)

    # The second column of cell_names.csv is kept as the cell names.
    cell_name_list[[subset_name]] <- read.csv(pj(pp_dir, paste0("subset_", subset_ids[i]),
        "cell_names.csv"), header = TRUE)[, 2]
    # Labels are optional; the second column of the label file is kept.
    if (file.exists(label_paths[i])) {
        label_list[[subset_name]] <- read.csv(label_paths[i], header = TRUE)[, 2]
    } else {
        is_label <- FALSE
    }

    # Batch annotation: the subset name repeated once per cell.
    subset_name_list[[subset_name]] <- rep(subset_name, length(cell_name_list[[subset_name]]))
}

Loading Subset 1/3, File 1/136

Loading Subset 1/3, File 2/136

Loading Subset 1/3, File 3/136

Loading Subset 1/3, File 4/136

Loading Subset 1/3, File 5/136

Loading Subset 1/3, File 6/136

Loading Subset 1/3, File 7/136

Loading Subset 1/3, File 8/136

Loading Subset 1/3, File 9/136

Loading Subset 1/3, File 10/136

Loading Subset 1/3, File 11/136

Loading Subset 1/3, File 12/136

Loading Subset 1/3, File 13/136

Loading Subset 1/3, File 14/136

Loading Subset 1/3, File 15/136

Loading Subset 1/3, File 16/136

Loading Subset 1/3, File 17/136

Loading Subset 1/3, File 18/136

Loading Subset 1/3, File 19/136

Loading Subset 1/3, File 20/136

Loading Subset 1/3, File 21/136

Loading Subset 1/3, File 22/136

Loading Subset 1/3, File 23/136

Loading Subset 1/3, File 24/136

Loading Subset 1/3, File 25/136

Loading Subset 1/3, File 26/136

Loading Subset 1/3, File 27/136

Loading Subset 1/3, File 28/136

Loading Subset 1/3, File 29/136

Loading Subset 1/3, File 30/136

Loading Subset 1/3,

## Create Seurat object

In [15]:
# Build a placeholder count matrix (loaded embeddings, transposed and
# multiplied by zero) whose only purpose is to give the Seurat object one
# column per cell.
rna <- t(data.matrix(bind_rows(z_list))) * 0  # pseudo rna counts
colnames(rna) <- do.call("c", unname(cell_name_list))
rownames(rna) <- paste0("rna-", seq_len(nrow(rna)))
obj <- CreateSeuratObject(counts = rna, assay = "rna")

# For the methods listed here embeddings.csv is read with a header row and
# with its first column as row names; for all other methods it is read as a
# bare matrix.
embedding_path <- pj(output_dir, "embeddings.csv")
if (o$method %in% c("multigrate", "stabmap", "uniport", "glue", "scipenn", "totalvi")) {
    z <- data.matrix(read.csv(embedding_path, header = TRUE, row.names = 1))
} else {
    z <- data.matrix(read.csv(embedding_path, header = FALSE))
}

# Fail with an explicit message when the embedding does not have one row per
# cell, instead of the generic dimnames error from the assignment below.
if (nrow(z) != ncol(obj)) {
    stop("embeddings.csv has ", nrow(z), " rows but the object has ",
        ncol(obj), " cells", call. = FALSE)
}

colnames(z) <- paste0("z_", seq_len(ncol(z)))
rownames(z) <- colnames(obj)
obj[["z"]] <- CreateDimReducObject(embeddings = z, key = "z_", assay = "rna")

# Labels are attached only when every subset provided a label file; a
# partially filled label_list would not line up with the cells.
if (is_label) {
    obj@meta.data$l1 <- do.call("c", unname(label_list))
} else {
    warning("Label file missing for at least one subset; 'l1' is not set.",
        call. = FALSE)
}
obj@meta.data$batch <- factor(x = do.call("c", unname(subset_name_list)), levels = subset_names)
table(obj@meta.data$batch)[unique(obj@meta.data$batch)]

obj



  ica    BM    bm 
34592 10671 26286 

An object of class Seurat 
34 features across 71549 samples within 1 assay 
Active assay: rna (34 features, 0 variable features)
 1 dimensional reduction calculated: z

## Visualization

In [16]:
# Compute a UMAP from the first K dimensions of the "z" reduction and store it
# under the name "umap".
# NOTE(review): this assumes the embedding has at least K columns -- confirm
# for methods whose embedding size differs from the model's dim_c.
umap_dims <- 1:K
obj <- RunUMAP(object = obj, reduction = "z", dims = umap_dims, reduction.name = "umap")

# Persist the object next to the embeddings, replacing any existing file.
h5_path <- pj(output_dir, "obj.h5seurat")
SaveH5Seurat(obj, h5_path, overwrite = TRUE)

13:45:36 UMAP embedding parameters a = 0.9922 b = 1.112

13:45:36 Read 71549 rows and found 32 numeric columns

13:45:36 Using Annoy for neighbor search, n_neighbors = 30

13:45:36 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

13:45:48 Writing NN index file to temp file /tmp/Rtmp7EpgIJ/file2457538fed0cd

13:45:48 Searching Annoy index using 64 threads, search_k = 3000

13:45:50 Annoy recall = 100%

13:45:51 Commencing smooth kNN distance calibration using 64 threads
 with target n_neighbors = 30

13:45:54 Initializing from normalized Laplacian + noise (using irlba)

13:45:59 Commencing optimization for 200 epochs, with 3111698 positive edges

13:46:37 Optimization finished

Creating h5Seurat file for version 3.1.5.9900

Adding counts for rna

Adding data for rna

No 

In [None]:
# obj <- LoadH5Seurat(pj(output_dir, "obj.h5seurat"), assays = "adt", reductions = "umap")

# Output path stems for the two figures: "<method>_merged_batch" and
# "<method>_merged_label" inside output_dir.
batch_plot_path <- pj(output_dir, paste(o$method, "merged_batch", sep = "_"))
label_plot_path <- pj(output_dir, paste(o$method, "merged_label", sep = "_"))

# UMAP grouped by batch (col_4 palette), no legend and no point labels.
dim_plot(
    obj,
    w = L, h = L, reduction = "umap", no_axes = TRUE,
    split.by = NULL, group.by = "batch",
    label = FALSE, repel = TRUE, label.size = 4, pt.size = 0.1,
    cols = col_4, legend = FALSE,
    save_path = batch_plot_path
)

# UMAP grouped by the l1 label (col_8 palette), same layout as above.
dim_plot(
    obj,
    w = L, h = L, reduction = "umap", no_axes = TRUE,
    split.by = NULL, group.by = "l1",
    label = FALSE, repel = TRUE, label.size = 4, pt.size = 0.1,
    cols = col_8, legend = FALSE,
    save_path = label_plot_path
)
