# TOME transitions for Seurat v3.1.4

The following code is copied from https://github.com/ChengxiangQiu/tome_code. The `createLineage_Knn` function comes from https://github.com/ChengxiangQiu/tome_code/blob/main/help_code/help_code.R, and the main procedure comes from https://github.com/ChengxiangQiu/tome_code/blob/main/Section2_trajectory_Step1_connection.R.

In [1]:
library(Seurat)
library(future)
library(future.apply)
library(FNN)

In [2]:
# Print the R version, platform and attached/loaded package versions
# so the environment used for this run is recorded alongside the results.
sessionInfo()

R version 4.1.3 (2022-03-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Rocky Linux 8.7 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /home/icb/manuel.gander/miniconda3/envs/seurat4/lib/libopenblasp-r0.3.20.so

locale:
 [1] LC_CTYPE=C.UTF-8           LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] FNN_1.1.3.1        future.apply_1.9.0 future_1.26.1      Seurat_3.1.4      

loaded via a namespace (and not attached):
  [1] tsne_0.1-3.1        nlme_3.1-157        matrixStats_0.62.0 
  [4] RcppAnnoy_0.0.19    RColorBrewer_1.1-3  httr_1.4.3         
  [7] repr_1.1.4      

In [3]:
# Directory from which the per-stage Seurat objects
# ("seurat_object_<stage>.rds") are read.
file_path <- "/lustre/groups/ml01/projects/2022_moscot/tome/RDS"

# Directory from which the per-pair embedding CSV files are read.
work_path <- "/home/icb/manuel.gander/mouse_atlas/data/PCA_representations"

# Directory into which the resulting TOME maps are written.
work_path2 <- "/home/icb/manuel.gander/mouse_atlas/data/TOME_maps/PCA/CTs"

# Ordered stage labels. Because the vector mixes numbers with the strings
# "8.5a"/"8.5b", every number is coerced to character, so 7 and 8 become
# "E7" and "E8" (not "E7.0"/"E8.0").
time_point <- paste0(
    "E",
    c(
        3.5, 4.5, 5.25, 5.5, 6.25,
        seq(6.5, 8.25, 0.25),
        "8.5a", "8.5b",
        seq(9.5, 13.5, 1)
    )
)

In [4]:
#####################################################
### Function: finding ancestor node for each node ###
#####################################################

# Estimate state-to-state transition fractions between two adjacent time
# points by repeated k-nearest-neighbour voting on subsampled cells.
#
# For every replicate a random subset of the cells is drawn, each "nex" cell
# looks up its k nearest "pre" cells in the embedding (FNN::get.knnx), and for
# every "nex" state the fraction of neighbours belonging to each "pre" state
# is recorded.
#
# Args:
#   emb: embedding with one row per cell; rows must be in the same order (and
#        carry the same row names) as `pd`.
#   pd: data frame with one row per cell and at least the columns `Anno`
#       (cell state) and `day` ("pre" for the earlier, "nex" for the later
#       time point).
#   time_1: not used by this function; kept for interface compatibility.
#   reduction: not used by this function; kept for interface compatibility.
#   replication_times: number of accepted subsampling replicates to produce.
#   removing_cells_ratio: fraction of cells dropped in every replicate.
#   k_neigh: number of nearest "pre" neighbours queried per "nex" cell.
#
# Returns:
#   A list with `replication_times` data frames. Rows are the "nex" states and
#   columns the "pre" states present in that replicate's subsample; each row
#   holds the fraction of neighbour votes per "pre" state.
#
# Raises:
#   An error if `pd` lacks the `Anno`/`day` columns or if the row names of
#   `pd` and `emb` do not match (previously these were only printed and the
#   computation continued on invalid input).
#
# Notes:
#   * A subsample in which some present "pre" state has fewer than 3 cells is
#     rejected and redrawn without counting as a replicate, so the loop only
#     ends once enough acceptable subsamples have been drawn.
#   * When the smallest "pre" state has between 3 and k_neigh - 1 cells,
#     k_neigh is lowered to that size and stays lowered for all following
#     replicates (behaviour of the original implementation, kept on purpose).
createLineage_Knn <- function(emb, pd, time_1, reduction="umap", replication_times=500, removing_cells_ratio=0.2, k_neigh = 5){

    print(dim(emb))

    # Fail fast on malformed input instead of printing and carrying on.
    if (!"Anno" %in% names(pd) || !"day" %in% names(pd)) {
        stop("Error: no Anno or day in pd", call. = FALSE)
    }
    if (nrow(pd) != nrow(emb) || any(rownames(pd) != rownames(emb))) {
        stop("Error: rownames are not matched", call. = FALSE)
    }
    pd$state <- pd$Anno

    res <- list()
    rep_i <- 1

    while (rep_i <= replication_times) {

        # Draw the subsample (same sampling call as the original so that a
        # fixed seed reproduces the same replicates).
        sampling_index <- sample(1:nrow(pd), round(nrow(pd) * (1 - removing_cells_ratio)))

        emb_sub <- emb[sampling_index, ]
        pd_sub <- pd[sampling_index, ]

        emb_pre <- emb_sub[as.vector(pd_sub$day) == "pre", ]
        emb_nex <- emb_sub[as.vector(pd_sub$day) == "nex", ]
        state_pre <- as.vector(pd_sub[pd_sub$day == "pre", ]$state)
        state_nex <- as.vector(pd_sub[pd_sub$day == "nex", ]$state)

        pre_state_min <- min(table(state_pre))

        # Shrink k (persistently) when the rarest "pre" state is small.
        if (pre_state_min < k_neigh && pre_state_min >= 3) {
            k_neigh <- pre_state_min
            print(k_neigh)
        }

        # Too few cells in some "pre" state: redraw without advancing rep_i.
        if (pre_state_min < 3) {
            next
        }

        neighbors <- get.knnx(emb_pre, emb_nex, k = k_neigh)$nn.index

        # State label of every neighbour, same shape as `neighbors`.
        neighbor_state <- matrix(state_pre[neighbors], nrow(neighbors), ncol(neighbors))

        state1 <- names(table(state_pre))
        state2 <- names(table(state_nex))

        # votes[i, j]: neighbours of "nex" state i that carry "pre" state j.
        votes <- matrix(NA, length(state2), length(state1))
        for (i in seq_along(state2)) {
            x <- c(neighbor_state[state_nex == state2[i], ])
            votes[i, ] <- vapply(state1, function(s) sum(x == s), integer(1))
        }

        # Row-normalise the vote counts to fractions.
        votes <- votes / rowSums(votes)
        votes <- data.frame(votes)
        row.names(votes) <- state2
        names(votes) <- state1

        res[[rep_i]] <- votes

        rep_i <- rep_i + 1
    }

    return(res)
}

#### calculating the median value of each entry in the matrix

# Element-wise median over a list of replicate matrices.
#
# Args:
#   res: non-empty list of data frames (as returned by createLineage_Knn);
#        entries are addressed by position, so all replicates are expected to
#        share the layout of the first one.
#
# Returns:
#   A data frame with the row and column names of res[[1]]; entry [i, j] is
#   the median of entry [i, j] across all replicates, ignoring NA/NaN values
#   (NA when no non-missing value exists).
#
# The number of replicates is taken from length(res); it used to be
# hard-coded to 500, which failed for any other replicate count.
calc_median <- function(res){
    if (length(res) == 0) {
        stop("res must contain at least one replicate", call. = FALSE)
    }
    first <- res[[1]]
    n_row <- nrow(first)
    n_col <- ncol(first)

    med <- matrix(NA, n_row, n_col)
    for (i in seq_len(n_row)) {
        for (j in seq_len(n_col)) {
            # Collect entry [i, j] from every replicate.
            vals <- vapply(res, function(m) as.numeric(m[i, j]), numeric(1))
            med[i, j] <- median(vals[!is.na(vals)])
        }
    }

    med <- data.frame(med)
    row.names(med) <- row.names(first)
    names(med) <- names(first)
    return(med)
}


#### calculating the standard deviation (square root of the variance) of each entry in the matrix

# Element-wise standard deviation over a list of replicate matrices.
#
# Despite its name, this function returns sqrt(var(.)), i.e. the sample
# standard deviation, not the variance.
#
# Args:
#   res: non-empty list of data frames (as returned by createLineage_Knn);
#        entries are addressed by position, so all replicates are expected to
#        share the layout of the first one.
#
# Returns:
#   A data frame with the row and column names of res[[1]]; entry [i, j] is
#   the standard deviation of entry [i, j] across all replicates, ignoring
#   NA/NaN values (NA when fewer than two non-missing values exist).
#
# The number of replicates is taken from length(res); it used to be
# hard-coded to 500, which failed for any other replicate count.
calc_var <- function(res){
    if (length(res) == 0) {
        stop("res must contain at least one replicate", call. = FALSE)
    }
    first <- res[[1]]
    n_row <- nrow(first)
    n_col <- ncol(first)

    spread <- matrix(NA, n_row, n_col)
    for (i in seq_len(n_row)) {
        for (j in seq_len(n_col)) {
            # Collect entry [i, j] from every replicate.
            vals <- vapply(res, function(m) as.numeric(m[i, j]), numeric(1))
            spread[i, j] <- sqrt(var(vals[!is.na(vals)]))
        }
    }

    spread <- data.frame(spread)
    row.names(spread) <- row.names(first)
    names(spread) <- names(first)
    return(spread)
}

# KNN-Scoring to obtain transition rates

In [5]:
# You will need about 20 GB of free space to save all nearest neighbors and cell annotations

# Read the Seurat object of one stage and return its per-cell annotation
# table with the columns `day` (overwritten with `day_label`), `Anno`
# (copied from `cell_state`) and `stage`.
load_stage_annotation <- function(stage, day_label) {
    obj <- readRDS(paste0(file_path, "/seurat_object_", stage, ".rds"))
    obj$Anno <- as.vector(obj$cell_state)
    anno <- cbind(obj[["day"]], obj[["Anno"]])
    anno$day <- day_label
    anno$stage <- stage
    anno
}

# Compute the TOME transition map for each pair of adjacent time points.
# NOTE(review): with 1:13 the last pair processed is time_point[13] ->
# time_point[14] (E8.25 -> E8.5a), so the E8.5b branch below is not reached
# by this loop; it is kept for runs over later pairs.
for (i in 1:13) {

    # Prepare object
    time_1 <- time_point[i]
    print(time_1)
    time_2 <- time_point[i + 1]

    # The Seurat object files use "E7.0" rather than "E7".
    if (time_2 == "E7") {
        time_2 <- "E7.0"
    }
    if (time_1 == "E7") {
        time_1 <- "E7.0"
    }

    anno1 <- load_stage_annotation(time_1, "pre")
    anno2 <- load_stage_annotation(time_2, "nex")

    # The embedding and output files use "E8.0" rather than "E8"; this rename
    # must happen after the Seurat objects have been read.
    if (time_2 == "E8") {
        time_2 <- "E8.0"
    }
    if (time_1 == "E8") {
        time_1 <- "E8.0"
    }

    # For E8.5b to E9.5 use the integration where more features have been used in anchor-based batch integration
    k <- ""
    if (time_1 == "E8.5b") {
        k <- "_new"
    }

    #emb = readRDS(paste0(work_path, "/", time_1, "_", time_2, "_umap3", k, ".rds"))
    #emb = data.frame(emb)

    # Use the PCA-embedding (read from work_path; the file name keeps the
    # "_umap3" suffix). Column "X" holds the cell IDs.
    emb <- read.csv(paste0(work_path, "/", time_1, "_", time_2, "_umap3", k, ".csv"))
    rownames(emb) <- emb[["X"]]
    emb$X <- NULL

    anno <- rbind(anno1, anno2)

    # Abort with a clear message when embedding and annotation disagree.
    if (nrow(emb) != nrow(anno)) {
        stop(
            "Embedding has ", nrow(emb), " cells but annotation has ",
            nrow(anno), " for ", time_1, " -> ", time_2,
            call. = FALSE
        )
    }
    # Reindexing by an unknown cell ID would silently create NA rows.
    if (!all(rownames(emb) %in% rownames(anno))) {
        stop(
            "Embedding contains cell IDs missing from the annotation for ",
            time_1, " -> ", time_2,
            call. = FALSE
        )
    }
    anno <- anno[rownames(emb), ]

    # Calculate TOME transitions
    res <- createLineage_Knn(emb, anno, time_1, k_neigh = 5) #### createLineage_Knn function was in help_code.R

    A0 <- calc_median(res)
    A1 <- calc_var(res)

    # Median map and its per-entry standard deviation across replicates.
    write.csv(A0, paste0(work_path2, "/TOME_map_", time_1, "_", time_2, ".csv"), row.names = TRUE, quote = FALSE)
    write.csv(A1, paste0(work_path2, "/TOME_map_sd_", time_1, "_", time_2, ".csv"), row.names = TRUE, quote = FALSE)
}

[1] "E3.5"
[1] 157   3
[1] "E4.5"
[1] 398   3
[1] "E5.25"
[1] 795   3
[1] "E5.5"
[1] 785   3
[1] "E6.25"
[1] 4765    3
[1] 3
[1] "E6.5"
[1] 6519    3
[1] "E6.75"
[1] 16824     3
[1] "E7"
[1] 28286     3
[1] "E7.25"
[1] 24531     3
[1] "E7.5"
[1] 25487     3
[1] "E7.75"
[1] 31174     3
[1] "E8"
[1] 32616     3
[1] "E8.25"
[1] 32844     3
