# TOME transitions for Seurat V3.1.4

The following code is copied from https://github.com/ChengxiangQiu/tome_code. The `createLineage_Knn` function comes from https://github.com/ChengxiangQiu/tome_code/blob/main/help_code/help_code.R, and the main loop comes from https://github.com/ChengxiangQiu/tome_code/blob/main/Section2_trajectory_Step1_connection.R.

This runs TOME; the only difference is that extraembryonic tissues are excluded, because the experiments sometimes extract more and sometimes less extraembryonic tissue.

In [1]:
library(Seurat)
library(future)
library(future.apply)
library(FNN)

Attaching SeuratObject

Attaching sp



In [2]:
# Print the R version, platform and attached package versions used for this run.
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=de_DE.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] FNN_1.1.3.1        future.apply_1.9.0 future_1.26.1      sp_1.5-0          
[5] SeuratObject_4.1.0 Seurat_4.1.1      

loaded via a namespace (and not attached):
  [1] Rtsne_0.16            colorspace_2.0-3      deldir_1.0-6         
  [4] ellipsis_0.3.2        

In [3]:
# Directory the per-stage Seurat objects ("seurat_object_<stage>.rds") are read from.
file_path <- '/home/mgander/moscot-framework_reproducibility/data/mouse_temporal/RDS'

# Directory the joint embeddings of two consecutive stages
# ("<stage1>_<stage2>_umap3.rds") are read from.
work_path <- "/home/mgander/moscot-framework_reproducibility/data/mouse_temporal/Seurat_Representations"

# Directory the sub-sampled cell names and k-NN indices are written to
# (one sub-folder per stage).
work_path2 <- "/home/mgander/moscot-framework_reproducibility/data/mouse_temporal/TOME_maps_growth_rate_and_driver_genes_analysis"

# Stage labels in temporal order. Because the numbers are combined with the
# strings "8.5a"/"8.5b", they are converted to text first, so whole numbers
# appear without a decimal part ("E7", "E8").
time_point <- paste0(
    "E",
    c(
        3.5, 4.5, 5.25, 5.5, 6.25,
        seq(6.5, 8.25, 0.25),
        "8.5a", "8.5b",
        seq(9.5, 13.5, 1)
    )
)

In [4]:
#####################################################
### Function: finding ancestor node for each node ###
#####################################################

#' Estimate state-to-state transition fractions between two time points by
#' repeated sub-sampling and k-nearest-neighbour voting.
#'
#' In every replicate a random subset of cells is drawn, each "nex" cell is
#' matched to its `k_neigh` nearest "pre" cells in `emb`, and for every "nex"
#' state the fraction of neighbours belonging to each "pre" state is recorded.
#' The sampled cell names and the neighbour indices of every replicate are
#' additionally written as CSV files into `out_dir`.
#'
#' @param emb Embedding with one row per cell (data frame or matrix); row
#'   names must match those of `pd`.
#' @param pd Data frame with one row per cell and at least the columns `Anno`
#'   (cell state) and `day` (`"pre"` for the earlier, `"nex"` for the later
#'   time point).
#' @param time_1 Label of the earlier time point; only used to build the
#'   default `out_dir`.
#' @param reduction Not used inside this function; kept so existing calls
#'   keep working.
#' @param replication_times Number of accepted replicates to produce.
#' @param removing_cells_ratio Fraction of cells left out in each replicate.
#' @param k_neigh Number of nearest "pre" neighbours per "nex" cell.
#' @param out_dir Directory the per-replicate CSV files are written to. The
#'   default, `file.path(work_path2, time_1)`, is the location that was
#'   previously hard-coded and requires `work_path2` to exist in the calling
#'   session. The directory is created if it does not exist.
#' @return A list with `replication_times` data frames; rows are "nex"
#'   states, columns are "pre" states, and each row holds the fractions of
#'   neighbours per "pre" state.
createLineage_Knn <- function(emb, pd, time_1, reduction="umap", replication_times=500, removing_cells_ratio=0.2, k_neigh = 5, out_dir = file.path(work_path2, time_1)){

    print(dim(emb))

    # Invalid input aborts the run instead of only printing a message and
    # then continuing with mismatched data.
    if (!all(c("Anno", "day") %in% names(pd))) {
        stop("no Anno or day in pd", call. = FALSE)
    }
    if (nrow(pd) != nrow(emb) || any(rownames(pd) != rownames(emb))) {
        stop("rownames are not matched", call. = FALSE)
    }
    pd$state <- pd$Anno

    # write.csv() cannot create missing folders, so make sure the target exists.
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

    n_keep <- round(nrow(pd) * (1 - removing_cells_ratio))
    res <- list()
    rep_i <- 1

    while (rep_i <= replication_times) {

        sampling_index <- sample(seq_len(nrow(pd)), n_keep)
        emb_sub <- emb[sampling_index, , drop = FALSE]
        pd_sub <- pd[sampling_index, , drop = FALSE]

        is_pre <- as.vector(pd_sub$day) == "pre"
        is_nex <- as.vector(pd_sub$day) == "nex"
        emb_pre <- emb_sub[is_pre, , drop = FALSE]
        emb_nex <- emb_sub[is_nex, , drop = FALSE]
        pd_pre <- pd_sub[is_pre, , drop = FALSE]
        pd_nex <- pd_sub[is_nex, , drop = FALSE]
        state_pre <- as.vector(pd_pre$state)
        state_nex <- as.vector(pd_nex$state)

        pre_state_min <- min(table(state_pre))

        # A sample in which some "pre" state has fewer than three cells is
        # rejected; rep_i is not advanced, so a fresh sample is drawn.
        if (pre_state_min < 3) {
            next
        }

        # Never ask for more neighbours than the rarest "pre" state has cells.
        # The reduced value is kept for all following replicates, exactly as
        # in the upstream TOME code.
        if (pre_state_min < k_neigh) {
            k_neigh <- pre_state_min
            print(k_neigh)
        }

        neighbors <- get.knnx(emb_pre, emb_nex, k = k_neigh)$nn.index

        # State of every neighbour, same shape as `neighbors`.
        neighbor_state <- matrix(state_pre[neighbors], nrow(neighbors), ncol(neighbors))

        state1 <- names(table(state_pre))
        state2 <- names(table(state_nex))

        # counts[i, j]: number of neighbours in state1[j] over all "nex"
        # cells in state2[i].
        counts <- matrix(0L, length(state2), length(state1))
        for (i in seq_along(state2)) {
            x <- neighbor_state[state_nex == state2[i], ]
            counts[i, ] <- vapply(state1, function(s) sum(x == s), integer(1))
        }

        tmp2 <- data.frame(counts / rowSums(counts))
        row.names(tmp2) <- state2
        names(tmp2) <- state1

        # The nearest neighbours are needed later, so they are saved per
        # replicate. The file prefix "neigbors_" is kept as it is because
        # files with that name may be read elsewhere.
        write.csv(rownames(pd_pre), file.path(out_dir, paste0("early_cells_", rep_i, ".csv")), row.names = FALSE, quote = FALSE)
        write.csv(rownames(pd_nex), file.path(out_dir, paste0("late_cells_", rep_i, ".csv")), row.names = FALSE, quote = FALSE)
        write.csv(neighbors, file.path(out_dir, paste0("neigbors_", rep_i, ".csv")), row.names = FALSE, quote = FALSE)

        res[[rep_i]] <- tmp2
        rep_i <- rep_i + 1
    }

    return(res)
}

# KNN-Scoring to obtain transition rates

In [5]:
# Drop extraembryonic cell types from the combined annotation of a transition.
#
# anno: data frame with a character column `Anno`; the text after the first
#       ":" of each entry is taken as the cell type.
# i:    index of the transition. For i < 6 nothing is removed, for i from 6
#       to 14 six cell types are removed, and for i > 14 only
#       'Extraembryonic visceral endoderm' is removed.
#
# Returns `anno` without the rows of the removed cell types.
remove_exe_cells <- function(anno, i){
    if (i < 6) {
        return(anno)
    }

    if (i > 14) {
        ExE_cell_types <- c('Extraembryonic visceral endoderm')
    } else {
        ExE_cell_types <- c(
            'Embryonic visceral endoderm',
            'Extraembryonic visceral endoderm',
            'Parietal endoderm',
            'Extraembryonic ectoderm',
            'Primitive erythroid cells',
            'Blood progenitors'
        )
    }

    # Second ":"-separated piece of every annotation (NA if there is none).
    cell_type <- vapply(
        strsplit(anno$Anno, ":"),
        function(pieces) pieces[2],
        character(1)
    )
    is_exe <- cell_type %in% ExE_cell_types

    anno[!is_exe, ]
}

In [104]:
# You will need about 20 GB of free disk space to save all nearest neighbors and cell annotations

# Process the 19 transitions between consecutive entries of `time_point`.
for (i in 1:19) {

    # Labels of the earlier and the later stage of this transition.
    time_1 <- time_point[i]
    print(time_1)
    time_2 <- time_point[i + 1]

    # "E7" is renamed to "E7.0" before any file is read.
    if (time_1 == 'E7') {
        time_1 <- 'E7.0'
    }
    if (time_2 == 'E7') {
        time_2 <- 'E7.0'
    }

    # Annotation of the earlier stage, labelled "pre".
    anno1 <- readRDS(paste0(file_path, "/seurat_object_", time_1, ".rds"))
    anno1$Anno <- as.vector(anno1$cell_state)
    anno1 <- cbind(anno1[["day"]], anno1[["Anno"]])
    anno1$day <- "pre"
    anno1$stage <- time_1

    # Annotation of the later stage, labelled "nex".
    anno2 <- readRDS(paste0(file_path, "/seurat_object_", time_2, ".rds"))
    anno2$Anno <- as.vector(anno2$cell_state)
    anno2 <- cbind(anno2[["day"]], anno2[["Anno"]])
    anno2$day <- "nex"
    anno2$stage <- time_2

    # "E8" is renamed to "E8.0" only after the Seurat objects were read, so
    # the new label applies to the embedding file and the output folder.
    if (time_1 == 'E8') {
        time_1 <- 'E8.0'
    }
    if (time_2 == 'E8') {
        time_2 <- 'E8.0'
    }

    # Joint embedding of both stages.
    emb <- readRDS(paste0(work_path, "/", time_1, "_", time_2, "_umap3", ".rds"))
    emb <- data.frame(emb)

    # Combine both annotations, drop extraembryonic cells and bring the
    # embedding into the same row order.
    anno <- rbind(anno1, anno2)
    anno <- remove_exe_cells(anno, i)
    emb <- emb[rownames(anno), ]

    # createLineage_Knn comes from help_code.R; three lines were added to it
    # so that the k-NN identified by TOME are saved to disk.
    res <- createLineage_Knn(emb, anno, time_1, k_neigh = 5)
}

[1] "E3.5"
[1] 157   3
[1] "E4.5"
[1] 398   3
[1] "E5.25"
[1] 795   3
[1] "E5.5"
[1] 785   3
[1] "E6.25"
[1] 4765    3
[1] 3
[1] "E6.5"
[1] 4532    3
[1] "E6.75"
[1] 12295     3
[1] "E7"
[1] 21145     3
[1] "E7.25"
[1] 16757     3
[1] "E7.5"
[1] 14870     3
[1] "E7.75"
[1] 20003     3
[1] "E8"
[1] 25041     3
[1] "E8.25"
[1] 26706     3
[1] "E8.5a"
[1] 163685      3
[1] "E8.5b"
[1] 257896      3
[1] "E9.5"
[1] 380453      3
[1] "E10.5"
[1] 724637      3
[1] "E11.5"
[1] 747850      3
[1] "E12.5"
[1] 557850      3
