In [1]:
## Calculate organizer subtype markers by comparing each of them versus other dorsal/ventral NSCs & early subtypes
library(Seurat)
library(tibble)
library(dplyr)

Registered S3 methods overwritten by 'tibble':
  method     from  
  format.tbl pillar
  print.tbl  pillar


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
## Calculate the markers
## The result is cached on disk: the (slow) computation only runs when
## `resFile` does not exist yet.
resFile <- "./load_files/MarRes_PATvsNSC.rds"
if (!file.exists(resFile)){
    message("Loading the full macaque scRNA-seq data")
    load(file = "../../MF1/overview/load_files/Macaque.developing.seurat.Rdata")
    ## mac, subtype_order, subclass_order
    
    ## subset to only early stem cells and subtypes
    message("Subset to NPCs")
    npc <- subset(mac, subclass %in% c("Patterning centers", "dorsal NSC", "GE NSC", "Early subtypes"))
    npc_cls <- levels(as.factor(as.character(npc$subtype)))
    organizer_cls <- c("PC FGF17", "AntVen NKX2-1 LMO1", "AntVen NKX2-1 NKX6-2", 
                       "GE RG NKX2-1 DLK1", "GE RG NKX2-1 OLIG1", 
                       "PC RSPO3", "PC TTR", "PC SFRP2", "PC TCF7L2")
    ## Background = every subtype in the subset that is not an organizer subtype
    bgcls <- setdiff(npc_cls, organizer_cls)

    ## The identity class is the same for every comparison, so set it once
    ## instead of re-assigning it on a copy of the object in each iteration.
    Idents(npc) <- "subtype"

    message("Finding markers")
    marker_list <- lapply(organizer_cls, function(cls) {
        print(paste0("Calculating markers for subtype:", cls))
        res <- FindMarkers(npc, ident.1 = cls, ident.2 = bgcls, min.pct = 0.1, 
                           logfc.threshold = 0.1, base = exp(1), fc.name = "avg_logFC",
                           max.cells.per.ident = 1000, only.pos = TRUE) ## To be consistent with seurat v3
        ## ratio_fc: ratio of detection rates (pct.1 vs pct.2); the 0.01
        ## pseudo-count keeps the ratio finite when pct.2 is 0.
        res %>%
            rownames_to_column("gene") %>%
            mutate(cluster = cls) %>%
            mutate(ratio_fc = (pct.1 + 0.01)/(pct.2 + 0.01))
    })
    allres <- do.call(rbind, marker_list)

    ## Write to the same path that the existence check above looks at
    saveRDS(allres, file = resFile)
}

In [3]:
## Load the calculated marker results
## (one row per gene x organizer subtype, as written by the marker-calculation cell)
allres <- readRDS(file = "./load_files/MarRes_PATvsNSC.rds")
## NOTE(review): this file is not written anywhere in this notebook and
## `npc_use` is not referenced by any later cell here -- confirm whether this
## load is still needed.
npc_use <- readRDS(file = "./load_files/MarRes_PATvsNSC_subtype_used.rds")


## Also load the organizer markers (comparisons within organizer cells), do the intersection between
## 1. each organizer subtype compared to all other organizer subtypes
## 2. each organizer subtype compared to all NSCs subtypes
org_exclu_mars <- readRDS(file = "../../MF2_organizer/overview/load_files/PAT_markers_withincompare.rds")
## Number of marker rows per subtype in each of the two marker tables
table(allres$cluster)
table(org_exclu_mars$cluster)


  AntVen NKX2-1 LMO1 AntVen NKX2-1 NKX6-2    GE RG NKX2-1 DLK1 
                 357                  476                  308 
  GE RG NKX2-1 OLIG1             PC FGF17             PC RSPO3 
                 242                  367                  376 
            PC SFRP2            PC TCF7L2               PC TTR 
                 514                  901                  977 


  AntVen NKX2-1 LMO1 AntVen NKX2-1 NKX6-2    GE RG NKX2-1 DLK1 
                 280                  379                  269 
  GE RG NKX2-1 OLIG1             PC FGF17             PC RSPO3 
                 215                  387                  499 
            PC SFRP2            PC TCF7L2               PC TTR 
                 285                  821                 1102 

In [4]:
## Intersect marker list to get the markers
## For each organizer subtype, keep the genes that are markers both
##   (a) versus the NSC / early-subtype background (`allres`), and
##   (b) among the first 200 within-organizer markers (`org_exclu_mars`),
## then order them by detection-rate ratio and log fold change.
organizer_cls <- c("PC FGF17", "AntVen NKX2-1 LMO1", "AntVen NKX2-1 NKX6-2", 
                       "GE RG NKX2-1 DLK1", "GE RG NKX2-1 OLIG1", 
                       "PC RSPO3", "PC TTR", "PC SFRP2", "PC TCF7L2")
mars_df <- lapply(organizer_cls, function(cls){
    mm_vs_npcs <- allres %>%
                    filter(cluster %in% cls)
    mm_vs_org <- org_exclu_mars %>%
                    filter(cluster %in% cls) %>%
                    .$gene
    ## Keep at most the first 200 genes, in stored order (presumably already
    ## ranked -- confirm against the script that wrote the file).
    ## head() is safe for an empty vector, whereas 1:min(200, 0) expands to
    ## c(1, 0) and would inject an NA into the gene list.
    mm_vs_org <- head(mm_vs_org, 200)
    sh_genes <- intersect(mm_vs_npcs$gene, mm_vs_org)
    ## NOTE(review): the original computed a rounded `ratio_fc_round` column and
    ## dropped it without using it; sorting has always been on the unrounded
    ## `ratio_fc`. That order is kept here -- confirm rounding was not intended.
    gene_ord <- mm_vs_npcs %>%
                    filter(gene %in% sh_genes) %>%
                    arrange(desc(ratio_fc), desc(avg_logFC))
    ## Fix the column order; the table-export cell renames columns by position
    gene_ord[, c("gene", "p_val", "p_val_adj", "avg_logFC", "pct.1", "pct.2", "ratio_fc", "cluster")]
}) %>%
    do.call(rbind, .)
head(mars_df)

Unnamed: 0_level_0,gene,p_val,p_val_adj,avg_logFC,pct.1,pct.2,ratio_fc,cluster
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,FGF17,1.782225e-265,6.169886e-261,1.2498785,0.803,0.011,38.71429,PC FGF17
2,FGF18,2.017336e-65,6.983815e-61,0.3746504,0.257,0.003,20.53846,PC FGF17
3,FGF8,1.9603819999999998e-44,6.786646e-40,0.3091914,0.181,0.001,17.36364,PC FGF17
4,DLK1,5.720191e-243,1.980273e-238,0.9951232,0.795,0.037,17.12766,PC FGF17
5,LOC114676270,1.333273e-77,4.615657e-73,0.4119057,0.342,0.012,16.0,PC FGF17
6,CLYBL,1.537749e-129,5.323533000000001e-125,0.582047,0.51,0.029,13.33333,PC FGF17


In [5]:
## Write out to a table (for the paper)
mars_out <- mars_df
## The headers below are assigned by position, so make sure the columns are
## in the order the headers assume before renaming.
stopifnot(identical(colnames(mars_out),
                    c("gene", "p_val", "p_val_adj", "avg_logFC",
                      "pct.1", "pct.2", "ratio_fc", "cluster")))
colnames(mars_out) <- c("gene", "Raw P value", "Adjusted P value", 
                        "Fold changes of average expression in log scale",
                        "Expression ratio (this subtype)", "Expression ratio (background)", 
                        "Ratio fold changes", "Subtype")


## Add pathway annotation: one 0/1 indicator column per pathway, set to 1
## when the gene is a member of that pathway's gene set
new.list <- readRDS(file = "./load_files/Pathway_updated_20221219.rds") ## signaling pathway annotations
for (pa in names(new.list)){
    mars_out[, paste0("Pathway_", pa)] <- ifelse(mars_out$gene %in% new.list[[pa]], 1, 0)
}


## Tab-separated, unquoted, with a header row and no row names
write.table(mars_out, file = "./report/table_PAT_markers_compared_to_NSCs.txt", 
            row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)


In [6]:
## Write to tables (for Carlo)
## One line per subtype: the subtype name followed by its unique marker genes,
## all tab-separated.
mars <- lapply(split(mars_df$gene, mars_df$cluster), unique)
## seq_along() + vapply() stay well-defined (zero lines) when `mars` is empty;
## 1:length(mars) would iterate over c(1, 0) and sapply() would return a list.
out.list <- vapply(seq_along(mars), function(x) {
    paste(c(names(mars)[x], mars[[x]]), collapse = "\t")
}, character(1))
writeLines(out.list, con = "./load_files/PAT_markers_compared_to_NSCs.txt")

In [7]:
## Print the R version, platform and attached package versions used for this run
sessionInfo()

R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.9 (Maipo)

Matrix products: default
BLAS:   /gpfs/ycga/home/sm2726/packages/R_versions/R-3.6.1/lib64/R/lib/libRblas.so
LAPACK: /gpfs/ycga/home/sm2726/packages/R_versions/R-3.6.1/lib64/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dplyr_1.0.7  tibble_3.0.1 Seurat_3.1.5

loaded via a namespace (and not attached):
 [1] httr_1.4.1          tidyr_1.1.3         jsonlite_1.6.1     
 [4] viridisLite_0.3.0   splines_3.6.1 