In [None]:
library(Seurat)
library(data.table)
library(ggplot2)
library(corrplot)
library(plotly)
library(heatmaply)
library(dplyr)
library(grid)
library(gridExtra)

In [None]:
# Sequential light-to-dark red palette: `redcolor(n)` interpolates n colours
# between the eight anchor shades listed here.
redcolor <- colorRampPalette(c(
  "#FFF5F0", "#FEE0D2", "#FCBBA1", "#FC9272",
  "#FB6A4A", "#EF3B2C", "#CB181D", "#99000D"
))

In [None]:
# Switch the session's working directory; the relative output paths used
# later in this notebook (CSV/HTML files, "gProfiler2/...") resolve against it.
setwd(".../SVZ/Test_Code")

In [None]:
# Load the serialized object (a Seurat object, judging by the file name) and
# make "RNA" its default assay.
datasets <- readRDS(".../RData/SEZ_seurat_object.rds")
DefaultAssay(datasets) <- "RNA"

In [None]:
# Build a cluster tree on the RNA assay and store it back in the object.
# library() is used rather than require() so that a missing 'ape'
# installation stops here with a clear error instead of returning FALSE and
# failing later.
library(ape)
datasets <- BuildClusterTree(object = datasets, assay = "RNA")


In [None]:
# Average the expression with AverageExpression()'s default grouping, then
# correlate the resulting columns with each other.
avg_list <- AverageExpression(datasets)

# "RNA" contains all genes; "integrated" only the highly variable genes
# (cell-type related). `[[` is used instead of `$` so the list element is
# matched exactly, and a missing assay is reported explicitly instead of
# surfacing as an obscure error inside cor().
avg_integrated <- avg_list[["integrated"]]
if (is.null(avg_integrated)) {
  stop(
    "AverageExpression() returned no 'integrated' assay; available: ",
    paste(names(avg_list), collapse = ", "),
    call. = FALSE
  )
}
avg_mat <- as.matrix(avg_integrated)

# Pearson correlation between the columns of the averaged matrix
# (corrplot is already attached at the top of the notebook, and cor() comes
# from stats, so no extra package load is needed here).
M <- cor(avg_mat, method = "pearson")
head(round(M, 2))

In [None]:
# Interactive correlation heatmap of M on a fixed 0-1 colour scale, written
# to a standalone HTML file.
heatmaply_cor(
  M,
  limits = c(0, 1),
  colors = redcolor,
  show.dendrogram = c(FALSE, TRUE),
  row_dend_left = TRUE,
  hclust_method = "single",
  showticklabels = c(TRUE, TRUE),
  file = "heatmaply_clusters.html"
)

In [None]:
# Print the current levels of the object. Original note: the object is
# already ordered according to the hierarchical clustering.
# NOTE(review): BuildClusterTree() is called above without `reorder = TRUE`,
# so this ordering presumably comes from the saved object itself - confirm.
levels(datasets)

In [None]:
# Find cluster marker genes

In [None]:
# Work on a copy: log-normalise, scale every gene, and switch the active
# identities to the `cluster` metadata column.
# NOTE(review): the marker search in the following cell is run on `datasets`,
# not on `datasets_2` - confirm which object is intended.
datasets_2 <- NormalizeData(
  datasets,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
all.genes <- rownames(datasets_2)
datasets_2 <- ScaleData(datasets_2, features = all.genes)
Idents(datasets_2) <- datasets_2$cluster

In [None]:
# Positive markers for every identity class of `datasets`, saved to CSV so
# the GO analysis below can reload them.
# NOTE(review): this uses `datasets`, while the preceding cell prepares
# `datasets_2` (normalised, scaled, identities set to `cluster`) - confirm
# that `datasets` is the intended input.
all.markers <- FindAllMarkers(
  object = datasets,
  only.pos = TRUE,
  return.thresh = 0.01
)
write.csv(all.markers, file = "marker_genes_all_clusters_d30k10r04.csv")

In [None]:
#GO analysis

In [None]:
# Reload the marker table, drop mitochondrial genes (names starting with
# "MT-") and keep markers with avg_log2FC >= 0.5 and adjusted p <= 0.05.
markers <- read.csv("marker_genes_all_clusters_d30k10r04.csv")
markers$cluster <- factor(markers$cluster, levels = as.character(0:19))

# Sort by cluster, then apply all three filters in one data.table step.
# `X` is the row-name column read.csv() creates from the unnamed first CSV
# column.
markers <- as.data.table(markers[order(markers$cluster), ])
markers <- markers[
  avg_log2FC >= 0.5 & p_val_adj <= 0.05 & !grepl("^MT-", X)
]

markers <- as.data.frame(markers)


# Split the filtered markers by cluster and write one gene list per cluster
# as input for the g:Profiler step.
topFC05 <- markers
#topFC05<-subset(topFC05, subset = cluster %in% astrocytes)
topFC05 <- as.data.frame(topFC05)

topFC05_list <- split(topFC05, f = topFC05$cluster)

levels(topFC05$cluster)
subcelltype01s <- levels(topFC05$cluster)

# write.csv() fails when the target directory is missing, so make sure it
# exists (no warning if it is already there).
dir.create("gProfiler2", showWarnings = FALSE)

for (subcelltype01 in subcelltype01s) {
  # Single-bracket indexing keeps a one-column data frame, so the CSV gets a
  # "gene" header that the next step reads back.
  x <- topFC05_list[[subcelltype01]]["gene"]
  write.csv(x, paste0("gProfiler2/marker_topFC05_", subcelltype01, "_.csv"))
}
# Run g:Profiler on each cluster's gene list, keep the GO:BP terms, and write
# one term table per cluster ranked by -log10(p). Then read the tables back
# and stack them into `df` with a `subcelltype01` column.
#
# gost() is called as gprofiler2::gost() because this notebook never attaches
# gprofiler2.
go_columns <- c(
  "term_name", "p_value", "source", "term_id",
  "intersection_size", "query_size", "term_size"
)

for (subcelltype01 in subcelltype01s) {
  marker_genes <- read.csv(
    paste0("gProfiler2/marker_topFC05_", subcelltype01, "_.csv")
  )
  marker_genes <- marker_genes$gene

  # Skip the web query when the cluster has no marker genes left after
  # filtering; an empty query cannot be enriched.
  gostres <- NULL
  if (length(marker_genes) > 0) {
    gostres <- gprofiler2::gost(
      query = marker_genes,
      organism = "hsapiens", ordered_query = FALSE,
      multi_query = FALSE, significant = TRUE, exclude_iea = FALSE,
      measure_underrepresentation = FALSE, evcodes = FALSE,
      user_threshold = 0.05, correction_method = "g_SCS",
      domain_scope = "annotated", custom_bg = NULL,
      numeric_ns = "", sources = NULL, as_short_link = FALSE
    )
  }

  if (is.null(gostres)) {
    # gost() returns NULL when nothing is significant; write an empty table
    # with the expected columns so the per-cluster file always exists.
    warning(
      "No enrichment results for cluster ", subcelltype01,
      "; writing an empty GO:BP table.",
      call. = FALSE
    )
    result <- data.frame(
      term_name = character(0),
      p_value = numeric(0),
      source = character(0),
      term_id = character(0),
      intersection_size = integer(0),
      query_size = integer(0),
      term_size = integer(0)
    )
  } else {
    # Other sources ("GO:CC", "REAC", "GO:MF", ...) are dropped here.
    is_bp <- gostres$result$source %in% c("GO:BP")
    gostres$result <- gostres$result[is_bp, , drop = FALSE]
    result <- gostres$result[, go_columns]
  }

  result$neg_log_p <- -log10(result$p_value)
  result <- result %>% arrange(dplyr::desc(neg_log_p))

  write.csv(result, paste0("gProfiler2/go_BP_FC05top_", subcelltype01, ".csv"))
}

my_levels <- levels(markers$cluster)

df <- list()
p_val <- c()
for (subcelltype01 in my_levels) {
  go_terms <- read.csv(paste0("gProfiler2/go_BP_FC05top_", subcelltype01, ".csv"))
  #go_terms=go_terms[1:10,]
  p <- go_terms$neg_log_p
  p_val <- c(p_val, p)
  # rep(..., nrow()) instead of a bare scalar so a zero-row table (cluster
  # without GO:BP terms) does not raise a "replacement has 1 row" error.
  go_terms$subcelltype01 <- rep(subcelltype01, nrow(go_terms))
  df[[subcelltype01]] <- go_terms
}

# rbind() drops zero-row tables, so clusters without terms simply vanish from
# `df` and from the factor levels below.
df <- do.call(rbind, df)
df
df$subcelltype01 <- factor(df$subcelltype01)

In [None]:
library(rrvgo) # address redundancy of terms

In [None]:
# Eliminate redundant terms: for every cluster, compute the similarity
# between its GO:BP terms, reduce them with rrvgo, and save the reduced table.
subcelltype01s <- levels(df$subcelltype01)
df_l <- split(df, f = df$subcelltype01)

for (subcelltype01 in subcelltype01s) {
  cluster_terms <- df_l[[subcelltype01]]

  # Pairwise similarity matrix of the cluster's term ids ("Rel" method).
  sim_mat <- calculateSimMatrix(
    cluster_terms$term_id,
    orgdb = "org.Hs.eg.db",
    ont = c("BP"),
    method = "Rel"
  )

  # Score vector named by term id; the score is -log10(p).
  term_scores <- setNames(cluster_terms$neg_log_p, cluster_terms$term_id)

  reduced <- reduceSimMatrix(
    sim_mat,
    term_scores,
    threshold = 0.7,
    orgdb = "org.Hs.eg.db"
  )

  write.csv(
    reduced,
    paste0("gProfiler2/go_BP_reduced_FC05top_", subcelltype01, ".csv")
  )
}
# Read the reduced tables back, tag each with its cluster, and stack them.
go_red <- lapply(
  setNames(subcelltype01s, subcelltype01s),
  function(cl) {
    reduced <- read.csv(
      paste0("gProfiler2/go_BP_reduced_FC05top_", cl, ".csv")
    )
    reduced$subcelltype01 <- cl
    reduced
  }
)
go_red <- do.call(rbind, go_red)

# Per cluster, keep only the first row of every parent term.
go_red_l <- lapply(
  split(go_red, f = go_red$subcelltype01),
  function(tbl) distinct(tbl, parentTerm, .keep_all = TRUE)
)
go_red_l

# Narrow every per-cluster table to the three columns used downstream.
new_list <- lapply(
  go_red_l,
  function(tbl) tbl[, c("parentTerm", "score", "subcelltype01")]
)

In [None]:
# Stack the per-cluster parent-term tables and attach the matching rows of
# `df` (matched on term name within the same cluster).
new_list_2 <- do.call("rbind", new_list) %>%
  left_join(
    df,
    by = c("parentTerm" = "term_name", "subcelltype01" = "subcelltype01")
  )

# Save the combined table, then reload it from disk and display it.
write.csv(new_list_2, "gProfiler2/go_BP_reduced_FC05top_all_cells.csv")
new_list_2 <- read.csv("gProfiler2/go_BP_reduced_FC05top_all_cells.csv")
new_list_2

In [None]:
# Cluster order taken from the hierarchical clustering.
a <- c(
  "5", "1", "17", "6", "18", "15", "9", "10", "12", "3",
  "2", "14", "13", "11", "19", "4", "8", "7", "0", "16"
)
new_list_2$subcelltype01 <- factor(new_list_2$subcelltype01, levels = a)

# Sort by cluster (in the order above), then by descending score, then by
# term name; fix the parent-term factor levels to that order of appearance.
new_list_3 <- new_list_2[
  order(new_list_2$subcelltype01, -new_list_2$score, new_list_2$parentTerm),
]
parentTerms <- unique(new_list_3$parentTerm)
new_list_3$parentTerm <- factor(new_list_3$parentTerm, levels = parentTerms)

# Keep the highest-scoring parent term of every cluster (ties are kept).
new_list_4 <- top_n(group_by(new_list_3, subcelltype01), n = 1, wt = score)
length(unique(new_list_4$parentTerm))
parentTerms <- unique(new_list_4$parentTerm)
new_list_4$parentTerm <- factor(new_list_4$parentTerm, levels = parentTerms)

# Label columns: "<intersection> / <term size>" and the term name followed by
# that label.
new_list_4 <- new_list_4 %>%
  mutate(
    size = paste(intersection_size, "/", term_size),
    term_plus_size = paste(parentTerm, " ", size)
  )

head(new_list_4)

In [None]:
# Save the per-cluster top-term table (with the size / label columns added
# above) to CSV.
write.csv(new_list_4, "plot_gprofiler.csv")