In [1]:
suppressPackageStartupMessages({
    library(openxlsx)
    library(clusterProfiler)
})

Obtain cell type markers from text files.

In [2]:
# Collect the per-cell-type marker files from the RA2 directory.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name; an unescaped ".txt" would also pick up
# names such as "foo_txt.csv".
txt.files <- list.files(pattern = "\\.txt$", path = "RA2", full.names = TRUE)
meta_data <- strsplit(txt.files, split = "_")

# Labels attached to the marker files below.
# NOTE(review): labels are paired with files purely by position, so this
# assumes the (alphabetically sorted) list.files() output lines up with the
# order of this vector -- confirm against the actual file names.
celltypes <- c("b-cells", "cd4-tcells", "cd8-tcells", "dendritic-cells", "fibro1-cd55", "fibro2a-thy1",
              "fibro2b-thy1", "macrophage", "mast-cells", "nk-cells", "plasma-cells", "platelets", "tph-cells")

# Fail early with a readable message if the directory content does not match
# the expected number of cell types (otherwise setNames() errors cryptically).
if (length(txt.files) != length(celltypes)) {
    stop("Expected ", length(celltypes), " marker files in 'RA2' but found ",
         length(txt.files), call. = FALSE)
}

# Read column `x` of every marker table and name each gene vector after its
# cell type.
GeneSets <- setNames(lapply(txt.files, function(cell.type.file) {
    read.table(cell.type.file, header = TRUE, stringsAsFactors = FALSE)$x
}), nm = celltypes)

GeneSets

# Run GO on celltype markers

We use the enricher() function from the [clusterProfiler](https://bioconductor.org/packages/release/bioc/html/clusterProfiler.html) R package to perform GO enrichment analysis. The gene annotations were obtained from [MSigDB](http://software.broadinstitute.org/gsea/msigdb/collections.jsp) (GO Biological Processes dataset). Only terms with an adjusted p-value of <= 0.05 were returned.

In [3]:
# Load the GO Biological Process gene sets (GMT file) used as the
# term-to-gene table for the enrichment test.
h <- suppressPackageStartupMessages(
    read.gmt("../data/Patient1/c5.bp.v6.2.symbols.gmt")
)

# Run enricher() on the marker genes of every cell type and keep each result
# as a plain data frame, in a list named by cell type.
bp.eriched.terms <- lapply(setNames(nm = celltypes), function(cell_type) {
    enrichment <- enricher(gene = GeneSets[[cell_type]], TERM2GENE = h, pvalueCutoff = 0.05)
    as.data.frame(enrichment)
})

Export the results to an Excel file.

In [4]:
library(openxlsx)

# Build one worksheet per cell type: a title row, the marker gene list, a
# blank row, and then the enrichment table.
sh <- createWorkbook(title = "RA celltypes GO enrcihment")

for (cl in celltypes) {
    # Enrichment table without its first column; drop = FALSE guarantees the
    # result stays a data frame whatever the number of remaining columns.
    terms <- bp.eriched.terms[[cl]][, -1, drop = FALSE]

    addWorksheet(wb = sh, sheetName = cl)
    # The gsub() strips a "Cluster" prefix/suffix from the label; none of the
    # current cell type labels contain it, so the label is written unchanged.
    firstrow <- paste0("RA2 cluster: ", gsub(pattern = "Cluster", replacement = "", x = cl))
    writeData(wb = sh, x = firstrow, sheet = cl)
    secondrow <- paste0("marker genes: ", paste(GeneSets[[cl]], collapse = ", "))
    writeData(wb = sh, x = secondrow, sheet = cl, startRow = 2)
    writeData(wb = sh, x = "", sheet = cl, startRow = 3)

    # Skip the table when there is nothing to write (e.g. an empty result
    # converted to a zero-column data frame). seq_len() replaces 1:ncol(),
    # which would yield c(1, 0) for zero columns, and the widths are computed
    # for the columns actually written rather than for the untrimmed table.
    if (ncol(terms) > 0) {
        writeData(wb = sh, x = terms, sheet = cl, startRow = 4)
        setColWidths(wb = sh, sheet = cl, cols = seq_len(ncol(terms)), widths = "auto")
    }
}

# Make sure the target directory exists before saving.
out_file <- "Cell_type_genes_for_GO/RA2_cells_clusters_GO_enrichment.xlsx"
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
saveWorkbook(sh, file = out_file, overwrite = TRUE)

# Date

In [5]:
# Print the current system date and time to record when the notebook was run.
date()

# Session Info

In [6]:
# Print the R version, platform, and attached/loaded package versions for
# reproducibility.
sessionInfo()

R version 3.5.1 (2018-07-02)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS  10.14.5

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] GSEABase_1.42.0       graph_1.58.0          annotate_1.58.0      
 [4] XML_3.98-1.13         AnnotationDbi_1.44.0  IRanges_2.14.10      
 [7] S4Vectors_0.18.3      Biobase_2.40.0        BiocGenerics_0.26.0  
[10] clusterProfiler_3.8.1 openxlsx_4.1.0       

loaded via a namespace (and not attached):
 [1] viridis_0.5.1        tidyr_0.8.2          viridisLite_0.3.0   
 [4] bit64_0.9-7          jsonlite_1.6         splines_3.5.1       
 [7] ggraph_1.0.2         assertthat_0