In [None]:
library(Seurat)
library(stringr)

In [None]:
# library() (rather than require()) aborts right here with a clear message if
# biomaRt is not installed, instead of failing later inside useMart().
library(biomaRt)

# Ensembl mart handles for the human and mouse gene datasets; both are used by
# the cross-species symbol lookup (getLDS) further down in this file.
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")

In [2]:
# Paths of the per-tissue R object files. full.names = TRUE keeps the directory
# prefix so every entry can be passed straight to load(); the processing loop
# takes basename() of each entry before parsing species / tissue from the name.
tisstypes = list.files(path = "../data/Robjs", full.names = TRUE)

# Gene annotation table; the loop looks up `gene_id` by `gene_name` in it.
geneinfo = read.table("/publicdata/gencode_v19_20151104/gene_info.tsv", header = TRUE)

# Directory prefix for the per-tissue output files (the tissue name is appended
# to it directly, so the trailing slash matters).
output_path = "../tables/Cibersort_input/"

In [None]:
# Convert mouse gene symbols (MGI) to the human symbols (HGNC) linked to them.
#
# Queries Ensembl through getLDS() using the top-level `mouse` and `human`
# mart objects.
#
# Args:
#   x: vector of mouse gene symbols (character or factor).
#
# Returns:
#   Character vector of the unique human symbols linked to any symbol in `x`.
#   The result is de-duplicated and NOT aligned with `x` (its length and order
#   can differ), so it cannot be used as a drop-in column replacement.
#   Returns character(0) when `x` has no usable symbols.
convertMouseGeneList <- function(x){

    # Drop repeats, NAs and blank symbols; skip the remote query entirely when
    # nothing is left to look up.
    x = unique(as.character(x))
    x = x[!is.na(x) & nzchar(x)]
    if (length(x) == 0){ return(character(0)) }

    genesV2 = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = x , mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows = TRUE)

    # Second column holds the linked human symbols; discard empty entries.
    humanx <- unique(as.character(genesV2[, 2]))
    humanx <- humanx[!is.na(humanx) & nzchar(humanx)]
    return(humanx)
}

In [None]:
# For every tissue file: find marker genes per annotated cell type, keep the
# top significant ones as signature genes, average their expression per cell
# type and write the resulting matrix to `output_path`.
for( tiss in tisstypes){

    # File names are split on "_": first field = species, second = tissue name.
    name_fields = str_split_fixed(basename(tiss), "_", 3)
    species     = name_fields[, 1]
    tissname    = name_fields[, 2]

    # Load data ---------------------------
    # load() returns only the NAMES of the restored objects, so restore into a
    # scratch environment and pick the Seurat object out of it explicitly. This
    # works whatever the object was called when saved and cannot clobber any
    # variable of this script.
    scratch   = new.env()
    obj_names = load(tiss, envir = scratch, verbose = FALSE)
    is_seurat = vapply(obj_names, function(nm) inherits(get(nm, envir = scratch), "Seurat"), logical(1))
    if (!any(is_seurat)){
        warning("No Seurat object found in ", tiss, "; skipping", call. = FALSE)
        next
    }
    rObj = get(obj_names[is_seurat][1], envir = scratch)
    rm(scratch)

    cts = unique(as.character(Idents(object = rObj))) # get annotated cell types in the tissue

    # Calculate DE genes ---------------------------
    # One DE table per cell type (that cell type vs. all other cells), keyed
    # "<celltype>_markers" with spaces replaced by underscores.
    markers = list()

    for( celltype in cts){

        marker_key = paste(gsub(" " , "_", celltype), "markers", sep = "_")
        de_table   = FindMarkers(rObj, ident.1 = celltype, ident.2 = NULL, min.pct = 0.25)

        markers[[marker_key]] = de_table
        # Also expose the table under its own variable name, as before, so it
        # can be inspected interactively after the loop.
        assign(marker_key, de_table)

    }

    # Identify signature genes ---------------------------
    # Per cell type: up-regulated (log fold change > 0.25), significant
    # (adjusted p < 0.05) genes, at most the 200 with the smallest adjusted p.
    sig.list = lapply(names(markers), function(marker_key){

        ct   = sub("_markers$", "", marker_key)
        temp = as.data.frame(markers[[marker_key]])

        # The fold-change column is `avg_logFC` in older Seurat releases and
        # `avg_log2FC` in newer ones; accept either.
        # NOTE(review): the 0.25 cut-off is applied unchanged to whichever
        # column is present, although the log base differs - confirm intent.
        lfc_col = intersect(c("avg_logFC", "avg_log2FC"), colnames(temp))
        if (length(lfc_col) == 0){
            stop("FindMarkers output for ", ct, " has no avg_logFC / avg_log2FC column", call. = FALSE)
        }

        temp = temp[order(temp$p_val_adj), ]
        # which() drops NA comparisons instead of turning them into NA rows.
        keep = which(temp[[lfc_col[1]]] > 0.25 & temp$p_val_adj < 0.05)

        # head() copes with fewer than 200 (including zero) passing genes.
        genes2add = head(rownames(temp)[keep], 200)

        data.frame(genes = genes2add, celltype = rep(ct, length(genes2add)), stringsAsFactors = FALSE)
    })
    sig.df = do.call(rbind, sig.list)

    if (is.null(sig.df) || nrow(sig.df) == 0){
        warning("No signature genes found for ", tiss, "; skipping", call. = FALSE)
        next
    }

    # Refine signature genes ---------------------------
    # A gene shared by several cell types is kept once (first occurrence).
    sig.df = sig.df[!duplicated(sig.df$genes), ]

    # `genes` stays in the object's own nomenclature because it is used to
    # query the Seurat object below; `human_gene` is what gets matched against
    # the (human) annotation table.
    sig.df$human_gene = sig.df$genes

    if (species == "mouse"){
        # convertMouseGeneList() returns only a de-duplicated vector of human
        # symbols that cannot be aligned back to rows, so fetch the full
        # mouse -> human table and match it row by row.
        # Column 1 = queried mouse symbol, column 2 = linked human symbol.
        orthologs = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = sig.df$genes, mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows = TRUE)
        sig.df$human_gene = orthologs[match(sig.df$genes, orthologs[, 1]), 2]
    }

    # Genes without an annotation match (or without an orthologue) get NA here.
    sig.df$geneid = geneinfo[match(sig.df$human_gene, geneinfo$gene_name), "gene_id"]

    # Calculate mean expression of signature gene for each cell type ---------------------------
    sig.avg_exp = AverageExpression(rObj, features = unique(sig.df$genes))
    sig.avg_exp = sig.avg_exp$RNA
    sig.avg_exp$gene_id = sig.df$geneid[match(rownames(sig.avg_exp), sig.df$genes)]
    rownames(sig.avg_exp) <- NULL
    # Move the gene_id column (currently last) to the front.
    signature_gene_matrix = sig.avg_exp[, c(ncol(sig.avg_exp), 1:(ncol(sig.avg_exp) - 1))]

    # Write results ---------------------------
    # NOTE(review): the file name is the tissue name only, so two species with
    # the same tissue would write to the same file - confirm this is intended.
    write.csv(signature_gene_matrix, file = paste(output_path, tissname, sep = ""))

}