In [1]:
library(monocle3)
library(Seurat)



Loading required package: Biobase

Loading required package: BiocGenerics

Loading required package: parallel


Attaching package: 'BiocGenerics'


The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs


The following objects are masked from 'package:base':

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which, which.max, which.min


Welcome to Bioconductor

    Vignettes contain introductory material; vie

In [2]:
# Build a single Seurat object holding only the neuronal cells of every brain
# region listed in `regions`, save it as an RDS file and return it.
#
# Arguments:
#   data.dir  directory containing the per-region `F_GRCm38.81.P60<prefix>.*`
#             files and the annotation CSV (default: "saunders-macosko").
#   out.file  path the merged object is written to with saveRDS()
#             (default: "saunders-neurons.rds").
#
# Returns: the merged Seurat object. Its meta.data carries `region`,
#   `cluster` and `subcluster` columns in addition to what
#   CreateSeuratObject() adds.
extract.neurons <- function(data.dir = "saunders-macosko",
                            out.file = "saunders-neurons.rds"){
    library(DropSeq.util)
    library(Seurat)

    # file prefix -> c(tissue id used in the annotation table, readable name)
    regions <- list(
            "Cerebellum_ALT" = c("CB","cerebellum"),
            "EntoPeduncular" = c("ENT","entopeduncular"),
            "Cortex_noRep5_FRONTALonly" = c("FC", "frontal_cortex"),
            "GlobusPallidus" = c("GP","GlobusPallidus"),
            "Hippocampus" = c("HC","Hippocampus"),
            "Cortex_noRep5_POSTERIORonly" = c("PC", "posterior_cortex"),
            "Striatum" = c("STR", "Striatum"),
            "SubstantiaNigra" = c("SN","SubstantiaNigra"),
            "Thalamus" = c("TH","Thalamus")
    )

    saunders.meta <- read.csv(file.path(
        data.dir, 'annotation.BrainCellAtlas_Saunders_version_2018.04.01.csv'))

    sro <- NULL

    for (file.prefix in names(regions)){
        print(file.prefix)
        region.id <- regions[[file.prefix]][1]
        region.name <- regions[[file.prefix]][2]
        file.stem <- file.path(data.dir, paste0("F_GRCm38.81.P60", file.prefix))

        saunders.data <- loadSparseDge(paste0(file.stem, ".raw.dge.txt.gz"))
        clusters <- readRDS(paste0(file.stem, ".cell_cluster_outcomes.RDS"))

        message("Region ", region.name, " dim ",
                paste(dim(saunders.data), collapse = " x "))

        # The mask below is computed on the rows of `clusters` and applied to
        # the columns of `saunders.data`, so both must describe the same cells.
        # NOTE(review): only the count is checked here; the row order of
        # `clusters` is assumed to match the column order of the matrix - confirm.
        if (nrow(clusters) != ncol(saunders.data)){
            stop("Cluster table has ", nrow(clusters), " rows but the ",
                 file.prefix, " expression matrix has ", ncol(saunders.data),
                 " cells", call. = FALSE)
        }

        # Subclusters annotated as neurons for this tissue
        neuronal.clusters <- subset(saunders.meta, (class == 'NEURON') & (tissue == region.id))
        sc <- unique(neuronal.clusters$subcluster)

        # Keep cells that have a subcluster and whose subcluster is neuronal
        mask <- !is.na(clusters$subcluster) & (clusters$subcluster %in% sc)
        saunders.data <- saunders.data[, mask, drop = FALSE]
        cell.meta <- clusters[mask, , drop = FALSE]

        sro.tmp <- CreateSeuratObject(saunders.data, project = paste0('saunders_',region.id))
        sro.tmp@meta.data$region <- region.id
        sro.tmp@meta.data$cluster <- as.factor(cell.meta$cluster)
        sro.tmp@meta.data$subcluster <- as.factor(cell.meta$subcluster)

        if (is.null(sro)){
            sro <- sro.tmp
        } else {
            # The regions do not share an identical gene set; the feature
            # count reported here changes as further regions are merged in.
            sro <- merge(sro, sro.tmp)
            message(paste('sro has',length(rownames(sro)),'features'))
        }
    }

    saveRDS(sro, out.file)
    sro
}

# Build the merged neuron-only Seurat object; extract.neurons() also writes it
# to 'saunders-neurons.rds' as a side effect.
sro <- extract.neurons()

Loading required package: data.table

"package 'data.table' was built under R version 3.6.2"

Attaching package: 'data.table'


The following object is masked from 'package:SummarizedExperiment':

    shift


The following object is masked from 'package:GenomicRanges':

    shift


The following object is masked from 'package:IRanges':

    shift


The following objects are masked from 'package:S4Vectors':

    first, second


Loading required package: Matrix

"package 'Matrix' was built under R version 3.6.2"

Attaching package: 'Matrix'


The following object is masked from 'package:S4Vectors':

    expand




[1] "Cerebellum_ALT"
[1] "EntoPeduncular"


sro has 25651 features



[1] "Cortex_noRep5_FRONTALonly"


sro has 30079 features



[1] "GlobusPallidus"


sro has 30710 features



[1] "Hippocampus"


sro has 31292 features



[1] "Cortex_noRep5_POSTERIORonly"


sro has 31697 features



[1] "Striatum"


sro has 31832 features



[1] "SubstantiaNigra"


sro has 32057 features



[1] "Thalamus"


sro has 32307 features



In [4]:
# Query Ensembl BioMart for the mouse (MGI) symbols linked to a set of
# human (HGNC) gene symbols.
#
# Arguments:
#   x  vector of gene symbols, passed as the values of the `hgnc_symbol`
#      filter on the human dataset.
#
# Returns: the data frame produced by getLDS(), unmodified.
#   convertMatrixToHomology() in this file reads its `HGNC.symbol` and
#   `MGI.symbol` columns.
convertHumanGeneList <- function(x){
    # library() instead of require(): stop right here if biomaRt is missing
    # rather than failing later with "could not find function useMart".
    library("biomaRt")
    human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
    mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")

    getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x,
           mart = human, attributesL = c("mgi_symbol"), martL = mouse,
           uniqueRows = TRUE)
}

# Re-key a gene-by-cell matrix from mouse (MGI) symbols to human (HGNC)
# symbols.
#
# Arguments:
#   mat  matrix (dense or sparse) whose row names are gene symbols.
#
# Returns: the rows of `mat` that have a usable homologue, ordered like the
#   homologue table and with the human symbol as row name. Always a matrix,
#   also when zero or one row remains.
convertMatrixToHomology <- function(mat){

    # NOTE(review): the row names of `mat` are sent through the `hgnc_symbol`
    # filter of convertHumanGeneList() although the rows are later matched
    # against `MGI.symbol` - confirm that this lookup direction is intended.
    homologues <- convertHumanGeneList(rownames(mat))

    # as.character() guards against factor columns, which would otherwise
    # index `mat` by integer code instead of by name.
    human.symbol <- as.character(homologues$HGNC.symbol)
    mouse.symbol <- as.character(homologues$MGI.symbol)

    # Exclude gene symbols not available in dataset
    in.mat <- mouse.symbol %in% rownames(mat)
    human.symbol <- human.symbol[in.mat]
    mouse.symbol <- mouse.symbol[in.mat]

    # Drop human symbols that occur more than once, so the new row names are
    # unique. Maybe aggregate instead?
    symbol.count <- table(human.symbol)
    single.hit <- human.symbol %in% names(symbol.count)[symbol.count == 1]
    human.symbol <- human.symbol[single.hit]
    mouse.symbol <- mouse.symbol[single.hit]

    # Select and order rows by the homologue table; drop = FALSE keeps the
    # result two-dimensional when a single gene survives.
    mat.homology <- mat[mouse.symbol, , drop = FALSE]

    # replace with human genes
    rownames(mat.homology) <- human.symbol
    mat.homology
}

# Re-key the merged counts to human gene symbols and wrap them in a new
# Seurat object that keeps the per-cell annotations of `sro`.
saunders.human <- convertMatrixToHomology(sro@assays$RNA@counts)

sro.human <- CreateSeuratObject(saunders.human, project = 'saunders-neurons')

# convertMatrixToHomology() only selects and renames rows, so the cells must
# still line up one-to-one with the metadata of `sro`.
stopifnot(identical(colnames(sro.human), rownames(sro@meta.data)))

# Copy the annotations over, but keep the nCount_RNA / nFeature_RNA that
# CreateSeuratObject() just computed on the reduced matrix. Replacing
# the whole meta.data table would reinstate the totals of the full mouse matrix.
carried <- setdiff(colnames(sro@meta.data), c("nCount_RNA", "nFeature_RNA"))
sro.human@meta.data[, carried] <- sro@meta.data[, carried, drop = FALSE]
saveRDS(sro.human, 'saunders-with-human-genes.rds')

Loading required package: biomaRt



In [5]:
# Wrap the human-symbol counts and their cell metadata in a monocle3
# cell_data_set and store it on disk.
cds <- new_cell_data_set(sro.human@assays$RNA@counts, cell_metadata = sro.human@meta.data)
# No gene metadata is passed above, so gene_short_name is filled in from the
# row names afterwards.
rowData(cds)$gene_short_name <- rownames(cds)
# saveRDS() does not create missing directories; make sure ./objects exists.
dir.create('./objects', showWarnings = FALSE, recursive = TRUE)
saveRDS(cds, './objects/saunders-homology-object-raw.rds')

