In [None]:
library(Seurat)
library(dplyr)
library(tidyverse)
library(MetBrewer)
library(reshape)
library(data.table)

In [None]:
# Load the query Seurat object (high-resolution clusters) from disk
seurat <- readRDS(file = "seurat_aggr.rds")


In [None]:
# Load the Seurat object of every reference atlas used for label transfer.
# Each atlas lives in its own directory as 'seurat.rds'.
seurat.regev   <- readRDS(file = 'Regev_2020_human_mouse_adult_ENS/seurat.rds')
seurat.ginty   <- readRDS(file = 'Ginty_2020_mouse_dev_adult_DRG/seurat.rds')
seurat.renthal <- readRDS(file = 'Renthal_2022_human_mouse_adult_TG/seurat.rds')
seurat.briscoe <- readRDS(
  file = 'Briscoe_2021_human_mouse_dev_spinal_cord/seurat.rds'
)
seurat.levine  <- readRDS(file = 'Levine_2023_human_adult_spinal_cord/seurat.rds')
seurat.clark   <- readRDS(file = 'Clark_2020_human_dev_retina/seurat.rds')
seurat.LNall   <- readRDS(file = 'Linnarson/seurat.rds')
seurat.braun   <- readRDS(file = 'Braun_2023_human_dev_brain/seurat.rds')


In [4]:
# Collect the reference Seurat objects in one list keyed by atlas name
seurat.list <- setNames(
  list(seurat.LNall, seurat.braun, seurat.briscoe, seurat.clark,
       seurat.ginty, seurat.levine, seurat.regev, seurat.renthal),
  c('Linnarson_mouse_all', 'Braun', 'Briscoe', 'Clark',
    'Ginty', 'Levine', 'Regev_mouse', 'Renthal_mouse')
)


In [None]:
## Locations and naming used to import the label-transfer anchors

# Root folder where the anchor files are stored
master.dir <- 'Label_transfer/'

# Sub-folder of the dataset currently being processed
data.dir <- 'iGlut_pre/'

# Per-reference sub-folders; kept in the same order as `ref.name` below,
# because the two vectors are indexed in parallel when anchors are read.
ref.atlas <- c(
  'Braun/', 'Linnarson_mouse_all/', 'Clark/', 'Briscoe/',
  'Levine/', 'Ginty/', 'Regev_mouse/', 'Renthal_mouse/'
)

# Short reference names used to label the imported anchors
ref.name <- c(
  'Braun', 'LNall', 'Clark', 'Briscoe',
  'Levine', 'Ginty', 'Regev', 'Renthal'
)

# One anchor file ('<condition>.rds') is read per reference and condition
conditions <- c('cca_l', 'cca_n', 'cca_mkrn')


In [6]:
# One (initially empty) anchor container per reference atlas
anchor.braun <- anchor.LNall <- anchor.clark <- anchor.briscoe <- list()
anchor.levine <- anchor.ginty <- anchor.regev <- anchor.renthal <- list()

# Gather the containers in a list keyed by the short reference name
anchor.list <- setNames(
  list(anchor.braun, anchor.LNall, anchor.clark, anchor.briscoe,
       anchor.levine, anchor.ginty, anchor.regev, anchor.renthal),
  c('Braun', 'LNall', 'Clark', 'Briscoe',
    'Levine', 'Ginty', 'Regev', 'Renthal')
)


In [7]:
# Load every anchor file: one per (reference, condition) pair.
# `ref.name`, `ref.atlas` and `anchor.list` are indexed in parallel, so the
# position of a reference must be the same in all three.
for (ref.idx in seq_along(ref.name)) {
  for (cond in conditions) {
    anchor.path <- paste0(master.dir, data.dir, ref.atlas[ref.idx], cond, '.rds')
    anchor.key <- paste0(ref.name[ref.idx], '_', cond)
    anchor.list[[ref.idx]][[anchor.key]] <- readRDS(anchor.path)
  }
}


In [9]:
# Number of reference annotations (features) that will be transferred
ntransfer <- 22

In [10]:
# Prediction store: one empty list per transferred annotation
pd.list <- lapply(seq_len(ntransfer), function(slot) list())

In [11]:
# Reference annotations to transfer, grouped by atlas.
# The order must line up, element by element, with `anchor.input`.
ref.list <- c(
  # Braun
  list(seurat.braun$Subregion,
       seurat.braun$Subclass,
       seurat.braun$CommonestSubdivision,
       seurat.braun$AutoAnnotation,
       seurat.braun$AutoAnnotationDefs),
  # LNall
  list(seurat.LNall$Subclass,
       seurat.LNall$ClusterName,
       seurat.LNall$Taxonomy_group,
       seurat.LNall$Region,
       seurat.LNall$Tissue,
       seurat.LNall$atlas),
  # Clark
  list(seurat.clark$umap2_CellType,
       seurat.clark$age),
  # Briscoe
  list(seurat.briscoe$Type_step1,
       seurat.briscoe$Type_step2),
  # Levine
  list(seurat.levine$top_level_annotation,
       seurat.levine$subtype_annotation),
  # Ginty
  list(seurat.ginty$annotation),
  # Regev
  list(seurat.regev$Annotation,
       seurat.regev$Type,
       seurat.regev$Region),
  # Renthal
  list(seurat.renthal$subtype)
)


In [12]:
# Anchor sets to use for each transferred annotation.
# Each reference's anchors are repeated once per annotation taken from that
# reference, in the same order as the entries of `ref.list`.
anchor.input <- c(
  rep(list(anchor.list$Braun), 5),
  rep(list(anchor.list$LNall), 6),
  rep(list(anchor.list$Clark), 2),
  rep(list(anchor.list$Briscoe), 2),
  rep(list(anchor.list$Levine), 2),
  rep(list(anchor.list$Ginty), 1),
  rep(list(anchor.list$Regev), 3),
  rep(list(anchor.list$Renthal), 1)
)


In [None]:
# Run the label transfer for every annotation and every anchor set.
# Results are stored in pd.list[[annotation]][[anchor name]].
for (transfer.idx in seq_along(anchor.input)) {
  anchor.sets <- anchor.input[[transfer.idx]]
  for (set.idx in seq_along(anchor.sets)) {
    prediction <- TransferData(
      anchorset = anchor.sets[[set.idx]],
      refdata = ref.list[[transfer.idx]],
      dims = 1:20,
      weight.reduction = 'cca'
    )
    pd.list[[transfer.idx]][[names(anchor.sets)[set.idx]]] <- prediction
  }
}

In [14]:
# Container for the predicted labels: one empty list per annotation
pd.id <- lapply(seq_len(ntransfer), function(slot) list())


In [15]:
# Pull the 'predicted.id' column out of every prediction table, keeping the
# anchor-set names of pd.list as the element names.
for (k in seq_along(pd.list)) {
  pd.id[[k]] <- lapply(pd.list[[k]], function(pred) pred[, 'predicted.id'])
}


In [16]:
# Turn each list of predicted labels into a data frame (one column per
# anchor set) whose row names are taken from the first prediction table.
for (k in seq_along(pd.id)) {
  id.frame <- do.call(data.frame, pd.id[[k]])
  rownames(id.frame) <- rownames(pd.list[[k]][[1]])
  pd.id[[k]] <- id.frame
}


In [17]:
library(mclust) # used for the voting step

# Voting results: one empty character vector per transferred annotation
vote.list <- replicate(ntransfer, character(), simplify = FALSE)


Package 'mclust' version 6.0.0
Type 'citation("mclust")' for citing this R package in publications.


Attaching package: 'mclust'


The following object is masked from 'package:purrr':

    map




In [18]:
#Voting
# For every annotation, take the majority label across the prediction columns
# of each row.
# mclust::majorityVote() breaks ties at random, so the seed is fixed here to
# make the stored votes reproducible from run to run.
set.seed(1)
for (i in seq_along(pd.id)) {
  # vapply() builds the whole vote vector at once (no element-by-element
  # growth) and seq_len() copes with a table that has zero rows.
  vote.list[[i]] <- vapply(
    seq_len(nrow(pd.id[[i]])),
    function(x) majorityVote(as.character(pd.id[[i]][x, ]))$majority,
    character(1)
  )
}


In [19]:
# Append the voting result as a new last column of each pd.id data frame
for (k in seq_along(pd.id)) {
  vote.col <- ncol(pd.id[[k]]) + 1
  pd.id[[k]][, vote.col] <- vote.list[[k]]
}


In [20]:
#Compare between methods and see which ones are the best
# Layout of pd.id[[i]]: one prediction column per entry of `conditions`,
# followed by the majority vote. The column positions are derived from
# `conditions` instead of being fixed at 1:5 / 6, so the comparison keeps
# working when the number of transfer methods changes.
n.methods <- length(conditions)
vote.col <- n.methods + 1

comparison <- vector("list", ntransfer)
for (i in seq_along(comparison)) {
  stopifnot(ncol(pd.id[[i]]) >= vote.col)
  votes <- as.character(pd.id[[i]][[vote.col]])

  # One logical column per method: TRUE where the method's label equals the
  # majority vote of that row. Computed column-wise rather than by rbind()-ing
  # one row at a time.
  agrees <- lapply(pd.id[[i]][seq_len(n.methods)], function(pred) {
    same <- as.character(pred) == votes
    same[is.na(same)] <- FALSE # a missing prediction never counts as agreement
    same
  })
  comparison[[i]] <- as.data.frame(agrees)
}

In [None]:
# Label each agreement table with the method names and print, per method,
# the number of rows that agree with the majority vote.
for (x in seq_along(comparison)) {
  # Take as many leading pd.id column names as the comparison table has
  # columns, instead of assuming exactly five methods.
  method.cols <- seq_len(ncol(comparison[[x]]))
  colnames(comparison[[x]]) <- colnames(pd.id[[x]])[method.cols]
  print(colSums(comparison[[x]] == TRUE))
}

In [22]:
# Collect the 'prediction.score.max' column of every prediction table,
# organised like pd.id: one list per annotation, one element per anchor set.
pd.sc <- lapply(seq_len(ntransfer), function(slot) list())

for (k in seq_along(pd.list)) {
  pd.sc[[k]] <- lapply(
    pd.list[[k]],
    function(pred) pred[, 'prediction.score.max']
  )
}


In [23]:
# Turn each list of prediction scores into a data frame (one column per
# anchor set) whose row names are taken from the first prediction table.
for (k in seq_along(pd.sc)) {
  score.frame <- do.call(data.frame, pd.sc[[k]])
  rownames(score.frame) <- rownames(pd.list[[k]][[1]])
  pd.sc[[k]] <- score.frame
}


In [24]:
#Compare prediction score between methods and see which ones are the best
# The score columns are one per entry of `conditions`; the row-wise maximum
# is written to the column right after them. Deriving both positions from
# `conditions` replaces the fixed 1:5 / 6, which only fits five methods.
n.methods <- length(conditions)
for (i in seq_along(pd.sc)) {
  stopifnot(ncol(pd.sc[[i]]) >= n.methods)
  method.scores <- unname(as.list(pd.sc[[i]][seq_len(n.methods)]))
  # pmax() is the vectorised row-wise maximum; like max(), it returns NA for
  # a row that contains an NA.
  pd.sc[[i]][, n.methods + 1] <- do.call(pmax, method.scores)
}


In [25]:
#Braun: #'Subregion', 'Subclass', 'CommonestSubdivision', 'AutoAnnotation', 'AutoAnnotationDefs'
#LNall: #'Subclass', 'ClusterName', 'Taxonomy_group', 'Region', 'Tissue', 'atlas'

In [26]:
# Metadata column names for the voting results, one per transferred
# annotation and in the same order as the annotations were transferred.
pd.name <- c(
  'Braun_Subregion', 'Braun_Subclass', 'Braun_CommonestSubdivision',
  'Braun_AutoAnnotation', 'Braun_AutoAnnotationDefs',
  'LNall_Subclass', 'LNall_ClusterName', 'LNall_Taxonomy_group',
  'LNall_Region', 'LNall_Tissue', 'LNall_atlas',
  'Clark_CellType', 'Clark_age',
  'Briscoe_Type_step1', 'Briscoe_Type_step2',
  'Levine_top_level', 'Levine_subtype',
  'Ginty_annotation',
  'Regev_annotation', 'Regev_Type', 'Regev_Region',
  'Renthal_subtype'
)

# Matching column names for the scores
sc.name <- paste0(pd.name, '_score')

In [27]:
#Adding voting results to metadata
# In pd.id / pd.sc the per-method columns come first (one per entry of
# `conditions`) and the consensus value (majority vote / maximum score) sits
# in the column right after them. The position is derived from `conditions`
# instead of the fixed index 6, which only fits five methods.
summary.col <- length(conditions) + 1

for (i in seq_along(pd.list)) {
  stopifnot(
    ncol(pd.id[[i]]) >= summary.col,
    ncol(pd.sc[[i]]) >= summary.col
  )

  # Majority-vote label, stored under the annotation's metadata name
  vote.mtx <- data.frame(pd.id[[i]][[summary.col]])
  rownames(vote.mtx) <- rownames(pd.id[[i]])
  colnames(vote.mtx) <- pd.name[i]
  seurat <- AddMetaData(seurat, vote.mtx)

  # Best prediction score, stored under '<annotation>_score'
  max.score <- data.frame(pd.sc[[i]][[summary.col]])
  rownames(max.score) <- rownames(pd.sc[[i]])
  colnames(max.score) <- sc.name[i]
  seurat <- AddMetaData(seurat, max.score)
}


In [None]:
# Write the annotated Seurat object to disk
saveRDS(object = seurat, file = "iGlut_pre_aggr_LT.rds")


In [None]:
# Copy the metadata table out of the Seurat object
mtx <- seurat@meta.data
# Use the first metadata column as row names, then drop that column.
# NOTE(review): assumes the first column holds unique identifiers - confirm.
rownames(mtx) <- mtx[[1]]
mtx[[1]] <- NULL


In [None]:
# Names of the metadata columns of interest: the transferred labels ...
pd.name <- c(
  'Braun_Subregion', 'Braun_Subclass', 'Braun_CommonestSubdivision',
  'Braun_AutoAnnotation', 'Braun_AutoAnnotationDefs',
  'LNall_Subclass', 'LNall_ClusterName', 'LNall_Taxonomy_group',
  'LNall_Region', 'LNall_Tissue', 'LNall_atlas',
  'Clark_CellType', 'Clark_age',
  'Briscoe_Type_step1', 'Briscoe_Type_step2',
  'Levine_top_level', 'Levine_subtype',
  'Ginty_annotation',
  'Regev_annotation', 'Regev_Type', 'Regev_Region',
  'Renthal_subtype'
)

# ... and their matching score columns
sc.name <- paste0(pd.name, '_score')

In [None]:
# Tidying the data
# Build one long table with a row per (high_res_clusters, annotation) pair
# that carries both the transferred label and its score.
# NOTE(review): both reshape and data.table are attached and each exports
# melt(); which one runs here depends on attach order - confirm.

# Long form of the label columns (measure.vars = pd.name)
t1 <- melt(mtx, id.vars=c('high_res_clusters', 'final_clustering'),
         measure.vars=pd.name)
# Long form of the score columns (measure.vars = sc.name)
t2 <- melt(mtx, id.vars=c('high_res_clusters', 'final_clustering'),
         measure.vars=sc.name)
# Strip '_score' from column 3 of t2 (presumably melt's 'variable' column,
# given the two id.vars) so its values match the label names used in t1
t2[,3] <- gsub('_score', '', t2[,3])
# Join labels and scores on (high_res_clusters, variable).
# NOTE(review): this join is one-to-one only if each high_res_clusters value
# occurs in a single row of mtx - confirm.
t <- merge(t1, t2, by.x=c('high_res_clusters','variable'), by.y=c('high_res_clusters','variable'))
# Drop column 5; presumably the duplicate final_clustering brought in from
# t2, since five columns remain and are renamed just below
t[,5] <- NULL

# Rename by position: key, annotation name, final cluster, label, score
colnames(t) <- c('high_res_clusters', 'atlas',
                'final_clustering', 'label', 'score')

# Reorder the columns so the two cluster columns come first
t <- t[c('high_res_clusters', 'final_clustering', 'atlas',
        'label', 'score')]

In [None]:
#Create a list for each clusters
# Clusters are addressed by number, 0 .. (n - 1), where n is the count of
# distinct final_clustering values; list slot k holds the rows of cluster
# k - 1.
# seq_len() yields an empty sequence when there are no clusters (1:0 would
# iterate over 1 and 0), and lapply() builds the list in one step instead of
# growing it inside a loop.
cluster.ids <- seq_len(length(unique(t$final_clustering))) - 1

cl.list <- lapply(cluster.ids, function(cluster.id) {
  t %>% filter(final_clustering %in% cluster.id)
})

In [None]:
## Per-cluster summary: for every atlas annotation keep the most frequent
## label, with its count, mean score and share of the cluster
df.list <- list()

for (k in seq_along(cl.list)) {
  cluster.tbl <- cl.list[[k]]
  # Number of distinct high_res_clusters entries in this final cluster;
  # used as the denominator of pct.Count
  n.members <- length(unique(cluster.tbl$high_res_clusters))

  top.labels <- cluster.tbl %>%
    group_by(atlas, label) %>%
    summarize(Count = n(), Mean = mean(score, na.rm = TRUE)) %>%
    # keep the label(s) with the highest count within each atlas ...
    top_n(n = 1, wt = Count) %>%
    # ... and break ties by keeping the first row per atlas
    distinct(atlas, .keep_all = TRUE)

  top.labels$pct.Count <- top.labels$Count / n.members * 100
  top.labels$cluster <- (k - 1)
  df.list[[k]] <- top.labels
}

In [None]:
#Create final data with cluster summary
# Builds a wide table with one row per cluster: for each row of that
# cluster's summary, columns 2, 4 and 5 are appended side by side
# (label, Mean and pct.Count, given how df.list is assembled).

# Start from an empty (zero-row) frame that the cluster rows are bound onto
final_data=data.frame(matrix(nrow=0, ncol=1))
for(x in seq_along(df.list)){
    # One-row, zero-column frame used as the seed for cbind()
    data=data.frame(matrix(nrow=1, ncol=0))
    
    # Append the three summary columns of every summary row, in row order
    for(i in 1:nrow(df.list[[x]])){
    data=cbind(data,df.list[[x]][i,c(2,4,5)])
    }
    
    # NOTE(review): the column names repeat for every summary row at this
    # point; rbind() of frames with duplicated names is relied on here -
    # confirm it lines the clusters up as intended.
    final_data <- rbind(final_data, data)
}

In [None]:
# Row names: cluster numbers, counted from 0
rownames(final_data) <- seq_along(df.list) - 1

# Column names: '<atlas>_label', '<atlas>_Mean', '<atlas>_pct.Count' for
# every level of the atlas column of the first cluster summary, in level order
test <- levels(df.list[[1]]$atlas)
shared_part <- c('label', 'Mean', 'pct.Count')
new_colnames <- lapply(
  test,
  function(atlas.name) paste0(atlas.name, '_', shared_part)
)
colnames(final_data) <- unlist(new_colnames)