In [None]:
## Notebook env: farnaz_spatial (R kernel)

In [1]:
library(Seurat)
library(SeuratObject)
library(Matrix)
library(biomaRt)

Attaching SeuratObject



In [2]:
paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/seurat/', c('A1', 'B1', 'C1', 'D1'), '_seuratprocessed.rds')
slices_seurat = lapply(X = paths, FUN = readRDS)

In [3]:
names(slices_seurat) = c('A1', 'B1', 'C1', 'D1')

In [4]:
normalized_mtxs = lapply(slices_seurat, GetAssayData, assay = 'SCT', slot = 'data')

## 1. Celltype based cellphonedb data prep

In [125]:
mtx_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/celltype_prepped_data/', names(slices_seurat), '_data', '/matrix.mtx')
feature_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/celltype_prepped_data/', names(slices_seurat), '_data', '/features.tsv')
barcode_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/celltype_prepped_data/', names(slices_seurat), '_data', '/barcodes.tsv')
meta_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/celltype_prepped_data/', names(slices_seurat), '_meta.tsv')

In [126]:
mtx_paths

In [32]:
mouse_features = lapply(normalized_mtxs, rownames)

In [7]:
# Basic function to convert mouse to human gene names
convertMouseGeneList <- function(x){
require("biomaRt")
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
genesV2 = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = x , mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows=T)
humanx <- unique(genesV2[, 2])
# Print the first 6 genes found to the screen
print(head(humanx))
return(humanx)
}


library(dplyr)

mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")

convert_mouse_to_human <- function(gene_list){

  output = c()

  for(gene in gene_list){
    class_key = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="mouse, laboratory"))[['DB.Class.Key']]
    if(!identical(class_key, integer(0)) ){
      human_genes = (mouse_human_genes %>% filter(DB.Class.Key == class_key & Common.Organism.Name=="human"))[,"Symbol"]
      for(human_gene in human_genes){
        output = append(output,human_gene)
      }
    }
  }

  return (output)
}


Attaching package: ‘dplyr’


The following object is masked from ‘package:biomaRt’:

    select


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [37]:
## a single run of function should take 5 mins -- this loop finds genes that find a 1:1 mouse:human mapping

mouse_to_human <- function(features_mouse){
    features_human = vector(mode = 'character', length = length(features_mouse))
    tracker = vector(mode = 'character', length = length(features_mouse))
    pb = txtProgressBar(min = 0, max = length(features_mouse), initial = 0, style = 3)
    for (i in 1:length(features_mouse)){
        out = convert_mouse_to_human(features_mouse[i])
        if (length(out) < 1){
            tracker[i] = 'none'
        }
        if (length(out) > 1){
            tracker[i] = 'multiple'
        }
        if (length(out) == 1){
            features_human[i] = out
            tracker[i] = 'single'
        }
        setTxtProgressBar(pb,i)
    }
    return(list(features_human, tracker))
    close(pb)
}


In [39]:
human_output = lapply(mouse_features, mouse_to_human)



“longer object length is not a multiple of shorter object length”




“longer object length is not a multiple of shorter object length”




“longer object length is not a multiple of shorter object length”




“longer object length is not a multiple of shorter object length”




“longer object length is not a multiple of shorter object length”




In [41]:
gene_filtered_mtxs = mapply(FUN = function(mtx, human_out){return(mtx[which(human_out[[2]] == 'single'),])}, 
                            normalized_mtxs, human_output)

In [None]:
lapply(gene_filtered_mtxs, dim)

In [44]:
barcodes = lapply(gene_filtered_mtxs, colnames)

In [49]:
mapply(FUN = writeMM, gene_filtered_mtxs, mtx_paths)

In [51]:
human_features = lapply(human_output, function(human_out){human_out[[1]][which(human_out[[2]] == 'single')]})

In [52]:
lapply(human_features, length)

In [53]:
# Save normalised counts - NOT scaled!
# save gene and cell names
mapply(FUN = write, human_features, feature_paths)
mapply(FUN = write, barcodes, barcode_paths)

In [59]:
head(slices_seurat[[1]]@assays$predictions@data)

Unnamed: 0,AAACAAGTATCTCCCA-1,AAACACCAATAACTGC-1,AAACAGCTTTCAGAAG-1,AAACAGGGTCTATATT-1,AAACAGTGTTCCTGGG-1,AAACATGGTGAGAGGA-1,AAACATTTCCCGGATT-1,AAACCGGGTAGGTACC-1,AAACCGTTCGTCCAGG-1,AAACCTAAGCAGCCGG-1,⋯,TTGTGGTAGGAGGGAT-1,TTGTGGTGGTACTAAG-1,TTGTGTATGCCACCAA-1,TTGTGTTTCCCGAAAG-1,TTGTTCTAGATACGCT-1,TTGTTTCACATCCAGG-1,TTGTTTCATTAGTCTA-1,TTGTTTCCATACAACT-1,TTGTTTGTATTACACG-1,TTGTTTGTGTAAATTC-1
CD14-cells-cluster30,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,⋯,0.0,0,0.0,0,0,0.0,0.0,0.0,0,0
B-cells,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,⋯,0.0,0,0.0,0,0,0.0,0.0,0.0,0,0
Th1-cells,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,⋯,0.0,0,0.0,0,0,0.0,0.0,0.0,0,0
Macrophages-cluster28,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,⋯,0.0,0,0.0,0,0,0.0,0.0,0.0,0,0
VSM,0.02393297,0,0.02521082,0.02367646,0,0.0191896,0.02436918,0,0.01608867,0,⋯,0.01997683,0,0.01635382,0,0,0.02528993,0.01914578,0.02059904,0,0
NK-cells,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,⋯,0.0,0,0.0,0,0,0.0,0.0,0.0,0,0


In [63]:
seurat_decompositions = readRDS('/home/ssobti/projects/farnaz_spatial/output_data/seurat/seurat_mBAT_ref_decomposition_scores.rds')
seurat_decompositions = lapply(seurat_decompositions, t)

In [67]:
cell_type_nms_corrected <- gsub("-", "_", colnames(seurat_decompositions[[1]]))

In [68]:
seurat_preds <- lapply(seurat_decompositions, function(x) cell_type_nms_corrected[apply(x, 1, which.max)])

In [70]:
head(seurat_preds[[1]])

In [74]:
## cell id and celltype dfs
meta_dfs = list()

for (i in 1:4){
    slices_seurat[[i]]@meta.data <- dplyr::mutate(slices_seurat[[i]]@meta.data, cell_id = rownames(slices_seurat[[i]]@meta.data), 
                                                  celltype = seurat_preds[[i]])
    meta_dfs[[i]] = slices_seurat[[i]]@meta.data[, c('cell_id', 'celltype')]
}

In [76]:
mapply(write.table, x = meta_dfs, file = meta_paths, sep = '\t', quote = F, row.names = F)

## 2. Cluster based cellphonedb data prep

In [96]:
meta_dfs = list()

for (i in 1:4){
    meta_dfs[[i]] = slices_seurat[[i]]@meta.data[, c('cell_id', 'seurat_clusters')]
    meta_dfs[[i]]$seurat_clusters = paste0('cluster', meta_dfs[[i]]$seurat_clusters)
}

In [20]:
mtx_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/cluster_prepped_data/', names(slices_seurat), '_data', '/matrix.mtx')
feature_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/cluster_prepped_data/', names(slices_seurat), '_data', '/features.tsv')
barcode_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/cluster_prepped_data/', names(slices_seurat), '_data', '/barcodes.tsv')
meta_paths = paste0('/home/ssobti/projects/farnaz_spatial/output_data/cellphonedb/cluster_prepped_data/', names(slices_seurat), '_meta.tsv')

In [98]:
mapply(FUN = writeMM, gene_filtered_mtxs, mtx_paths)
mapply(FUN = write, human_features, feature_paths)
mapply(FUN = write, barcodes, barcode_paths)
mapply(write.table, x = meta_dfs, file = meta_paths, sep = '\t', quote = F, row.names = F)