In [None]:
# Input locations (placeholders; point these at real paths before running).
# RDS file that is read into `scRNA_TNKILC` below.
GENES_FILE_PATH <- "path/to/fine_typing_selected_genes.rds"
# Directory containing the coarse_<sampleID>.rds files.
COARSE_TYPED_DIR <- "path/to/coarse_typing_results/"


# plot TNKILC fine typing results

In [None]:
# Attach the packages used by this notebook. A few packages (Seurat,
# tidyverse, presto, gghighlight) are requested twice; the repeats are harmless.
# NOTE(review): require() returns FALSE with a warning when a package is
# missing instead of stopping, so a failed load only surfaces at first use.
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
require(readr)
# Raise the `future.globals.maxSize` option to 1000 * 1024^2 bytes.
options(future.globals.maxSize = 1000 * 1024 ^2)
require(gridExtra)
# Fix the random seed for reproducibility.
set.seed(1)
# Notebook display settings: plot resolution and matrix printing limits.
options(repr.plot.res=300)
options(repr.matrix.max.cols=50, repr.matrix.max.rows=10000)

In [None]:
# Load the scRNA object and work out which samples have coarse-typing results
# on disk.
set.seed(1)  # fixes the order produced by sample() below

scRNA_TNKILC <- readr::read_rds(GENES_FILE_PATH)
scRNA_TNKILC@meta.data$technology <- "scRNA"
scRNA_TNKILC@meta.data$combined_cell_names <- colnames(scRNA_TNKILC)

cellType <- "TNKILC"

# Row names of the scRNA object; used later to restrict the correlations.
selectedGenes <- rownames(scRNA_TNKILC)

# Keep only files named coarse_<sampleID>.rds. The pattern is anchored and the
# dot is escaped so that names which merely contain "coarse...rds" somewhere
# are not picked up.
coarseTypedSeurats <- list.files(COARSE_TYPED_DIR, pattern = "^coarse_.*\\.rds$")

# Strip exactly the leading "coarse_" and the trailing ".rds"; an unanchored
# '.rds' would also delete "<any char>rds" from inside a sample ID.
# sample() then shuffles the IDs (reproducibly, given the seed above).
sampleIDs <- sample(sub("\\.rds$", "", sub("^coarse_", "", coarseTypedSeurats)))


## correlation plots, all samples

In [None]:
# File name of the saved logFC plot list expected for each sample.
plotLists <- paste0("logFC_plotlist_", sampleIDs, ".rds")
plotLists

In [None]:
# Export every saved logFC plot list to a PDF with one plot per page.
options(repr.plot.width = 15, repr.plot.height = 10, repr.plot.res = 300)
for (pl in plotLists) {
  # Samples without a saved plot list are skipped.
  if (!file.exists(pl)) {
    next
  }
  plotlist <- read_rds(pl)
  ggsave(
    # Replace only the trailing ".rds" extension. The previous unescaped,
    # unanchored pattern could also rewrite "<any char>rds" inside a sample ID.
    filename = sub("\\.rds$", ".pdf", pl),
    plot = marrangeGrob(plotlist, nrow = 1, ncol = 1),
    width = 15, height = 10
  )
}

## correlation heatmap

In [None]:
# read in scRNA glmm
# read in merfish glmm
# calculate correlation coeff all genes
# calculate corr coeff selected genes
# store these variables in two dataframes

### Note: some genes in the MERFISH results are not present in the scRNA results

In [None]:
# For the first sample, count the distinct MERFISH features that have no scRNA
# logFC after joining the two marginal-effects tables on contrast and feature.
sampleID <- sampleIDs[1]

scRNA_marginal_effects <- glue('effects_marginal_scRNA_', sampleID, '.csv') %>%
  read.csv(row.names = 1) %>%
  mutate(technology = 'scRNA')
merfish_marginal_effects <- glue('effects_marginal_merfish_', sampleID, '.csv') %>%
  read.csv(row.names = 1) %>%
  mutate(technology = 'MERFISH')

merfish_logfc <- select(merfish_marginal_effects, contrast, feature, logFC, technology)
scrna_logfc <- select(scRNA_marginal_effects, contrast, feature, logFC, technology)

# Rows with a missing scRNA logFC after the left join are the MERFISH
# features of interest; report how many distinct ones there are.
# (Per-contrast correlation variant, kept for reference:
#  group_by(contrast) %>% na.omit() %>%
#    summarize(corr = cor(logFC_MERFISH, logFC_scRNA)))
left_join(
  merfish_logfc,
  scrna_logfc,
  join_by(contrast, feature),
  suffix = c('_MERFISH', '_scRNA')
) %>%
  filter(is.na(logFC_scRNA)) %>%
  distinct(feature) %>%
  nrow()

### here we join the marginal effects and calculate correlations

In [None]:
# Empty accumulator for the per-sample, per-contrast correlations.
correlations <- data.frame()
correlations

In [None]:
# Per-sample, per-contrast correlation (cor() default method) between the
# MERFISH and scRNA logFC values over all features present in both tables.
#
# Results are collected in a list and bound once at the end, so re-running this
# cell rebuilds `correlations` instead of appending duplicate rows to it.
correlationChunks <- vector("list", length(sampleIDs))
for (i in seq_along(sampleIDs)) {
  sampleID <- sampleIDs[[i]]
  merfishFile <- glue('effects_marginal_merfish_', sampleID, '.csv')
  scRNAFile <- glue('effects_marginal_scRNA_', sampleID, '.csv')

  # Samples without MERFISH marginal effects are skipped.
  if (!file.exists(merfishFile)) {
    next
  }
  # Both tables are read below, so a missing scRNA file is reported and the
  # sample skipped rather than aborting the whole loop inside read.csv().
  if (!file.exists(scRNAFile)) {
    warning("Skipping sample ", sampleID, ": missing ", scRNAFile, call. = FALSE)
    next
  }

  scRNA_marginal_effects <- read.csv(scRNAFile, row.names = 1) %>%
    mutate(technology = 'scRNA')
  merfish_marginal_effects <- read.csv(merfishFile, row.names = 1) %>%
    mutate(technology = 'MERFISH')

  correlationChunks[[i]] <- left_join(
    merfish_marginal_effects %>% select(contrast, feature, logFC, technology),
    scRNA_marginal_effects %>% select(contrast, feature, logFC, technology),
    join_by(contrast, feature),
    suffix = c('_MERFISH', '_scRNA')
  ) %>%
    # Drop MERFISH features with no scRNA counterpart, then any remaining
    # rows with missing values, before correlating within each contrast.
    filter(!is.na(logFC_scRNA)) %>%
    group_by(contrast) %>%
    na.omit() %>%
    summarize(corr = cor(logFC_MERFISH, logFC_scRNA)) %>%
    mutate(sampleID = sampleID)
}
# Skipped samples leave NULL entries, which bind_rows() ignores.
correlations <- bind_rows(correlationChunks)

In [None]:
# Reshape the long correlation table into a contrast x sample matrix for the
# heatmap.
options(repr.plot.res = 300, repr.plot.height = 15, repr.plot.width = 15)

# Moving `contrast` into the row names BEFORE as.matrix() keeps the matrix
# numeric. Converting with the character column still present would turn every
# value into formatted text (losing precision), and dropping that column by
# position afterwards collapses the matrix to a vector when there is only one
# sample.
temp <- correlations %>%
  pivot_wider(values_from = corr, names_from = sampleID) %>%
  tibble::column_to_rownames("contrast") %>%
  as.matrix()

# Contrast/sample combinations without a correlation are shown as 0.
temp[is.na(temp)] <- 0
temp


In [None]:
# Zero-fill any remaining missing entries, then coerce every cell to numeric;
# apply() over both margins returns a matrix of the same shape.
temp <- replace(temp, is.na(temp), 0)
temp <- apply(temp, MARGIN = c(1, 2), FUN = as.numeric)
temp

In [None]:
# Heatmap of the per-sample correlations computed from all MERFISH genes.
require(circlize)

# White-to-red colour ramp spanning the observed range of `temp`.
col_fun <- colorRamp2(range(temp), c("white", "red"))

# Writes each cell's value, rounded to one decimal, on top of its tile.
label_cell <- function(j, i, x, y, w, h, col) {
  grid.text(round(temp[i, j], 1), x, y)
}

Heatmap(
  temp,
  name = 'Correlation',
  col = col_fun,
  cell_fun = label_cell,
  column_title = 'TNKILC: all MERFISH genes'
)

### correlations with selected genes only

In [None]:
# Display the gene set (row names of the scRNA object) used to restrict the
# correlations below.
selectedGenes

In [None]:
# Empty accumulator for the correlations restricted to the selected genes.
correlations_selectedGenes <- data.frame()
correlations_selectedGenes

In [None]:
# Per-sample, per-contrast correlation (cor() default method) between the
# MERFISH and scRNA logFC values, restricted to features in `selectedGenes`.
#
# Results are collected in a list and bound once at the end, so re-running this
# cell rebuilds `correlations_selectedGenes` instead of appending duplicates.
selectedGeneChunks <- vector("list", length(sampleIDs))
for (i in seq_along(sampleIDs)) {
  sampleID <- sampleIDs[[i]]
  merfishFile <- glue('effects_marginal_merfish_', sampleID, '.csv')
  scRNAFile <- glue('effects_marginal_scRNA_', sampleID, '.csv')

  # Samples without MERFISH marginal effects are skipped.
  if (!file.exists(merfishFile)) {
    next
  }
  # Both tables are read below, so a missing scRNA file is reported and the
  # sample skipped rather than aborting the whole loop inside read.csv().
  if (!file.exists(scRNAFile)) {
    warning("Skipping sample ", sampleID, ": missing ", scRNAFile, call. = FALSE)
    next
  }

  scRNA_marginal_effects <- read.csv(scRNAFile, row.names = 1) %>%
    mutate(technology = 'scRNA') %>%
    filter(feature %in% selectedGenes)
  merfish_marginal_effects <- read.csv(merfishFile, row.names = 1) %>%
    mutate(technology = 'MERFISH') %>%
    filter(feature %in% selectedGenes)

  selectedGeneChunks[[i]] <- left_join(
    merfish_marginal_effects %>% select(contrast, feature, logFC, technology),
    scRNA_marginal_effects %>% select(contrast, feature, logFC, technology),
    join_by(contrast, feature),
    suffix = c('_MERFISH', '_scRNA')
  ) %>%
    # Drop MERFISH features with no scRNA counterpart, then any remaining
    # rows with missing values, before correlating within each contrast.
    filter(!is.na(logFC_scRNA)) %>%
    group_by(contrast) %>%
    na.omit() %>%
    summarize(corr = cor(logFC_MERFISH, logFC_scRNA)) %>%
    mutate(sampleID = sampleID)
}
# Skipped samples leave NULL entries, which bind_rows() ignores.
correlations_selectedGenes <- bind_rows(selectedGeneChunks)

In [None]:
# Reshape the selected-gene correlations into a contrast x sample numeric
# matrix for the heatmap.
#
# Moving `contrast` into the row names BEFORE as.matrix() keeps the matrix
# numeric, so no round trip through formatted text is needed, and the result
# stays a matrix even when there is only one sample column.
temp <- correlations_selectedGenes %>%
  pivot_wider(values_from = corr, names_from = sampleID) %>%
  tibble::column_to_rownames("contrast") %>%
  as.matrix()

# Contrast/sample combinations without a correlation are shown as 0.
temp[is.na(temp)] <- 0

In [None]:
# Heatmap of the per-sample correlations restricted to the selected genes.
# `col_fun` is not defined in this cell; it is whatever colour ramp is already
# in the session (the all-genes heatmap cell creates one).
# NOTE(review): if that ramp was built from a different value range, values
# outside it presumably map to the end colours -- confirm the shared scale is
# intended.

# Writes each cell's value, rounded to one decimal, on top of its tile.
label_cell <- function(j, i, x, y, w, h, col) {
  grid.text(round(temp[i, j], 1), x, y)
}

Heatmap(
  temp,
  name = 'Correlation',
  col = col_fun,
  cell_fun = label_cell,
  column_title = 'TNKILC: selected genes'
)

### tabulate numbers of cells per type per sample

In [None]:
# Empty accumulators for the per-sample cell counts and percentages.
cell_numbers <- data.frame()
cell_percent <- data.frame()

In [None]:
# For every sample, tabulate how many cells of each fine type come from each
# technology (`cell_numbers`) and each fine type's percentage within its
# technology (`cell_percent`).
#
# Per-sample tables are collected in lists and bound once at the end, so
# re-running this cell rebuilds both tables instead of appending duplicates,
# and samples whose technology columns differ no longer break the row-binding
# of `cell_numbers` (missing columns are filled with NA).
numberChunks <- vector("list", length(sampleIDs))
percentChunks <- vector("list", length(sampleIDs))
for (i in seq_along(sampleIDs)) {
  sampleID <- sampleIDs[[i]]
  merged_TNKILC <- read_rds(glue('TNKILC_fine_types_', sampleID, '.rds'))

  # Fine type x technology counts in wide form: one row per fine type, one
  # column per technology. Computed once and shared by both outputs.
  typeCounts <- table(
    merged_TNKILC@meta.data$cleaned_fine_types,
    merged_TNKILC@meta.data$technology
  ) %>%
    as.data.frame() %>%
    filter(Freq > 0) %>%
    rename('cleaned_fine_type' = Var1, 'technology' = Var2) %>%
    pivot_wider(values_from = Freq, names_from = technology)

  numberChunks[[i]] <- typeCounts %>%
    mutate(sampleID = sampleID)

  # Percentages are computed only over fine types that have a count in every
  # technology column: na.omit() removes the others before the totals are
  # taken, so each column sums to about 100 over the retained types.
  percentChunks[[i]] <- typeCounts %>%
    na.omit() %>%
    mutate(
      MERFISH = round(100 * MERFISH / sum(MERFISH), 2),
      scRNA = round(100 * scRNA / sum(scRNA), 2)
    ) %>%
    mutate(sampleID = sampleID)
}
cell_numbers <- bind_rows(numberChunks)
cell_percent <- bind_rows(percentChunks)

In [None]:
# Missing entries in the count table are replaced by 0.
cell_numbers <- replace(cell_numbers, is.na(cell_numbers), 0)

## plots in space

In [None]:
# For every sample, draw four spatial views of the MERFISH cells and save them
# as a PDF with one plot per page.
for (sampleID in sampleIDs) {
  merged_TNKILC <- read_rds(glue('TNKILC_fine_types_', sampleID, '.rds'))
  coarse_types <- readr::read_rds(glue(COARSE_TYPED_DIR, 'coarse_', sampleID, '.rds'))

  # MERFISH-only subsets shared by the plots below.
  fine_merfish <- filter(merged_TNKILC@meta.data, technology == 'MERFISH')
  fine_xy <- select(fine_merfish, x, y, cleaned_fine_types)
  coarse_xy <- coarse_types@meta.data %>%
    filter(technology == 'MERFISH') %>%
    select(x, y, ClusterTop)

  # p1: fine types, coloured, in a single panel.
  p1 <- ggplot(fine_merfish) +
    geom_point(shape = '.', aes(x = x, y = y, color = cleaned_fine_types)) +
    ggthemes::scale_color_tableau('Tableau 20') +
    theme_minimal(base_size = 18) +
    guides(colour = guide_legend(override.aes = list(size = 10, shape = 16))) +
    coord_sf()

  # One 'red' entry per distinct coarse cluster plus per distinct fine type.
  # NOTE(review): p2 sets the point colour to 'red' outside aes(), so this
  # manual scale appears to have no mapped aesthetic to act on -- confirm
  # before removing it.
  n_coarse <- length(unique(coarse_types@meta.data$ClusterTop))
  n_fine <- length(unique(merged_TNKILC@meta.data$cleaned_fine_types))
  constantPalette <- rep(x = 'red', times = n_coarse + n_fine)

  # p2: coarse clusters and fine types joined into one `Clusters` column and
  # shown one facet per label.
  clusters_xy <- full_join(
    coarse_xy,
    fine_xy,
    by = join_by(ClusterTop == cleaned_fine_types, x, y)
  ) %>%
    rename('Clusters' = ClusterTop)
  p2 <- ggplot(clusters_xy) +
    geom_point(shape = '.', aes(x = x, y = y), color = 'red') +
    theme_minimal(base_size = 18) +
    scale_color_manual(values = constantPalette) +
    coord_sf() +
    facet_wrap(~Clusters) +
    gghighlight()

  # p3 and p4 share the same coloured coarse-cluster layer; p3 adds a legend
  # override, p4 facets by cluster and applies gghighlight().
  coarse_base <- ggplot(coarse_xy) +
    geom_point(shape = '.', aes(x = x, y = y, color = ClusterTop)) +
    theme_minimal(base_size = 18) +
    scale_color_tableau('Color Blind') +
    coord_sf()
  p3 <- coarse_base +
    guides(colour = guide_legend(override.aes = list(size = 10, shape = 16)))
  p4 <- coarse_base +
    facet_wrap(~ClusterTop) +
    gghighlight()

  plotList <- list(p1, p2, p3, p4)
  ggsave(
    filename = glue(sampleID, "_TNKILC_spatial_plots.pdf"),
    plot = marrangeGrob(plotList, nrow = 1, ncol = 1),
    width = 20, height = 20
  )
}