# plot Epi fine typing results

## wishlist:

- all correlation plots, all samples
- correlation heatmap
- plots in space

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
require(readr)
# Raise the `future` globals size limit to 1000 * 1024^2 bytes.
options(future.globals.maxSize = 1000 * 1024 ^2)
require(gridExtra)
# Fix the RNG seed; sample() is used further down to shuffle the sample IDs.
set.seed(1)
# Notebook display settings: plot resolution and how much of a table is printed.
options(repr.plot.res=300)
options(repr.matrix.max.cols=50, repr.matrix.max.rows=10000)

In [None]:
# Project helpers; loadLibraries() is expected to be defined by the sourced file.
source('/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Strom/fine_typing_libraries.r')

loadLibraries()
set.seed(1)

# scRNA epithelial reference (presumably already restricted to the selected
# gene panel, judging by the file name -- confirm). Tag it so it can be told
# apart from MERFISH cells after merging.
scRNA_Epi = readr::read_rds('/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/Epi_fineTyping_selected_genes.rds')
scRNA_Epi@meta.data$technology = 'scRNA'
scRNA_Epi@meta.data$combined_cell_names = colnames(scRNA_Epi)

cellType = 'Epi'

selectedGenes = rownames(scRNA_Epi)

# List the directory once and let list.files() do the filtering; the extension
# is escaped and anchored so only real `.rds` files are picked up.
coarseTypingDir = '/n/scratch/users/m/mup728/mup728/Pelka_Baysor_segmentation/coarse_typing_with_weighted_knn/Coarse_typing_with_weighted_knn/MSI/'
coarseTypedSeurats = list.files(coarseTypingDir, pattern = 'coarse.*\\.rds$')

# list.files() does not expand wildcards inside `path`, so the file-name filter
# has to be passed through `pattern`.
outputs = list.files('/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Epi/',
                     pattern = '^logFC_plotlist')

# Strip the prefix and the literal `.rds` extension, then shuffle the IDs
# (reproducible because of set.seed above).
sampleIDs = sample(gsub(x = coarseTypedSeurats, pattern = 'coarse_|\\.rds$', replacement = ""))


In [None]:
# Fine-typing result files; the directory is listed once and filtered by
# list.files() itself.
epiOutputDir = '/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Fine_typing_with_weighted_KNN/Epi/'
outputs = list.files(epiOutputDir, pattern = 'Epi_fine_types_')
outputs
# Derive sample IDs by removing the prefix and the literal `.rds` extension
# (escaped and anchored so that it cannot eat characters inside an ID), then
# shuffle them.
sampleIDs = sample(gsub(x = outputs, pattern = 'Epi_fine_types_|\\.rds$', replacement = ""))
sampleIDs
length(sampleIDs)

# Which epi fine types are associated with healthy or tumor status?

### Using original pelka labels

In [None]:
# Load the complete Pelka reference object from disk.
complete_pelka <- readr::read_rds(
    file = '/n/scratch/users/m/mup728/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/complete_pelka_dataset.rds'
)


In [None]:
# Show which metadata columns are available on the Pelka reference.
colnames(complete_pelka@meta.data)

In [None]:
# Per-batch cell counts of every original Pelka epithelial cluster, comparing
# Healthy (MMRStatus is NA) against Tumor (any other MMRStatus); one facet per
# cluster, counts on a log10 axis.
options(repr.plot.height = 10, repr.plot.width = 20)

complete_pelka@meta.data %>%
    filter(clTopLevel == 'Epi') %>%
    mutate(MMRStatus = ifelse(is.na(MMRStatus), 'Healthy', 'Tumor')) %>%
    group_by(cl295v11SubFull, MMRStatus, batchID) %>%
    summarise(n = n()) %>%
    ggboxplot(
        x = "MMRStatus",
        y = "n",
        color = "MMRStatus",
        shape = "MMRStatus",
        add = "jitter",
        palette = c("#00AFBB", "#E7B800", "#FC4E07")
    ) +
    scale_y_continuous(trans = 'log10') +
    facet_wrap(~cl295v11SubFull, scales = 'free_x') +
    # global test p-value per facet
    stat_compare_means() +
    # significance stars for the t-test against the Healthy group
    stat_compare_means(
        aes(label = after_stat(p.signif)),
        method = "t.test",
        ref.group = "Healthy"
    )

In [None]:
# Same per-batch counts as the Healthy-vs-Tumor plot, but keeping the MMR
# status itself: NA becomes 'Healthy', other values are kept, and the groups
# are ordered Healthy < MMRp < MMRd.
options(repr.plot.height = 10, repr.plot.width = 20)

complete_pelka@meta.data %>%
    filter(clTopLevel == 'Epi') %>%
    mutate(
        MMRStatus = factor(
            ifelse(is.na(MMRStatus), 'Healthy', MMRStatus),
            levels = c('Healthy', 'MMRp', 'MMRd')
        )
    ) %>%
    group_by(cl295v11SubFull, MMRStatus, batchID) %>%
    summarise(n = n()) %>%
    ggboxplot(
        x = "MMRStatus",
        y = "n",
        color = "MMRStatus",
        shape = "MMRStatus",
        add = "jitter",
        palette = c("#00AFBB", "#E7B800", "#FC4E07")
    ) +
    scale_y_continuous(trans = 'log10') +
    facet_wrap(~cl295v11SubFull, scales = 'free_x') +
    # global test p-value per facet
    stat_compare_means() +
    # significance stars for each group's t-test against Healthy
    stat_compare_means(
        aes(label = after_stat(p.signif)),
        method = "t.test",
        ref.group = "Healthy"
    )

In [None]:
# For every original Pelka epithelial cluster, collect the per-batch cell counts
# of Healthy (MMRStatus is NA) and Tumor batches as list columns, then compare
# the two groups.
tumorSpecifcPopulations = complete_pelka@meta.data %>%
    filter(clTopLevel == 'Epi') %>%
    mutate(MMRStatus = ifelse(is.na(MMRStatus), 'Healthy', 'Tumor')) %>%
    group_by(cl295v11SubFull, MMRStatus, batchID) %>%
    summarize(n = n(), .groups = 'drop') %>%
    pivot_wider(id_cols = cl295v11SubFull, values_from = n, names_from = MMRStatus, values_fn = list) %>%
    mutate(
        # Element-wise over the list columns, so the per-cluster means do not
        # depend on any grouping being carried through pivot_wider().
        meanHealthy = map_dbl(Healthy, function(counts) if (length(counts) == 0) NA_real_ else mean(counts)),
        meanTumor = map_dbl(Tumor, function(counts) if (length(counts) == 0) NA_real_ else mean(counts)),
        TumorSpecific = meanTumor > meanHealthy
    )

# Two-sided Welch t-test per cluster, stored as a plain numeric column.
# Clusters that cannot be tested (fewer than two batches in either group, or
# t.test() refusing the data) get NA instead of aborting the whole cell.
tumorSpecifcPopulations$t.test = map2_dbl(
    tumorSpecifcPopulations$Healthy,
    tumorSpecifcPopulations$Tumor,
    function(healthy, tumor){
        if (length(healthy) < 2 || length(tumor) < 2) {
            return(NA_real_)
        }
        tryCatch(
            t.test(healthy, tumor, alternative = 'two.sided')$p.value,
            error = function(e) NA_real_
        )
    }
)
# Untestable clusters are reported as not significant rather than NA, so the
# flag can be used directly for indexing.
tumorSpecifcPopulations$Signif = !is.na(tumorSpecifcPopulations$t.test) & tumorSpecifcPopulations$t.test < 0.05
tumorSpecifcPopulations %>% filter(Signif)

### Using my cleaned/merged reference

In [None]:
# Show which metadata columns are available on the cleaned scRNA reference.
colnames(scRNA_Epi@meta.data)

In [None]:
# Per-batch cell counts of every cleaned fine type in the scRNA reference,
# comparing Healthy (MMRStatus is NA) against Tumor; one facet per fine type,
# counts on a log10 axis.
options(repr.plot.height = 10, repr.plot.width = 20)

scRNA_Epi@meta.data %>%
    mutate(MMRStatus = ifelse(is.na(MMRStatus), 'Healthy', 'Tumor')) %>%
    group_by(cleaned_fine_types, MMRStatus, batchID) %>%
    summarise(n = n()) %>%
    ggboxplot(
        x = "MMRStatus",
        y = "n",
        color = "MMRStatus",
        shape = "MMRStatus",
        add = "jitter",
        palette = c("#00AFBB", "#E7B800", "#FC4E07")
    ) +
    scale_y_continuous(trans = 'log10') +
    facet_wrap(~cleaned_fine_types, scales = 'free_x') +
    # global test p-value per facet
    stat_compare_means() +
    # significance stars for the t-test against the Healthy group
    stat_compare_means(
        aes(label = after_stat(p.signif)),
        method = "t.test",
        ref.group = "Healthy"
    )

In [None]:
# Per-batch cell counts of every cleaned fine type, split by MMR status:
# NA becomes 'Healthy', other values are kept, ordered Healthy < MMRp < MMRd.
options(repr.plot.height = 10, repr.plot.width = 20)

scRNA_Epi@meta.data %>%
    mutate(
        MMRStatus = factor(
            ifelse(is.na(MMRStatus), 'Healthy', MMRStatus),
            levels = c('Healthy', 'MMRp', 'MMRd')
        )
    ) %>%
    group_by(cleaned_fine_types, MMRStatus, batchID) %>%
    summarise(n = n()) %>%
    ggboxplot(
        x = "MMRStatus",
        y = "n",
        color = "MMRStatus",
        shape = "MMRStatus",
        add = "jitter",
        palette = c("#00AFBB", "#E7B800", "#FC4E07")
    ) +
    scale_y_continuous(trans = 'log10') +
    facet_wrap(~cleaned_fine_types, scales = 'free_x') +
    # global test p-value per facet
    stat_compare_means() +
    # significance stars for each group's t-test against Healthy
    stat_compare_means(
        aes(label = after_stat(p.signif)),
        method = "t.test",
        ref.group = "Healthy"
    )

#### t tests

In [None]:
# For every cleaned fine type, collect the per-batch cell counts of Healthy
# (MMRStatus is NA) and Tumor batches as list columns, then compare the groups.
tumorSpecifcPopulations_cleaned = scRNA_Epi@meta.data %>%
    mutate(MMRStatus = ifelse(is.na(MMRStatus), 'Healthy', 'Tumor')) %>%
    group_by(cleaned_fine_types, MMRStatus, batchID) %>%
    summarize(n = n(), .groups = 'drop') %>%
    pivot_wider(id_cols = cleaned_fine_types, values_from = n, names_from = MMRStatus, values_fn = list) %>%
    mutate(
        # Element-wise over the list columns, so the per-type means do not
        # depend on any grouping being carried through pivot_wider().
        meanHealthy = map_dbl(Healthy, function(counts) if (length(counts) == 0) NA_real_ else mean(counts)),
        meanTumor = map_dbl(Tumor, function(counts) if (length(counts) == 0) NA_real_ else mean(counts)),
        TumorSpecific = meanTumor > meanHealthy
    )

# Two-sided Welch t-test per fine type, stored as a plain numeric column.
# Types that cannot be tested (fewer than two batches in either group, or
# t.test() refusing the data) get NA instead of aborting the whole cell.
tumorSpecifcPopulations_cleaned$t.test = map2_dbl(
    tumorSpecifcPopulations_cleaned$Healthy,
    tumorSpecifcPopulations_cleaned$Tumor,
    function(healthy, tumor){
        if (length(healthy) < 2 || length(tumor) < 2) {
            return(NA_real_)
        }
        tryCatch(
            t.test(healthy, tumor, alternative = 'two.sided')$p.value,
            error = function(e) NA_real_
        )
    }
)
# Untestable types are reported as not significant rather than NA, so the flag
# can be used directly for indexing when the spatial plots are drawn.
tumorSpecifcPopulations_cleaned$Signif = !is.na(tumorSpecifcPopulations_cleaned$t.test) & tumorSpecifcPopulations_cleaned$t.test < 0.05
tumorSpecifcPopulations_cleaned %>% filter(Signif)

## correlation plots, all samples

In [None]:
# One logFC plot-list file name per sample.
plotLists <- paste0('logFC_plotlist_', sampleIDs, '.rds')
plotLists

In [None]:
# Write every stored logFC plot list to a multi-page PDF (one plot per page)
# next to its .rds file.
options(repr.plot.width = 15, repr.plot.height = 10, repr.plot.res = 300)
for (pl in plotLists){
    plotlist = read_rds(pl)
    ggsave(
       # Replace only the trailing, literal `.rds` extension; an unescaped
       # `.rds` would also match any character followed by "rds" inside the
       # sample ID.
       filename = sub(x = pl, pattern = '\\.rds$', replacement = '.pdf'),
       plot = marrangeGrob(plotlist, nrow=1, ncol=1),
       width = 15, height = 10
    )
}

## correlation heatmap

In [None]:
# read in scRNA glmm
# read in merfish glmm
# calculate correlation coeff all genes
# calculate corr coeff selected genes
# store these variables in two dataframes

### note: some genes in merfish are not present in scRNA

In [None]:
# For the first sample, count how many distinct MERFISH features have no
# matching scRNA logFC after joining on (contrast, feature).
sampleID = sampleIDs[1]
scRNA_marginal_effects = mutate(
    read.csv(glue('effects_marginal_scRNA_', sampleID, '.csv'), row.names = 1),
    technology = 'scRNA'
)
merfish_marginal_effects = mutate(
    read.csv(glue('effects_marginal_merfish_', sampleID, '.csv'), row.names = 1),
    technology = 'MERFISH'
)
left_join(
    select(merfish_marginal_effects, contrast, feature, logFC, technology),
    select(scRNA_marginal_effects, contrast, feature, logFC, technology),
    by = join_by(contrast, feature),
    suffix = c('_MERFISH', '_scRNA')
) %>%
    filter(is.na(logFC_scRNA)) %>%
    pull(feature) %>%
    unique() %>%
    length()

### here we join the marginal effects and calculate correlations

In [None]:
# Start from an empty accumulator and show it.
(correlations <- data.frame())

In [None]:
# Per sample and per contrast: Pearson correlation between MERFISH and scRNA
# logFCs over the features present in both marginal-effects tables.
# Results are collected in a list and bound once, so re-running this cell
# rebuilds `correlations` instead of appending duplicate rows to it.
correlation_chunks = list()
for (sampleID in sampleIDs){
    scRNA_marginal_effects = read.csv(glue('effects_marginal_scRNA_', sampleID, '.csv'), row.names = 1) %>%
        mutate(technology = 'scRNA')
    merfish_marginal_effects = read.csv(glue('effects_marginal_merfish_', sampleID, '.csv'), row.names = 1) %>%
        mutate(technology = 'MERFISH')
    correlation_chunks[[sampleID]] = left_join(
            merfish_marginal_effects %>% select(contrast, feature, logFC, technology),
            scRNA_marginal_effects %>% select(contrast, feature, logFC, technology),
            by = join_by(contrast, feature),
            suffix = c('_MERFISH', '_scRNA')
        ) %>%
        # features measured only in MERFISH have no scRNA logFC to correlate with
        filter(!is.na(logFC_scRNA)) %>%
        group_by(contrast) %>%
        na.omit() %>%
        summarize(corr = cor(logFC_MERFISH, logFC_scRNA)) %>%
        mutate(sampleID = sampleID)
}
correlations = bind_rows(correlation_chunks)

In [None]:
options(repr.plot.res=300, repr.plot.height = 15, repr.plot.width = 15)

# contrast x sample matrix of correlations, kept numeric throughout (the
# contrast labels become row names instead of a character column).
corr_wide = correlations %>% pivot_wider(values_from = corr, names_from = sampleID)
temp = corr_wide %>% select(!contrast) %>% as.matrix()
rownames(temp) = corr_wide$contrast

# Ward clustering of rows (contrasts) and columns (samples).
rowOrder = hclust(dist(temp,
                       method = "euclidean"),
                  method = "ward.D" )$order
colOrder = hclust(dist(t(temp),
                       method = "euclidean"),
                  method = "ward.D" )$order
temp = temp[rowOrder, colOrder, drop = FALSE] %>% as.data.frame() %>% mutate(contrast = rownames(.))

temp %>%
    pivot_longer(cols = !contrast, names_to = 'sampleID', values_to = 'corr') %>%
    # ggplot sorts character axes alphabetically; factors with explicit levels
    # are needed for the clustered order to actually show up in the plot.
    mutate(contrast = factor(contrast, levels = temp$contrast),
           sampleID = factor(sampleID, levels = setdiff(colnames(temp), 'contrast'))) %>%
    ggplot(aes(y = contrast, x = sampleID)) +
    geom_tile(aes(fill = corr)) +
    geom_label(aes(label = round(corr, 1)),
               color = 'red') +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90,
                                     vjust = 0.5,
                                     hjust=1)) +
    scale_fill_viridis_c(option = 'inferno', na.value = 'white', direction = -1) + #, limits = c(0, 1))
    ggtitle('Epi: all MERFISH genes')

In [None]:
# Hand-curated MSS/MSI status per sample (names = condition, values = sample
# ID), turned into a lookup table with one display colour per condition.
annotations = c(MSS = "G4669_reg0",
MSS = "G4669_reg1",
MSI = "C107",
MSI = "C163",
MSI = "C167",
MSS = "G4738_Beta10_06",
MSS = "G4712_Beta8",
MSS = "G4659",
MSS = "G4659_Beta8",
MSS = "G4712_Beta10",
MSS = "G4630",
MSI = "C164",
MSI = "G4595",
MSI = "G4554",
MSI = "C110",
MSI = "G4209",
MSI = "G4423",
MSI = "Jax001",
#MSS = "G4695",
MSS = "G4738_Beta10_08",
MSI = "C123",
MSI = "G4671_res") %>%
data.frame(sampleID = ., condition = names(.)) %>%
mutate(color = gsub(pattern = 'MSI', replacement = 'black', x = .$condition)) %>%
mutate(color = gsub(pattern = 'MSS', replacement = 'grey', x = .$color))
annotations

# Numeric contrast x sample matrix; missing correlations are shown as 0.
temp = temp %>% select(!contrast) %>% as.matrix()
storage.mode(temp) = 'double'
temp[is.na(temp)] = 0
dim(temp)

# One colour per condition.
col = annotations$color[!duplicated(annotations$condition)]
names(col) = unique(annotations$condition)

# The annotation vector has to follow the column order of the matrix, so the
# condition is looked up by sample ID instead of taking the table order.
# Samples without an entry in `annotations` get NA.
column_condition = annotations$condition[match(colnames(temp), annotations$sampleID)]
ha = HeatmapAnnotation(
    Condition = column_condition,
    col = list(Condition = col),
    gp = gpar(col = "black")
)
ha
Heatmap(temp, name = 'Correlation', column_km = 2, row_km = 4, cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
        grid.text(round(temp[i, j],1), x, y)
    }, column_title = 'Epi: all MERFISH genes', top_annotation = ha)

### correlations with selected genes only

In [None]:
# Show the selected gene panel (row names of the scRNA reference).
selectedGenes

In [None]:
# Start from an empty accumulator and show it.
(correlations_selectedGenes <- data.frame())

In [None]:
# Per sample and per contrast: Pearson correlation between MERFISH and scRNA
# logFCs, restricted to the selected gene panel.
# Results are collected in a list and bound once, so re-running this cell
# rebuilds `correlations_selectedGenes` instead of appending duplicate rows.
correlation_chunks_selected = list()
for (sampleID in sampleIDs){
    scRNA_marginal_effects = read.csv(glue('effects_marginal_scRNA_', sampleID, '.csv'), row.names = 1) %>%
        mutate(technology = 'scRNA') %>% filter(feature %in% selectedGenes)
    merfish_marginal_effects = read.csv(glue('effects_marginal_merfish_', sampleID, '.csv'), row.names = 1) %>%
        mutate(technology = 'MERFISH') %>% filter(feature %in% selectedGenes)
    correlation_chunks_selected[[sampleID]] = left_join(
            merfish_marginal_effects %>% select(contrast, feature, logFC, technology),
            scRNA_marginal_effects %>% select(contrast, feature, logFC, technology),
            by = join_by(contrast, feature),
            suffix = c('_MERFISH', '_scRNA')
        ) %>%
        # keep only features that have a logFC in both technologies
        filter(!is.na(logFC_scRNA)) %>%
        group_by(contrast) %>%
        na.omit() %>%
        summarize(corr = cor(logFC_MERFISH, logFC_scRNA)) %>%
        mutate(sampleID = sampleID)
}
correlations_selectedGenes = bind_rows(correlation_chunks_selected)

In [None]:
# contrast x sample matrix of correlations, kept numeric throughout (the
# contrast labels become row names instead of a character column).
corr_wide_selected = correlations_selectedGenes %>% pivot_wider(values_from = corr, names_from = sampleID)
temp = corr_wide_selected %>% select(!contrast) %>% as.matrix()
rownames(temp) = corr_wide_selected$contrast

# Ward clustering of rows (contrasts) and columns (samples).
rowOrder = hclust(dist(temp,
                       method = "euclidean"),
                  method = "ward.D" )$order
colOrder = hclust(dist(t(temp),
                       method = "euclidean"),
                  method = "ward.D" )$order
temp = temp[rowOrder, colOrder, drop = FALSE] %>% as.data.frame() %>% mutate(contrast = rownames(.))

temp %>%
    pivot_longer(cols = !contrast, names_to = 'sampleID', values_to = 'corr') %>%
    # ggplot sorts character axes alphabetically; factors with explicit levels
    # are needed for the clustered order to actually show up in the plot.
    mutate(contrast = factor(contrast, levels = temp$contrast),
           sampleID = factor(sampleID, levels = setdiff(colnames(temp), 'contrast'))) %>%
    ggplot(aes(y = contrast, x = sampleID)) +
    geom_tile(aes(fill = corr)) +
    geom_label(aes(label = round(corr, 1)),
               color = 'red') +
    theme_minimal(base_size = 16) +
    theme(axis.text.x = element_text(angle = 90,
                                     vjust = 0.5,
                                     hjust=1)) +
    scale_fill_viridis_c(option = 'inferno', na.value = 'white', direction = -1) +
    ggtitle('Epi: selected genes') #, limits = c(0, 1))


In [None]:
# Numeric contrast x sample matrix; missing correlations are shown as 0.
temp = temp %>% select(!contrast) %>% as.matrix()
storage.mode(temp) = 'double'
temp[is.na(temp)] = 0

# The top annotation has to follow this matrix's column order, so it is built
# here by matching sample IDs rather than reusing `ha`. Samples without an
# entry in `annotations` get NA.
condition_colors = annotations$color[!duplicated(annotations$condition)]
names(condition_colors) = unique(annotations$condition)
ha_selected = HeatmapAnnotation(
    Condition = annotations$condition[match(colnames(temp), annotations$sampleID)],
    col = list(Condition = condition_colors),
    gp = gpar(col = "black")
)
Heatmap(temp, name = 'Correlation', column_km = 2, row_km = 4, cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
        grid.text(round(temp[i, j],1), x, y)
    }, column_title = 'Epi: selected genes', top_annotation = ha_selected)

### tabulate numbers of cells per type per sample

In [None]:
# Empty accumulators for the per-sample count and percentage tables.
cell_numbers <- cell_percent <- data.frame()

In [None]:
# Tabulate, per sample, how many cells of each cleaned fine type come from each
# technology (cell_numbers) and what share of the technology's cells that is
# (cell_percent). Results are collected in lists and bound once, so re-running
# this cell rebuilds both tables instead of appending duplicate rows.
number_chunks = list()
percent_chunks = list()
for (sampleID in sampleIDs){
    merged_Epi = read_rds(glue('Epi_fine_types_', sampleID, '.rds'))

    # fine type x technology counts, computed once and reused for both tables;
    # zero counts are dropped, so an absent combination becomes NA after
    # pivot_wider()
    counts_wide = table(merged_Epi@meta.data$cleaned_fine_types,
                        merged_Epi@meta.data$technology) %>%
        as.data.frame() %>%
        filter(Freq > 0) %>%
        rename('cleaned_fine_type' = Var1,
               'technology' = Var2) %>%
        pivot_wider(values_from = Freq,
                    names_from = technology)

    # NOTE(review): percentages are taken over the fine types present in BOTH
    # technologies only (na.omit drops the rest before the sums) -- confirm
    # this is the intended denominator, in particular for scRNA.
    percent_chunks[[sampleID]] = counts_wide %>%
        na.omit() %>%
        mutate(MERFISH = round(100 * MERFISH / sum(MERFISH), 2),
               scRNA = round(100 * scRNA / sum(scRNA), 2)) %>%
        mutate(sampleID = sampleID)
    number_chunks[[sampleID]] = counts_wide %>%
        mutate(sampleID = sampleID)
}
cell_percent = bind_rows(percent_chunks)
cell_numbers = bind_rows(number_chunks)

In [None]:
# Replace every NA in cell_numbers with 0. NAs arise when a fine type has no
# cells for one technology in a sample (zero counts are filtered out before
# the table is pivoted wide).
cell_numbers[is.na(cell_numbers)] = 0

In [None]:
# Display the per-sample cell counts.
cell_numbers

In [None]:
# Display the per-sample cell percentages.
cell_percent 

In [None]:
# Sanity check: rows of either table that still have no MERFISH value.
cell_percent %>% filter(is.na(MERFISH))
cell_numbers %>% filter(is.na(MERFISH))

In [None]:
# Sample x fine-type table of MERFISH percentages, with the sample condition
# attached (joined explicitly on sampleID).
cell_percent %>%
left_join(annotations, by = 'sampleID') %>%
pivot_wider(id_cols = c(condition,sampleID), values_from = MERFISH, names_from  = cleaned_fine_type)

options(repr.plot.width = 15, repr.plot.height = 15, repr.plot.res = 300)

temp = cell_percent %>%
left_join(annotations, by = 'sampleID') %>%
pivot_wider(id_cols = c(condition,sampleID), values_from = MERFISH, names_from  = cleaned_fine_type) %>% as.data.frame()
rownames(temp) = temp$sampleID
temp = temp %>%
select(!c(condition, sampleID)) %>%
as.matrix()
temp

# Fine types absent from a sample count as 0 %; transpose so that rows are
# fine types and columns are samples.
temp[is.na(temp)] = 0
storage.mode(temp) = 'double'
temp = t(temp)

# The top annotation has to follow this matrix's column order, so it is built
# here by matching sample IDs rather than reusing `ha`. Samples without an
# entry in `annotations` get NA.
condition_colors = annotations$color[!duplicated(annotations$condition)]
names(condition_colors) = unique(annotations$condition)
ha_percent = HeatmapAnnotation(
    Condition = annotations$condition[match(colnames(temp), annotations$sampleID)],
    col = list(Condition = condition_colors),
    gp = gpar(col = "black")
)

col_fun = colorRamp2(c(0, 25, 50, 75, 100), c("#ffffcc","#a1dab4","#41b6c4","#2c7fb8","#253494"))
Heatmap(temp, name = 'Cell percent', column_km = 2, row_km = 4, cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
        grid.text(round(temp[i, j],1), x, y)
    }, column_title = 'Epi: cell type percent', top_annotation = ha_percent, col = col_fun)

In [None]:
# MERFISH cell counts per sample, one facet per fine type, log10 x axis.
options(repr.plot.res = 300, repr.plot.height = 15, repr.plot.width = 20)
ggplot(data = cell_numbers,
       mapping = aes(y = sampleID, x = MERFISH, fill = cleaned_fine_type)) +
    geom_col() +
    facet_wrap(~cleaned_fine_type) +
    scale_x_continuous(trans = 'log10') +
    scale_fill_tableau('Tableau 20') +
    theme_minimal(base_size = 12)

## plots in space

#### All populations

In [None]:
# Fine types that are both significant (Signif) and more abundant in tumor
# (TumorSpecific). The lookup does not depend on the sample, so it is built
# once; which() keeps NA flags from leaking NA entries into the lookup.
tumorFineTypes = tumorSpecifcPopulations_cleaned$cleaned_fine_types[
    which(tumorSpecifcPopulations_cleaned$Signif & tumorSpecifcPopulations_cleaned$TumorSpecific)]

# For every sample: draw six spatial views of the MERFISH cells and save them
# as one multi-page PDF plus one PNG per view.
for (sampleID in sampleIDs){
    merged_Epi = read_rds(glue('Epi_fine_types_', sampleID, '.rds'))
    coarse_types = readr::read_rds(glue('/n/scratch/users/m/mup728/mup728/Pelka_Baysor_segmentation/coarse_typing_with_weighted_knn/Coarse_typing_with_weighted_knn/MSI/coarse_',
                                        sampleID,
                                        '.rds'))

    # p1: epithelial cells coloured by fine type
    p1 = ggplot(merged_Epi@meta.data %>% filter(technology == 'MERFISH')) +
    geom_point(shape = '.', aes(x = x, y = y, color = cleaned_fine_types)) +
    ggthemes::scale_color_tableau('Tableau 20') +
    theme_minimal(base_size = 18) +
    guides(colour = guide_legend(override.aes = list(size=10, shape=16))) +
    coord_sf()

    constantPalette = rep(x = 'red', times = length(unique(coarse_types@meta.data$ClusterTop)) +
                                                   length(unique(merged_Epi@meta.data$cleaned_fine_types)))

    # p2: one facet per fine type (plus a 'background' facet holding every
    # coarse-typed cell), highlighted against the remaining cells
    p2 = full_join(coarse_types@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, ClusterTop) %>% mutate(ClusterTop = 'background'),
             merged_Epi@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, cleaned_fine_types),
             by = join_by(ClusterTop == cleaned_fine_types, x, y)) %>%
    rename('Clusters' = ClusterTop) %>%
    ggplot() +
    geom_point(shape = '.', aes(x = x, y = y), color = 'red') +
    theme_minimal(base_size=18) +
    scale_color_manual(values = constantPalette) +
    coord_sf() +
    facet_wrap(~Clusters, nrow = 2) +
    gghighlight()

    # p3: all cells coloured by coarse type
    p3 = coarse_types@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, ClusterTop) %>%
        ggplot() +
        geom_point(shape = '.', aes(x = x, y = y, color = ClusterTop)) +
        theme_minimal(base_size=18) +
        scale_color_tableau('Color Blind') +
        coord_sf() +
        guides(colour = guide_legend(override.aes = list(size=10, shape=16)))

    # p4: same as p3, one highlighted facet per coarse type
    p4 = coarse_types@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, ClusterTop) %>%
        ggplot() +
        geom_point(shape = '.', aes(x = x, y = y, color = ClusterTop)) +
        theme_minimal(base_size=18) +
        scale_color_tableau('Color Blind') +
        coord_sf() +
        facet_wrap(~ClusterTop) +
        gghighlight()

    merged_Epi@meta.data$TumorTypes = merged_Epi@meta.data$cleaned_fine_types %in% tumorFineTypes
    # shuffle the rows so that neither class is systematically drawn on top
    temp = merged_Epi@meta.data[sample(1:nrow(merged_Epi@meta.data), nrow(merged_Epi@meta.data)),]
    # p5 / p6: tumor-specific vs other epithelial fine types, joint and faceted
    p5 = ggplot(temp %>% filter(technology == 'MERFISH')) +
    geom_point(shape = '.', aes(x = x, y = y, color = TumorTypes)) +
    ggthemes::scale_color_tableau('Classic Color Blind') +
    theme_minimal(base_size = 18) +
    guides(colour = guide_legend(override.aes = list(size=10, shape=16))) +
    coord_sf() + ggtitle(glue('Tumor-specific epithelial fine types in ', sampleID))

    p6 = p5 + facet_wrap(~TumorTypes) + gghighlight() + ggtitle(glue('Tumor-specific epithelial fine types in ', sampleID))

    plotList = list(p1, p2, p3, p4, p5, p6)

    names(plotList) = c('fine_types', 'fine_types_facet', 'coarse_types', 'coarse_types_facet', 'tumor_specific', 'tumor_specific_facet')
    ggsave(
       filename = glue( sampleID, "_Epi_spatial_plots.pdf"),
       plot = marrangeGrob(plotList, nrow=1, ncol=1),
       width = 20, height = 20
    )
    # The PNGs go into a per-sample folder; make sure it exists, since saving
    # into a missing directory can fail.
    dir.create(file.path('Spatial_plots', sampleID), recursive = TRUE, showWarnings = FALSE)
    for (p in seq_along(plotList)){
        ggsave(filename = glue('Spatial_plots/', sampleID, '/', sampleID, '_', names(plotList)[p], '.png'), plot = plotList[[p]], width = 20, height = 10, units = 'in')
    }
}

In [None]:
   # Stand-alone copy of the per-sample plotting steps: it redraws and saves the
   # six spatial views for whatever `merged_Epi`, `coarse_types` and `sampleID`
   # currently hold.
   p1 = ggplot(merged_Epi@meta.data %>% filter(technology == 'MERFISH')) +
    geom_point(shape = '.', aes(x = x, y = y, color = cleaned_fine_types)) +
    ggthemes::scale_color_tableau('Tableau 20') +
    theme_minimal(base_size = 18) +
    guides(colour = guide_legend(override.aes = list(size=10, shape=16))) +
    coord_sf()

    constantPalette = rep(x = 'red', times = length(unique(coarse_types@meta.data$ClusterTop)) +
                                                   length(unique(merged_Epi@meta.data$cleaned_fine_types)))

    # p2: one facet per fine type (plus a 'background' facet holding every
    # coarse-typed cell), highlighted against the remaining cells
    p2 = full_join(coarse_types@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, ClusterTop) %>% mutate(ClusterTop = 'background'),
             merged_Epi@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, cleaned_fine_types),
             by = join_by(ClusterTop == cleaned_fine_types, x, y)) %>%
    rename('Clusters' = ClusterTop) %>%
    ggplot() +
    geom_point(shape = '.', aes(x = x, y = y), color = 'red') +
    theme_minimal(base_size=18) +
    scale_color_manual(values = constantPalette) +
    coord_sf() +
    facet_wrap(~Clusters, nrow = 2) +
    gghighlight()

    # p3: all cells coloured by coarse type
    p3 = coarse_types@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, ClusterTop) %>%
        ggplot() +
        geom_point(shape = '.', aes(x = x, y = y, color = ClusterTop)) +
        theme_minimal(base_size=18) +
        scale_color_tableau('Color Blind') +
        coord_sf() +
        guides(colour = guide_legend(override.aes = list(size=10, shape=16)))

    # p4: same as p3, one highlighted facet per coarse type
    p4 = coarse_types@meta.data %>% filter(technology == 'MERFISH') %>% select(x, y, ClusterTop) %>%
        ggplot() +
        geom_point(shape = '.', aes(x = x, y = y, color = ClusterTop)) +
        theme_minimal(base_size=18) +
        scale_color_tableau('Color Blind') +
        coord_sf() +
        facet_wrap(~ClusterTop) +
        gghighlight()

    # TRUE for fine types that are both significant and more abundant in tumor
    merged_Epi@meta.data$TumorTypes = merged_Epi@meta.data$cleaned_fine_types %in% tumorSpecifcPopulations_cleaned$cleaned_fine_types[tumorSpecifcPopulations_cleaned$Signif & tumorSpecifcPopulations_cleaned$TumorSpecific]
    # shuffle the rows so that neither class is systematically drawn on top
    temp = merged_Epi@meta.data[sample(1:nrow(merged_Epi@meta.data), nrow(merged_Epi@meta.data)),]
    p5 = ggplot(temp %>% filter(technology == 'MERFISH')) +
    geom_point(shape = '.', aes(x = x, y = y, color = TumorTypes)) +
    ggthemes::scale_color_tableau('Classic Color Blind') +
    theme_minimal(base_size = 18) +
    guides(colour = guide_legend(override.aes = list(size=10, shape=16))) +
    coord_sf() + ggtitle(glue('Tumor-specific epithelial fine types in ', sampleID))

    p6 = p5 + facet_wrap(~TumorTypes) + gghighlight() + ggtitle(glue('Tumor-specific epithelial fine types in ', sampleID))

    plotList = list(p1, p2, p3, p4, p5, p6)

    names(plotList) = c('fine_types', 'fine_types_facet', 'coarse_types', 'coarse_types_facet', 'tumor_specific', 'tumor_specific_facet')
    ggsave(
       filename = glue( sampleID, "_Epi_spatial_plots.pdf"),
       plot = marrangeGrob(plotList, nrow=1, ncol=1),
       width = 20, height = 20
    )
    # The PNGs go into a per-sample folder; make sure it exists, since saving
    # into a missing directory can fail.
    dir.create(file.path('Spatial_plots', sampleID), recursive = TRUE, showWarnings = FALSE)
    for (p in seq_along(plotList)){
        ggsave(filename = glue('Spatial_plots/', sampleID, '/', sampleID, '_', names(plotList)[p], '.png'), plot = plotList[[p]], width = 20, height = 10, units = 'in')
    }

In [None]:
    # Re-save the current plot list as wider (30 x 10 in) PNGs. The per-sample
    # folder is created first, since saving into a missing directory can fail;
    # seq_along() keeps the loop a no-op for an empty list.
    dir.create(file.path('Spatial_plots', sampleID), recursive = TRUE, showWarnings = FALSE)
    for (p in seq_along(plotList)){
        ggsave(filename = glue('Spatial_plots/', sampleID, '/', sampleID, '_', names(plotList)[p], '.png'), plot = plotList[[p]], width = 30, height = 10, units = 'in')
    }

#### Tumor vs healthy

In [None]:
# Summary table of the Healthy-vs-Tumor comparison per cleaned fine type
# (the list columns with the raw per-batch counts are left out).
select(
    tumorSpecifcPopulations_cleaned,
    cleaned_fine_types,
    meanHealthy,
    meanTumor,
    TumorSpecific,
    t.test,
    Signif
)

# Post-transfer merging step: for all MERFISH samples, compute glmm logFC for clusters. Correlate logFCs cluster-vs-cluster in MERFISH. 

Which clusters, if any, are indistinguishable? If two clusters have a correlation above some threshold (e.g. r > 0.95), merge them. This may be more of a sanity check that the label transfer worked.

In [None]:
# Stack the MERFISH marginal-effects tables of all samples, tagging each row
# with its sample. The tables are collected in a list and bound once instead of
# growing a data frame with rbind() on every iteration.
merfish_effect_chunks = list()
for (sampleID in sampleIDs){
    merfish_marginal_effects = read.csv(glue('effects_marginal_merfish_', sampleID, '.csv'), row.names = 1)
    merfish_marginal_effects$sampleID = sampleID
    merfish_effect_chunks[[sampleID]] = merfish_marginal_effects
}
all_merfish_marginal_effects = bind_rows(merfish_effect_chunks)
all_merfish_marginal_effects %>% head()

In [None]:
# Empty (all-NA) contrast x contrast matrix, labelled on both axes with the
# contrasts in order of first appearance.
logFC_cluster_correlations <- local({
    contrast_labels <- unique(all_merfish_marginal_effects$contrast)
    empty <- matrix(NA, length(contrast_labels), length(contrast_labels))
    dimnames(empty) <- list(contrast_labels, contrast_labels)
    empty
})
logFC_cluster_correlations

In [None]:
# Sort the stacked table by feature name and peek at the first rows.
all_merfish_marginal_effects <- all_merfish_marginal_effects[
    with(all_merfish_marginal_effects, order(feature)),
]
head(all_merfish_marginal_effects)

In [None]:
# Pearson correlation of MERFISH logFCs between every pair of contrasts
# (clusters). Values are paired explicitly by (sampleID, feature) rather than
# by row position, so a feature or sample missing for one contrast cannot shift
# the two vectors against each other.
contrast_levels = as.character(unique(all_merfish_marginal_effects$contrast))
contrast_levels = contrast_levels[!is.na(contrast_levels)]

# rows = one (sampleID, feature) pair, columns = contrasts; NA where a contrast
# has no logFC for that pair (mean() only collapses accidental duplicates)
logFC_by_contrast = with(
    all_merfish_marginal_effects,
    tapply(logFC, list(paste(sampleID, feature, sep = '||'), contrast), mean)
)
# keep the contrasts in their order of first appearance
logFC_by_contrast = logFC_by_contrast[, contrast_levels, drop = FALSE]

# each pair of contrasts is correlated over the rows where both are observed;
# pairs without enough shared observations come out as NA
logFC_cluster_correlations = cor(logFC_by_contrast, use = 'pairwise.complete.obs')
logFC_cluster_correlations

In [None]:
# Contrast x contrast correlation heatmap in the matrix's own order (no
# clustering), with each cell labelled by its value rounded to one decimal.
options(repr.plot.height = 12, repr.plot.width = 12)
Heatmap(
    logFC_cluster_correlations,
    name = 'Correlation',
    column_title = 'Epi: correlations between MERFISH clusters',
    cluster_columns = FALSE,
    cluster_rows = FALSE,
    cell_fun = function(j, i, x, y, w, h, col) {
        # write the value into the cell
        grid.text(round(logFC_cluster_correlations[i, j],1), x, y)
    }
)