# Fine type Strom in G4209

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(tidyverse)
require(Seurat)
require(data.table)
require(lme4)
require(presto)
require(singlecellmethods)
require(future)
require(furrr)
require(gghighlight)
# Session-wide settings: allow up to 1000 * 1024^2 bytes of globals to be shipped to
# future workers, and set the notebook plot resolution option to 300.
options(
    future.globals.maxSize = 1000 * 1024^2,
    repr.plot.res = 300
)
# Seed the RNG once so that the stochastic steps below are repeatable.
set.seed(1)

# load processed scRNA reference dataset

In [None]:
# Read the processed stromal scRNA reference object from disk.
scRNA_Strom <- readr::read_rds(
    '/n//scratch3/users/m/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/Strom_fineTyping_selected_genes.rds'
)

In [None]:
# Mark every reference cell as coming from the scRNA technology.
scRNA_Strom@meta.data[['technology']] <- 'scRNA'

In [None]:
# Store the cell names as a metadata column (used later as a join key) and
# report how many distinct names there are.
scRNA_Strom@meta.data[['combined_cell_names']] <- colnames(scRNA_Strom)
length(unique(scRNA_Strom@meta.data[['combined_cell_names']]))

## original and merged clusters - scRNA

In [None]:
# Cross-tabulate the original clusters (ClusterFull) against the merged fine types
# and list only the combinations that actually occur.
table(
    scRNA_Strom@meta.data$ClusterFull,
    scRNA_Strom@meta.data$cleaned_fine_types
) %>%
    as.data.frame() %>%
    filter(Freq > 0) %>%
    rename(Pelka_Cluster = Var1, Merged_Cluster = Var2)

## load coarse-typed MERFISH G4209

In [None]:
# Read the coarse-typed G4209 object and print its summary.
G4209 <- readr::read_rds(
    '/n/scratch3/users/m/mup728/Pelka_Baysor_segmentation/coarse_typing_with_weighted_knn/Coarse_typing_with_weighted_knn/MSI/coarse_G4209.rds'
)
G4209

In [None]:
# List the metadata columns, then count cells per technology and coarse type.
colnames(G4209@meta.data)
table(
    G4209@meta.data$technology,
    G4209@meta.data$ClusterTop
)

In [None]:
# Show the factor levels of the technology and coarse-type columns.
levels(G4209@meta.data$technology)
levels(G4209@meta.data$ClusterTop)

In [None]:
# Keep the MERFISH cells whose coarse type is 'Strom', then restrict the object
# to the features (rows) present in the scRNA reference.
merfish_Strom <- subset(
    G4209,
    subset = technology == 'MERFISH' & ClusterTop == 'Strom'
)
merfish_Strom <- merfish_Strom[rownames(scRNA_Strom), ]
merfish_Strom

## Merge cleaned Strom with MERFISH

In [None]:
# Combine the scRNA reference and the MERFISH stromal cells into one object.
merged_Strom <- merge(x = scRNA_Strom, y = merfish_Strom)
merged_Strom

In [None]:
# Scale factor for normalisation: take the median nCount_RNA within every
# technology x orig.ident group, then average those medians.
# NOTE(review): the previous comment called this a "median of medians", but the
# code computes the MEAN of the per-group medians - confirm which is intended.
normFactor <- merged_Strom@meta.data %>%
    select(technology, nCount_RNA, orig.ident) %>%
    group_by(technology, orig.ident) %>%
    summarize(medianCounts = median(nCount_RNA)) %>%
    pull(medianCounts) %>%
    mean()
normFactor

# Log-normalise with that scale factor, scale all features, and run a balanced
# PCA (15 components) weighted by orig.ident.
merged_Strom <- merged_Strom %>%
    NormalizeData(
        normalization.method = "LogNormalize",
        scale.factor = normFactor
    ) %>%
    ScaleData(features = rownames(merged_Strom)) %>%
    singlecellmethods::RunBalancedPCA(weight.by = 'orig.ident', npcs = 15)
merged_Strom

### Harmonize

In [None]:
# Harmonise the PCA embedding across samples (orig.ident) and technologies.
# theta supplies one value per integration variable, in the same order.
# The convergence plot is drawn at 5 x 5.
options(repr.plot.width = 5, repr.plot.height = 5)
merged_Strom = merged_Strom %>% harmony::RunHarmony(c("orig.ident", "technology"), 
    plot_convergence = TRUE, 
    lambda = NULL,
    max.iter = 10,
    # Spelled out as FALSE: the shorthand `F` is an ordinary variable that can be reassigned.
    early_stop = FALSE,
    theta = c(4,4)#,
    #sigma = 0.2
) 

## post-harmony UMAP - merged Strom

In [None]:
# Compute a 2-D UMAP from the first 15 harmony dimensions. ret_extra = 'fgraph'
# makes uwot return the fuzzy graph as well, so U holds $embedding and $fgraph.
# The SGD thread count is taken from nbrOfWorkers() (presumably the future
# package's worker count - confirm).
U = uwot::umap(merged_Strom@reductions$harmony@cell.embeddings[, 1:15], 
               min_dist = 0.05, 
               spread = 0.30, 
               ret_extra = 'fgraph', 
               n_sgd_threads = nbrOfWorkers(), 
               fast_sgd = TRUE)
colnames(U$embedding) = c('HUMAP1', 'HUMAP2')
# Label both dimensions of the graph with the cell names.
rownames(U$fgraph) = colnames(U$fgraph) = Cells(merged_Strom)
# Store the embedding as the 'humap' reduction; later cells read its columns as
# HUMAP_1 / HUMAP_2.
merged_Strom[['humap']] = Seurat::CreateDimReducObject(
    embeddings = U$embedding,
    assay = 'RNA', 
    key = 'HUMAP_', 
    global = TRUE
)
# Store the fuzzy graph on the object under 'humap_fgraph', tied to the default assay.
new_graph = Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) = DefaultAssay(merged_Strom)
merged_Strom[['humap_fgraph']] = new_graph

In [None]:
# Build the plotting table: harmonised-UMAP coordinates of every cell together
# with its technology and fine-type label, joined on the cell name.
temp <- Embeddings(merged_Strom, 'humap') %>% as.data.frame()
temp$combined_cell_names <- rownames(temp)
humap_embeddings <- right_join(
    merged_Strom@meta.data %>% select(combined_cell_names, technology, cleaned_fine_types),
    temp,
    by = 'combined_cell_names'
)
# Shuffle the rows so the drawing order does not follow the row order of the table.
# The shuffled table used to be assigned to the misspelled `hump_embeddings`,
# so the plots below never saw it.
humap_embeddings <- humap_embeddings %>% sample_n(size = nrow(.))
humap_embeddings %>% head()
humap_embeddings %>% tail()

In [None]:
# Post-harmony UMAP of all cells, coloured by technology.
options(repr.plot.width = 5, repr.plot.height = 5)
ggplot(
    data = humap_embeddings,
    mapping = aes(x = HUMAP_1, y = HUMAP_2, color = technology)
) +
    geom_point(shape = '.', alpha = 0.5) +
    ggtitle('Post-harmony UMAP - merged Strom') +
    scale_color_colorblind() +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top') +
    # Enlarge the legend keys; the '.' glyph is too small to read.
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1)))

In [None]:
# Same UMAP, one panel per technology with that technology highlighted.
options(repr.plot.width = 10, repr.plot.height = 10)
ggplot(
    data = humap_embeddings,
    mapping = aes(x = HUMAP_1, y = HUMAP_2, color = technology)
) +
    geom_point(shape = '.', alpha = 0.5) +
    ggtitle('Post-harmony UMAP - merged Strom') +
    scale_color_colorblind() +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
    facet_wrap(~technology) +
    gghighlight::gghighlight()

In [None]:
# Post-harmony UMAP with one highlighted panel per fine type
# (cleaned_fine_types as it stands before label transfer).
options(repr.plot.width = 18, repr.plot.height = 10)
ggplot(
    data = humap_embeddings,
    mapping = aes(x = HUMAP_1, y = HUMAP_2, color = cleaned_fine_types)
) +
    geom_point(shape = '.', alpha = 0.5) +
    ggtitle('Post-harmony UMAP - merged Strom') +
    ggpubr::theme_pubr(base_size = 10) +
    theme(legend.position = 'top') +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16, alpha = 1))) +
    facet_wrap(~cleaned_fine_types) +
    gghighlight::gghighlight()

# transfer labels with weighted knn

In [None]:
## S3 generic: transfer a label from reference cells to query cells by k-nearest-neighbour voting.
TransferLabels <- function(obj, ...) UseMethod("TransferLabels")

## Default method, working on an embedding matrix plus a metadata table.
##
## For each query cell the k nearest reference cells are found in the embedding.
## Each neighbour votes for its own label, and the label with the largest total
## vote is written into `label.varname` for that query cell.
##
## Returns `meta_data` with `label.varname` overwritten for the query rows and a
## new column `<label.varname>_confidence`. That column is 1 for every non-query
## row and the winning vote total for query rows. With `weighted = TRUE` the
## votes of a query cell sum to 1; with `weighted = FALSE` each neighbour counts
## 1, so the value is a neighbour count between 1 and k.
##
## Stops with an error when the inputs are inconsistent, a column is missing,
## no reference or query cells are selected, or a reference label is NA.
TransferLabels.default <- function(
    embedding, ## low dimensional space (e.g. cells-by-PCs)
    meta_data, ## meta data table 
    group.by.varname, ## Variable that splits reference and query 
    label.varname, ## Label I want to transfer
    from, ## Reference cell group(s)
    to, ## Query cell group(s)
    k = 10, ## Number nearest neighbors for prediction
    weighted = TRUE ## Given more weight to closer neighbors
) {
    if (inherits(embedding, 'data.frame')) {
        embedding <- as.matrix(as.data.frame(embedding))
    } else if (!is.matrix(embedding)) {
        stop('Embedding must be compatible with matrix type.')
    }
    if (nrow(embedding) != nrow(meta_data)) {
        stop('Embedding and metadata must have same number of observations (rows).')
    }
    if (!group.by.varname %in% colnames(meta_data)) {
        stop(sprintf('Column named "%s" not defined in meta_data', group.by.varname))
    }
    ## This check used to test group.by.varname a second time, so a missing
    ## label column was never caught here.
    if (!label.varname %in% colnames(meta_data)) {
        stop(sprintf('Column named "%s" not defined in meta_data', label.varname))
    }

    ## %in% (not ==) so that `from` / `to` may each name several groups and NA
    ## group values are simply not selected.
    ids_from <- which(meta_data[[group.by.varname]] %in% from)
    ids_to <- which(meta_data[[group.by.varname]] %in% to)
    if (length(ids_from) == 0) {
        stop('No reference cells found: `from` matches no value of the grouping column.')
    }
    if (length(ids_to) == 0) {
        stop('No query cells found: `to` matches no value of the grouping column.')
    }
    ref_labels <- as.factor(meta_data[[label.varname]][ids_from])
    if (anyNA(ref_labels)) {
        stop('Reference cells must all have a non-missing label.')
    }

    ## NOTE(review): cosine_normalize_cpp(embedding, 1) presumably scales every
    ## row to unit length, which would make the Euclidean search below rank
    ## neighbours by cosine similarity - confirm against singlecellmethods.
    ## The message text is kept as it was.
    message('Use L2 distance instead of Euclidean')
    embedding <- singlecellmethods:::cosine_normalize_cpp(embedding, 1)    

    ## drop = FALSE keeps a matrix even when only one cell is selected.
    nn <- RANN::nn2(
        data = embedding[ids_from, , drop = FALSE],
        query = embedding[ids_to, , drop = FALSE],
        k = k,
        eps = 0
    )
    if (weighted) {
        ## Closer neighbors given more weight; weights of one query cell sum to 1.
        probs <- prop.table(exp(-nn$nn.dists), 1)
        xvec <- c(t(probs))
    } else {
        ## Each neighbor given equal weight
        xvec <- rep(1, length(ids_to) * k)
    }
    ## Query-by-reference matrix of neighbour weights (row-major fill).
    nn_mat <- Matrix::sparseMatrix(
        i = rep(seq_along(ids_to), each = k),
        j = c(t(nn$nn.idx)), 
        x = xvec,
        dims = c(length(ids_to), length(ids_from))
    )
    ## Reference-by-label indicator matrix, one column per label level.
    ## Built directly from the factor, so column names are the level names.
    type_design <- Matrix::sparseMatrix(
        i = seq_along(ref_labels),
        j = as.integer(ref_labels),
        x = 1,
        dims = c(length(ref_labels), nlevels(ref_labels)),
        dimnames = list(NULL, levels(ref_labels))
    )
    prob_assign <- nn_mat %*% type_design
    hard_assign <- colnames(prob_assign)[max.col(prob_assign)]

    confidence.varname <- paste0(label.varname, '_confidence')
    meta_data[[label.varname]][ids_to] <- hard_assign
    meta_data[[confidence.varname]] <- 1
    meta_data[[confidence.varname]][ids_to] <- apply(prob_assign, 1, max)

    return(meta_data)
}


## Seurat method: pull the embeddings of `reduction` from the object, run the
## default method on them together with the object's metadata, and store the
## returned metadata back on the object.
TransferLabels.Seurat <- function(
    obj, ## Seurat object
    reduction, ## reduction name for embeddings 
    group.by.varname, ## Variable that splits reference and query 
    label.varname, ## Label I want to transfer
    from, ## Reference cell group(s)
    to, ## Query cell group(s)
    k = 10, ## Number nearest neighbors for prediction
    weighted = TRUE ## Given more weight to closer neighbors
) {
    cell_embeddings <- Seurat::Embeddings(obj, reduction = reduction)
    updated_meta <- TransferLabels.default(
        embedding = cell_embeddings,
        meta_data = obj@meta.data,
        group.by.varname = group.by.varname,
        label.varname = label.varname,
        from = from,
        to = to,
        k = k,
        weighted = weighted
    )
    obj@meta.data <- updated_meta
    obj
}


In [None]:
# Transfer cleaned_fine_types from the scRNA cells to the MERFISH cells in
# harmony space, using 20 distance-weighted neighbours.
merged_Strom <- TransferLabels(
    merged_Strom,
    reduction = 'harmony',
    group.by.varname = 'technology',
    label.varname = 'cleaned_fine_types',
    from = 'scRNA',
    to = 'MERFISH',
    k = 20,
    weighted = TRUE
)

In [None]:
# Print the object summary after label transfer.
merged_Strom

In [None]:
# UMAP coloured by fine type after label transfer, one panel per technology,
# with a text label at the median position of every fine type.
options(repr.plot.width = 30, repr.plot.height = 20)
temp <- Embeddings(merged_Strom, 'humap') %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)) %>%
    left_join(
        merged_Strom@meta.data %>% select(combined_cell_names, cleaned_fine_types, technology),
        by = 'combined_cell_names'
    )
# Label anchor: median UMAP coordinates per fine type.
label.df <- temp %>%
    group_by(cleaned_fine_types) %>%
    summarize(x = median(HUMAP_1), y = median(HUMAP_2)) %>%
    na.omit()
label.df
ggplot(temp) +
    geom_point(aes(HUMAP_1, HUMAP_2, color = cleaned_fine_types), shape = '.') +
    facet_wrap(~technology, ncol = 2) +
    theme_pubr(base_size = 18) +
    ggthemes::scale_color_tableau('Tableau 20', name = "") +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
    theme(legend.position = 'right') +
    ggrepel::geom_label_repel(
        data = label.df,
        aes(x = x, y = y, label = cleaned_fine_types, color = cleaned_fine_types),
        max.overlaps = Inf,
        force = 5,
        size = 8
    ) +
    # The title used to read 'Coarse label transfer' although the labels shown
    # are the fine types.
    ggtitle('Fine label transfer with KNN') #+
#gghighlight()

In [None]:
# One highlighted UMAP panel per fine type and technology; every type is drawn
# in the same colour (red) because the facet already identifies it.
options(repr.plot.width = 30, repr.plot.height = 60)
constantPalette <- rep('red', length(unique(merged_Strom@meta.data$cleaned_fine_types)))
names(constantPalette) <- unique(merged_Strom@meta.data$cleaned_fine_types)
temp <- Embeddings(merged_Strom, 'humap') %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)) %>%
    left_join(
        merged_Strom@meta.data %>% select(combined_cell_names, cleaned_fine_types, technology),
        by = 'combined_cell_names'
    )
ggplot(temp) +
    geom_point(aes(HUMAP_1, HUMAP_2, color = cleaned_fine_types), shape = '.') +
    theme_pubr(base_size = 30) +
    scale_color_manual(values = constantPalette) +
    guides(color = guide_legend(override.aes = list(size = 10, shape = 16))) +
    theme(legend.position = 'right') +
    # The title used to read 'Coarse label transfer' although the labels shown
    # are the fine types.
    ggtitle('Fine label transfer with KNN') +
    gghighlight() +
    facet_wrap(~cleaned_fine_types + technology, ncol = 2)

# tabulate number of cells of each type

In [None]:
# Number of cells per fine type, one column per technology.
merged_Strom@meta.data %>%
    group_by(technology, cleaned_fine_types) %>%
    summarize(n = n()) %>%
    pivot_wider(names_from = technology, values_from = n)

# find markers

In [None]:
# Build a MERFISH-only object for marker finding: counts of the stromal MERFISH
# cells from G4209, with metadata (including the transferred fine types) taken
# from merged_Strom and keyed by cell name.
merfishCells <- G4209@meta.data %>%
    filter(technology == 'MERFISH' & ClusterTop == 'Strom') %>%
    select(combined_cell_names) %>%
    as.matrix() %>%
    as.vector()
temp <- merged_Strom@meta.data %>% filter(technology == 'MERFISH')
rownames(temp) <- temp$combined_cell_names
merfish <- CreateSeuratObject(
    GetAssayData(G4209, 'counts')[, merfishCells],
    meta.data = temp
)
#merfish@meta.data$cleaned_fine_types = merged_Strom@meta.data[merfishCells,'cleaned_fine_types']
merfish

In [None]:
# Build the scRNA-only counterpart, with metadata taken from merged_Strom.
# NOTE(review): the counts are read from G4209, not from scRNA_Strom; this
# assumes G4209 holds the reference cells under the same names - confirm.
scRNACells <- colnames(scRNA_Strom)
length(scRNACells)
temp <- merged_Strom@meta.data %>% filter(technology == 'scRNA')
rownames(temp) <- temp$combined_cell_names
scRNA <- CreateSeuratObject(
    GetAssayData(G4209, 'counts')[, scRNACells],
    meta.data = temp
)
scRNA

In [None]:
# Sanity check: number of cells in the scRNA counts matrix that also have a metadata row in temp.
sum(colnames(GetAssayData(scRNA, 'counts')) %in% rownames(temp))

In [None]:
# Fine types present in each single-technology object.
unique(merfish@meta.data$cleaned_fine_types)
unique(scRNA@meta.data$cleaned_fine_types)

## find merfish markers with glmm

In [None]:
# Collapse the MERFISH counts by cleaned_fine_types with presto::collapse_counts
# (min_cells_per_group = 3), then inspect the result.
require(presto)
require(singlecellmethods)
pb <- presto::collapse_counts(
    GetAssayData(merfish, 'counts'),
    merfish@meta.data,
    "cleaned_fine_types",
    min_cells_per_group = 3
)
head(pb$meta_data)
# NOTE(review): `pb$counts` is not referenced anywhere else in this notebook; it
# presumably resolves to `counts_mat` through `$` partial matching - confirm.
dim(pb$counts)

colnames(merfish@meta.data)
dim(pb$meta_data)
head(pb$meta_data)
tail(pb$meta_data)

dim(pb$counts_mat)

In [None]:
# Reorder exprs_norm to follow the rows and columns of counts_mat, then preview it.
# NOTE(review): nothing visible in this notebook creates pb$exprs_norm; if
# collapse_counts does not return it, these three lines operate on NULL - confirm.
pb$exprs_norm <- pb$exprs_norm[rownames(pb$counts_mat), colnames(pb$counts_mat)]
dim(pb$exprs_norm)
pb$exprs_norm[1:5, ]

In [None]:
# Load the saved GLMM result for the MERFISH fine types (read from the working directory).
presto_res <- readr::read_rds('G4209_merfish_Strom_fineTyping_GLMM.rds')

### Make contrasts

In [None]:
# Build the contrast matrix over cleaned_fine_types from the loaded model.
contrasts_mat <- make_contrast.presto(
    presto_res,
    var_contrast = 'cleaned_fine_types'
)
contrasts_mat

### Find marginal effects

In [None]:
# Marginal effect of every fine type (one-tailed), converted from natural-log
# to log2 units and ordered by p-value.
effects_marginal <- contrasts.presto(
    presto_res, 
    contrasts_mat, 
    one_tailed = TRUE
) %>% 
    dplyr::mutate(cluster = contrast) %>% 
    dplyr::mutate(
        logFC = sign(beta) * log2(exp(abs(beta))), ## convert stats to log2 for interpretability 
        SD = log2(exp(sigma)),
        zscore = logFC / SD
    ) %>%
    arrange(pvalue)

# Benjamini-Hochberg FDR. An FDR of exactly 0 cannot be log-transformed, so it is
# replaced by the smallest positive FDR before taking -log10.
effects_marginal$fdr <- p.adjust(effects_marginal$pvalue, method = 'BH')
# which() drops NA: one missing FDR used to turn min() into NA and therefore
# every zero-FDR floor into NA as well.
positive_fdr <- effects_marginal$fdr[which(effects_marginal$fdr > 0)]
effects_marginal$corr_fdr <- effects_marginal$fdr
if (length(positive_fdr) > 0) {
    effects_marginal$corr_fdr[which(effects_marginal$fdr == 0)] <- min(positive_fdr)
}
effects_marginal$`-log10_fdr` <- (-1) * log10(effects_marginal$corr_fdr) 
dim(effects_marginal)
head(effects_marginal)


In [None]:
# Per-gene summary table: overall mean of the 'data' slot, plus one column per
# fine type holding the FRACTION OF CELLS WITH ZERO COUNTS for that gene
# (converted to "proportion expressed" in the next cell).
meanExp <- rowMeans(GetAssayData(merfish, 'data')) 
meanExp <- data.frame(feature = names(meanExp), meanExp = meanExp)
merfish_counts <- GetAssayData(merfish, 'counts')
for (cluster in unique(effects_marginal$cluster)) {
    # which() ignores cells whose fine type is NA.
    cluster_cells <- rownames(merfish@meta.data)[which(merfish@meta.data$cleaned_fine_types == cluster)]
    # Count non-zero entries on the count matrix directly instead of converting
    # it to a data frame and scanning it row by row.
    # drop = FALSE keeps a matrix when a type has a single cell.
    n_nonzero <- Matrix::rowSums(merfish_counts[, cluster_cells, drop = FALSE] != 0)
    meanExp[, cluster] <- (length(cluster_cells) - n_nonzero) / length(cluster_cells)
}
head(meanExp)

#effects_marginal = effects_marginal %>% left_join(meanExp, by = join_by(feature), relationship = 'many-to-one') 
#tail(effects_marginal)

In [None]:
# Reshape the per-type zero fractions to long form, turn them into the
# proportion of cells expressing the gene, and attach them (with the overall
# mean expression) to the marker table by gene and contrast.
# Columns are named through names_to / values_to rather than by overwriting
# colnames by position, and the external vector is wrapped in all_of().
temp <- meanExp %>%
    pivot_longer(
        cols = all_of(as.character(unique(effects_marginal$contrast))),
        names_to = 'contrast',
        values_to = 'prop.expressed'
    ) %>%
    mutate(prop.expressed = 1 - prop.expressed)
effects_marginal <- left_join(effects_marginal, temp, by = join_by(feature, contrast)) 
rm(temp)
head(effects_marginal)

In [None]:
# Keep the MERFISH marker table under its own name; effects_marginal is reused for scRNA below.
effects_marginal_merfish <- effects_marginal

## find scRNA markers with glmm

In [None]:
# Collapse the scRNA counts by orig.ident and cleaned_fine_types with
# presto::collapse_counts (min_cells_per_group = 3), then inspect the result.
require(presto)
require(singlecellmethods)
pb <- presto::collapse_counts(
    GetAssayData(scRNA, 'counts'),
    scRNA@meta.data,
    c("orig.ident", "cleaned_fine_types"),
    min_cells_per_group = 3
)
head(pb$meta_data)
# NOTE(review): `pb$counts` is not referenced anywhere else in this notebook; it
# presumably resolves to `counts_mat` through `$` partial matching - confirm.
dim(pb$counts)

colnames(scRNA@meta.data)
dim(pb$meta_data)
head(pb$meta_data)
tail(pb$meta_data)

dim(pb$counts_mat)

In [None]:
# Reorder exprs_norm to follow the rows and columns of counts_mat, then preview it.
# NOTE(review): nothing visible in this notebook creates pb$exprs_norm; if
# collapse_counts does not return it, these three lines operate on NULL - confirm.
pb$exprs_norm <- pb$exprs_norm[rownames(pb$counts_mat), colnames(pb$counts_mat)]
dim(pb$exprs_norm)
pb$exprs_norm[1:5, ]

In [None]:
# Load the saved GLMM result for the scRNA fine types (read from the working directory).
presto_res <- readr::read_rds('G4209_scRNA_Strom_fineTyping_GLMM.rds')

### Make contrasts

In [None]:
# Build the contrast matrix over cleaned_fine_types from the loaded model.
contrasts_mat <- make_contrast.presto(
    presto_res,
    var_contrast = 'cleaned_fine_types'
)
contrasts_mat

### Find marginal effects

In [None]:
# Marginal effect of every fine type (one-tailed), converted from natural-log
# to log2 units and ordered by p-value.
effects_marginal <- contrasts.presto(
    presto_res, 
    contrasts_mat, 
    one_tailed = TRUE
) %>% 
    dplyr::mutate(cluster = contrast) %>% 
    dplyr::mutate(
        logFC = sign(beta) * log2(exp(abs(beta))), ## convert stats to log2 for interpretability 
        SD = log2(exp(sigma)),
        zscore = logFC / SD
    ) %>%
    arrange(pvalue)

# Benjamini-Hochberg FDR. An FDR of exactly 0 cannot be log-transformed, so it is
# replaced by the smallest positive FDR before taking -log10.
effects_marginal$fdr <- p.adjust(effects_marginal$pvalue, method = 'BH')
# which() drops NA: one missing FDR used to turn min() into NA and therefore
# every zero-FDR floor into NA as well.
positive_fdr <- effects_marginal$fdr[which(effects_marginal$fdr > 0)]
effects_marginal$corr_fdr <- effects_marginal$fdr
if (length(positive_fdr) > 0) {
    effects_marginal$corr_fdr[which(effects_marginal$fdr == 0)] <- min(positive_fdr)
}
effects_marginal$`-log10_fdr` <- (-1) * log10(effects_marginal$corr_fdr) 
dim(effects_marginal)
head(effects_marginal)


In [None]:
# Per-gene summary table: overall mean of the 'data' slot, plus one column per
# fine type holding the FRACTION OF CELLS WITH ZERO COUNTS for that gene
# (converted to "proportion expressed" in the next cell).
meanExp <- rowMeans(GetAssayData(scRNA, 'data')) 
meanExp <- data.frame(feature = names(meanExp), meanExp = meanExp)
scRNA_counts <- GetAssayData(scRNA, 'counts')
for (cluster in unique(effects_marginal$cluster)) {
    # which() ignores cells whose fine type is NA.
    cluster_cells <- rownames(scRNA@meta.data)[which(scRNA@meta.data$cleaned_fine_types == cluster)]
    # Count non-zero entries on the count matrix directly instead of converting
    # it to a data frame and scanning it row by row.
    # drop = FALSE keeps a matrix when a type has a single cell.
    n_nonzero <- Matrix::rowSums(scRNA_counts[, cluster_cells, drop = FALSE] != 0)
    meanExp[, cluster] <- (length(cluster_cells) - n_nonzero) / length(cluster_cells)
}
head(meanExp)


In [None]:
# Reshape the per-type zero fractions to long form, turn them into the
# proportion of cells expressing the gene, and attach them (with the overall
# mean expression) to the marker table by gene and contrast.
# Columns are named through names_to / values_to rather than by overwriting
# colnames by position, and the external vector is wrapped in all_of().
temp <- meanExp %>%
    pivot_longer(
        cols = all_of(as.character(unique(effects_marginal$contrast))),
        names_to = 'contrast',
        values_to = 'prop.expressed'
    ) %>%
    mutate(prop.expressed = 1 - prop.expressed)
effects_marginal <- left_join(effects_marginal, temp, by = join_by(feature, contrast)) 
rm(temp)
head(effects_marginal)

In [None]:
# Keep the scRNA marker table under its own name.
effects_marginal_scRNA <- effects_marginal

# compare logFC between merfish and scrna

In [None]:
# For every fine type that has a contrast in both marker tables, scatter the
# scRNA logFC against the MERFISH logFC. Genes are coloured by where they reach
# fdr < 0.05, and genes significant in either technology are labelled.
options(repr.plot.width = 10, repr.plot.height = 10)
combnVars <- expand.grid(unique(effects_marginal_scRNA$contrast), unique(effects_marginal_merfish$contrast))
head(combnVars)

# Only matching contrasts are ever plotted, so loop over the shared ones
# (in MERFISH order) instead of joining every scRNA x MERFISH pair.
merfish_contrasts <- as.character(unique(effects_marginal_merfish$contrast))
shared_contrasts <- merfish_contrasts[
    merfish_contrasts %in% as.character(unique(effects_marginal_scRNA$contrast))
]
sig_cutoff <- (-1) * log10(0.05)  ## fdr < 0.05 on the -log10 scale

plotlist <- vector('list', length(shared_contrasts))
for (idx in seq_along(shared_contrasts)) {
    fine_type <- shared_contrasts[idx]
    temp <- dplyr::left_join(
        effects_marginal_scRNA %>%
            filter(contrast == fine_type) %>%
            mutate(fdr_scRNA = `-log10_fdr`,
                   contrast_scRNA = contrast,
                   logFC_scRNA = logFC) %>%
            select(feature, contrast_scRNA, fdr_scRNA, logFC_scRNA),
        effects_marginal_merfish %>%
            filter(contrast == fine_type) %>%
            mutate(fdr_merfish = `-log10_fdr`,
                   contrast_merfish = contrast,
                   logFC_merfish = logFC) %>%
            select(feature, contrast_merfish, fdr_merfish, logFC_merfish),
        by = 'feature'
    )
    # A gene missing from one table has NA there; treat it as not significant
    # rather than letting the NA abort the classification.
    sig_merfish <- !is.na(temp$fdr_merfish) & temp$fdr_merfish > sig_cutoff
    sig_scRNA <- !is.na(temp$fdr_scRNA) & temp$fdr_scRNA > sig_cutoff
    temp$labelTRUE <- sig_merfish | sig_scRNA
    temp$label <- ifelse(temp$labelTRUE, as.character(temp$feature), NA_character_)
    temp$color <- as.factor(dplyr::case_when(
        sig_merfish & sig_scRNA ~ 'fdr < 0.05 in both',
        sig_merfish ~ 'fdr < 0.05 in merfish',
        sig_scRNA ~ 'fdr < 0.05 in scRNA',
        TRUE ~ 'fdr > 0.05 in both'
    ))
    # Correlate the plotted quantity. The title used to show cor() of the
    # -log10 FDR values under the label 'R2'.
    both_finite <- is.finite(temp$logFC_scRNA) & is.finite(temp$logFC_merfish)
    logFC_cor <- if (sum(both_finite) > 2) {
        cor(temp$logFC_scRNA[both_finite], temp$logFC_merfish[both_finite])
    } else {
        NA_real_
    }
    p1 <- ggplot(temp, aes(x = logFC_merfish, y = logFC_scRNA, color = color)) +
        # shape was previously given to ggplot(), where it has no effect.
        geom_point(shape = 16) +
        ylab(paste0('logFC in scRNA: ', fine_type)) +
        xlab(paste0('logFC in MERFISH: ', fine_type)) +
        ggtitle(paste0('MERFISH: ', fine_type, '\nscRNA: ', fine_type,
                       '\nPearson r (logFC): ', round(logFC_cor, 2))) +
        geom_hline(yintercept = 0) +
        geom_vline(xintercept = 0) +
        ggrepel::geom_label_repel(aes(label = label, color = color)) +
        theme_minimal(base_size = 18) +
        geom_abline(intercept = 0, slope = 1) +
        scale_color_brewer(palette = "Set1") +
        theme(legend.position = 'right') +
        guides(color = guide_legend(override.aes = list(size = 10, shape = 16)))
    print(p1)
    plotlist[[idx]] <- p1
}
length(plotlist)

In [None]:
# Top 20 MERFISH markers per fine type among genes with fdr < 0.05: one row per
# rank, one column per contrast. Ties in fdr are broken at random, so tied genes
# may swap ranks between runs.
# pivot_wider() replaces the superseded spread(); the arrange() + names_sort
# pair reproduces spread()'s row and column ordering.
effects_marginal_merfish %>%
    filter(fdr < 0.05) %>%
    group_by(contrast) %>%
    mutate(rank = rank(fdr, ties.method = "random")) %>%
    ungroup() %>%
    filter(rank < 21) %>%
    select(feature, contrast, rank) %>%
    arrange(rank) %>%
    pivot_wider(names_from = contrast, values_from = feature, names_sort = TRUE)

# plot in space

In [None]:
# Print the MERFISH object, then draw every cell at its x/y position coloured by fine type.
merfish
options(repr.plot.width = 15, repr.plot.height = 10, repr.plot.res = 300)
ggplot(data = merfish@meta.data) +
    geom_point(
        mapping = aes(x = x, y = y, color = cleaned_fine_types),
        shape = '.'
    ) +
    ggthemes::scale_color_tableau('Tableau 20') +
    theme_minimal() +
    guides(colour = guide_legend(override.aes = list(size = 10, shape = 16))) +
    coord_sf()

In [None]:
# Spatial map with one highlighted panel per fine type; all types share the
# colour red because the facet identifies the type.
constantPalette <- rep('red', length = length(unique(merfish@meta.data$cleaned_fine_types)))
options(repr.plot.width = 40, repr.plot.height = 40, repr.plot.res = 300)
ggplot(data = merfish@meta.data) +
    geom_point(
        mapping = aes(x = x, y = y, color = cleaned_fine_types),
        shape = '.'
    ) +
    #ggthemes::scale_color_tableau('Tableau 20') +
    scale_color_manual(values = constantPalette) +
    theme_minimal(base_size = 18) +
    facet_wrap(~cleaned_fine_types) +
    gghighlight() +
    coord_sf()

In [None]:
# Cell counts per fine type and technology, transposed so that fine types run down the rows.
merged_Strom@meta.data %>%
    group_by(cleaned_fine_types, technology) %>%
    summarize(n = n()) %>%
    pivot_wider(values_from = n, names_from = cleaned_fine_types) %>%
    t()

# fovs with lots of CAFs

In [None]:
# Zoom into the region 8000 < x < 10000, 4000 < y < 6000: all cells in light
# grey, with cells whose fine type matches 'CAF|Fibro' drawn on top in random
# order, coloured and shaped by type.
options(repr.plot.width = 10, repr.plot.height = 5, repr.plot.res = 300)
ggplot(
    merfish@meta.data %>%
        filter(y > 4000 & y < 6000 & x > 8000 & x < 10000) %>%
        filter(grepl(pattern = 'CAF|Fibro', x = cleaned_fine_types)) %>%
        sample_n(size = nrow(.))
) +
    geom_point(
        data = merfish@meta.data %>% filter(y > 4000 & y < 6000 & x > 8000 & x < 10000),
        mapping = aes(x, y),
        color = 'lightgrey',
        shape = 16
    ) +
    geom_point(aes(x = x, y = y, color = cleaned_fine_types, shape = cleaned_fine_types)) +
    ggthemes::scale_color_tableau('Tableau 20') +
    theme_minimal() +
    coord_sf()

In [None]:
# Preview the `cell` metadata column, which the next cells use as the join key to the count matrix.
merfish@meta.data$cell %>% head()

In [None]:
# Raw counts of selected genes for the 'CAF|Fibro' cells inside the region
# 8000 < x < 10000, 4000 < y < 6000, one spatial panel per gene.
roi_genes <- c('GREM1', 'MMP3', 'CXCL14', 'BMP4', 'BMP5')
roi_counts <- GetAssayData(merfish, slot = 'counts')[roi_genes, ] %>%
    t() %>%
    as.data.frame() %>%
    mutate(cell = rownames(.))
# inner_join keeps only the selected cells. The earlier right_join also kept
# every other cell in the object with NA coordinates, which ggplot then dropped.
temp <- merfish@meta.data %>%
    filter(y > 4000 & y < 6000 & x > 8000 & x < 10000) %>%
    filter(grepl(pattern = 'CAF|Fibro', x = .$cleaned_fine_types)) %>%
    sample_n(size = nrow(.)) %>%
    select(x, y, cell, cleaned_fine_types) %>%
    inner_join(roi_counts, by = 'cell')
# One panel per gene, combined left to right with patchwork's `+`.
gene_panels <- lapply(roi_genes, function(gene) {
    ggplot(temp) +
        geom_point(shape = '.', aes(x, y, color = .data[[gene]])) +
        scale_color_viridis_c(direction = -1) +
        labs(color = gene)
})
Reduce(`+`, gene_panels)

In [None]:
# Raw counts of selected genes for every MERFISH cell, one spatial panel per gene.
# NOTE(review): CCL19 is fetched into `temp` but was never plotted; it stays
# available in the table and out of the panels - add it to plotted_genes if a
# CCL19 panel was intended.
fetched_genes <- c('CCL19', 'GREM1', 'MMP3', 'CXCL14', 'BMP4', 'BMP5')
plotted_genes <- c('GREM1', 'MMP3', 'CXCL14', 'BMP4', 'BMP5')
all_counts <- GetAssayData(merfish, slot = 'counts')[fetched_genes, ] %>%
    t() %>%
    as.data.frame() %>%
    mutate(cell = rownames(.))
# inner_join on the cell id: only cells present in both tables can be plotted;
# a right_join would also keep unmatched cells with NA coordinates.
temp <- merfish@meta.data %>%
    sample_n(size = nrow(.)) %>%
    select(x, y, cell, cleaned_fine_types) %>%
    inner_join(all_counts, by = 'cell')
# One panel per gene, combined left to right with patchwork's `+`.
gene_panels <- lapply(plotted_genes, function(gene) {
    ggplot(temp) +
        geom_point(shape = '.', aes(x, y, color = .data[[gene]])) +
        scale_color_viridis_c(direction = -1) +
        labs(color = gene)
})
Reduce(`+`, gene_panels)

# Cell type frequencies - compare MERFISH and scRNA

In [None]:
# Percentage of cells per fine type within each technology (each column sums to ~100).
# Zero-count combinations are dropped so unused factor levels do not appear.
# A type present in only one technology is then filled back in as 0. Left as
# NA, it would make sum() and therefore the whole percentage column NA.
table(merged_Strom@meta.data$cleaned_fine_types, merged_Strom@meta.data$technology) %>%
    as.data.frame() %>%
    filter(Freq > 0) %>%
    rename(cleaned_fine_type = Var1, technology = Var2) %>%
    pivot_wider(values_from = Freq, names_from = technology, values_fill = 0L) %>%
    mutate(
        MERFISH = round(100 * MERFISH / sum(MERFISH), 2),
        scRNA = round(100 * scRNA / sum(scRNA), 2)
    )