## Find DEGs for KNN_Grouped

In [None]:
# Attach every package used in this notebook with start-up banners silenced,
# then apply two session-wide settings.
# NOTE(review): require() only warns and returns FALSE when a package is
# missing, so a missing dependency shows up later as a "could not find
# function" error instead of failing here.
suppressPackageStartupMessages({
    require(tidyverse)
    require(sf)
    require(data.table)
    require(Matrix)
    require(ggpubr)
    require(ggthemes)
    require(future)
    require(furrr)
    require(lme4)
    require(presto)
    require(ggrepel)
    require(Seurat)
    require(ComplexHeatmap)
    require(circlize)
    # switch off sf's s2 spherical-geometry engine for the whole session
    sf::sf_use_s2(FALSE)  
    # fixed RNG seed so sampled previews and simulations are repeatable
    set.seed(1)
})

### Load MERFISH counts and metadata

### counts

In [None]:
# Read the complete count matrix (about 50 s), then convert it to a sparse
# Matrix; both steps are timed.
system.time({
    counts <- readr::read_rds('/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH_niches/labeled_seurat_objects/renamed_cell_states/counts_complete.rds')
})
system.time({
    counts <- Matrix(as.matrix(counts), sparse = TRUE)
})
dim(counts)

In [None]:
# Gene names of the MERFISH panel, taken from column `x` of the panel table.
gene_panel <- read.table('/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH/figure6/merfish_gene_panel.tsv')$x
length(gene_panel)

In [None]:
# Name the rows of the count matrix after the panel genes.
rownames(counts) <- gene_panel

### metadata

In [None]:
# Per-cell metadata for the complete dataset.
metadata <- readr::read_rds('/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH_niches/labeled_seurat_objects/renamed_cell_states/metadata_complete.rds')
# Flatten the two label columns with as.vector() before they are edited.
metadata$KNN_group <- as.vector(metadata$KNN_group)
metadata$knn_renamed_cell_states <- as.vector(metadata$knn_renamed_cell_states)
metadata <- metadata %>%
    mutate(
        # cells whose coarse label is 'B' take their fine state as group label
        # (evaluated first, i.e. before the fibroblast states are merged)
        KNN_group = ifelse(knn_coarse == 'B', knn_renamed_cell_states, KNN_group),
        # the two fibroblast states are pooled into a single label
        knn_renamed_cell_states = ifelse(
            knn_renamed_cell_states %in% c('Fibro-CXCL14', 'Fibro-BMP'),
            'Fibro-CXCL14-BMP',
            knn_renamed_cell_states
        )
    )
sample_n(metadata, 10)

In [None]:
writeLines(unique(metadata$knn_renamed_cell_states))

## load pathology regions

In [None]:
# Load the per-cell pathology annotations and bring sample names and cell IDs
# into the naming scheme used when joining to `metadata`.
pathology_regions <- data.table::fread('/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH/compare_CXCL10_niches_and_tessera_20240930/adata_all_obs.csv') %>%
    select(!V1)

# Submucosa cells are kept but relabelled as 'Unannotated' (they are not dropped).
pathology_regions$pathology_region[pathology_regions$pathology_region == 'non_neoplastic_submucosa'] <- 'Unannotated'

# Give the two G4659 runs their full sample names.
pathology_regions$sample_name[pathology_regions$sample_name == 'G4659'] <- 'G4659-CP-MET_VMSC04701'
pathology_regions$sample_name[pathology_regions$sample_name == 'G4659_Beta8'] <- 'G4659-CP-MET_Beta8'

# Both ID columns are "<sample_name>_<cell_id>" for every cell. The earlier
# per-sample writes to orig_cell_id were removed: this final assignment
# overwrote them for all rows, so they had no effect on the result.
pathology_regions$sample_cell <- paste0(pathology_regions$sample_name, '_', pathology_regions$cell_id)
pathology_regions$orig_cell_id <- paste0(pathology_regions$sample_name, '_', pathology_regions$cell_id)

In [None]:
unique(pathology_regions$MSstatus[pathology_regions$sample_name == 'C107'])

In [None]:
# Copy annotation columns to the names used in `metadata`, coarsen the lineage
# labels, and turn the raw region codes into short display labels.
pathology_regions <- pathology_regions %>%
    mutate(
        condition = MSstatus,
        knn_renamed_cell_states = KNN_celltype_v2,
        orig.ident = sample_name,
        knn_coarse = KNN_Top
        # disabled: pathology_region = ifelse(grepl(pathology_region, pattern = 'tumor'), yes = 'tumor', no = pathology_region)
    ) %>%
    mutate(
        # Mast -> Myeloid; B and Plasma -> Bplasma; everything else unchanged
        knn_coarse = ifelse(
            knn_coarse == 'Mast',
            yes = 'Myeloid',
            no = ifelse(knn_coarse %in% c('B', 'Plasma'), yes = 'Bplasma', no = knn_coarse)
        ),
        # all epithelial cells share one state label
        knn_renamed_cell_states = ifelse(knn_coarse == 'Epi', yes = 'Epi', no = knn_renamed_cell_states),
        # region labels: underscores -> spaces, Title Case, then abbreviations
        pathology_region = gsub(pattern = '_', replacement = ' ', x = pathology_region),
        pathology_region = str_to_title(pathology_region),
        pathology_region = gsub(pattern = 'Muscularis Propria', replacement = 'M. propria', x = pathology_region),
        pathology_region = gsub(pattern = 'Non Neoplastic Mucosa', replacement = 'Non-neo. muc.', x = pathology_region),
        # alternative label considered for this class: 'Non-neo. submuc.'
        pathology_region = gsub(pattern = 'Non Neoplastic Submucosa|non_neoplastic_submucosa', replacement = 'Unannotated', x = pathology_region),
        pathology_region = gsub(pattern = 'Tumor Invasive Margin', replacement = 'Tumor inv-border', x = pathology_region),
        pathology_region = gsub(pattern = 'Tumor Luminal Margin', replacement = 'Tumor lum-border', x = pathology_region)
    )

In [None]:
sample_n(pathology_regions, 10)

### add pathology annotations to metadata

In [None]:
# Attach each cell's pathology region, matching metadata$cell to sample_cell.
metadata <- metadata %>%
    left_join(
        select(pathology_regions, sample_cell, pathology_region),
        by = join_by(cell == sample_cell)
    )

In [None]:
dim(metadata)

In [None]:
getwd()

# add updated knn-mid annotations to metadata

In [None]:
# Lookup table that carries the updated mid-level labels (`knn_mid_renamed`).
knn_mid_renamed <- fread('knn_cell_state_labels.csv')
knn_mid_renamed
# No `by` is given, so the join key is every column name the two tables share.
metadata <- metadata %>% left_join(knn_mid_renamed)

## GLMM to find DEGs between `knn_mid_renamed` groups across all samples

In [None]:
# metadata = left_join(metadata, pathology_regions %>% select(sample_cell, knn_mid), join_by(cell == sample_cell))

In [None]:
# Keep only the part of each sample ID before the first underscore, and
# collapse every region label starting with 'Tumor' to plain 'Tumor'.
metadata <- metadata %>%
    mutate(
        orig.ident = gsub(pattern = '_.*', replacement = "", x = orig.ident),
        pathology_region = gsub(pattern = 'Tumor.*', replacement = 'Tumor', x = pathology_region)
    )

In [None]:
# Row names are the cell IDs; they are used below to index the count matrix.
rownames(metadata) <- metadata$cell

In [None]:
table(metadata$knn_mid_renamed)

In [None]:
table(metadata$pathology_region)

In [None]:
# Put the count columns in the same order as the metadata rows.
counts <- counts[, rownames(metadata)]
dim(counts)

In [None]:
# Per-cell log(total counts + 1).
metadata$logUMI <- log(colSums(counts) + 1)

In [None]:
dim(metadata)
dim(counts)

In [None]:
# Collapse the single-cell counts into one profile per combination of
# sample (orig.ident) and mid-level cell type (knn_mid_renamed).
require(furrr)
require(future)
require(presto)
# run future-based code in parallel background R sessions
plan(multisession)
require(singlecellmethods)
# NOTE(review): the grouping variables are passed positionally as the third
# argument, and min_cells_per_group = 3 presumably discards groups with fewer
# than three cells — confirm both against the installed presto version.
pb = presto::collapse_counts(
    counts_mat = counts, 
    meta_data = metadata,
    c('orig.ident', 'knn_mid_renamed'), 
    min_cells_per_group = 3
)
# inspect the design table of the collapsed groups
pb$meta_data

In [None]:
# Fit a Poisson mixed model on the collapsed counts (timed): random intercepts
# for cell type and for sample, with logUMI as the offset.
# NOTE(review): the formula needs a `logUMI` column in pb$meta_data; it is
# presumably produced by collapse_counts() — verify.
system.time({
presto_res = presto::presto.presto(
    # alternative with cell type nested within sample, currently disabled:
    #formula = y ~ 1 + (1|knn_mid_renamed) + (1|orig.ident/knn_mid_renamed) + offset(logUMI),
    formula = y ~ 1 + (1|knn_mid_renamed) + (1|orig.ident) + offset(logUMI),
    design = pb$meta_data, #metadata, 
    response = pb$counts_mat, #counts,
    size_varname = "logUMI", 
    # effects (and their covariance) are extracted for the cell-type term
    effects_cov = 'knn_mid_renamed',
    ncore = 10, 
    min_sigma = .05,
    family = "poisson",
    nsim = 1000 
)}) 

In [None]:
# Build the contrasts over knn_mid_renamed from the fitted model.
contrasts_mat = make_contrast.presto(
    presto_res, 
    var_contrast = 'knn_mid_renamed')

# Two-sided test per contrast and feature, then derived statistics:
#   logFC  - beta converted from natural-log to log2 units (sign kept separately)
#   SD     - sigma converted to log2 units
#   zscore - logFC / SD
#   fdr    - p.adjust(method = 'fdr') over all rows of the table
# Rows are sorted by increasing p-value.
effects_marginal = contrasts.presto(
    presto_res, 
    contrasts_mat, 
    one_tailed = FALSE
) %>% 
    dplyr::mutate(
        logFC = sign(beta) * log2(exp(abs(beta))), # convert stats to log2 for interpretability 
        SD = log2(exp(sigma)),
        zscore = logFC / SD,
        fdr = p.adjust(pvalue, method = 'fdr')
    ) %>%
    arrange(pvalue) 
# Smallest non-zero FDR.
# NOTE(review): `correction` is not referenced again in the visible notebook.
correction = effects_marginal$fdr[effects_marginal$fdr != 0] %>% min

In [None]:
sample_n(effects_marginal, 20)

In [None]:
effects_marginal$contrast %>% unique

# plot heatmap

In [None]:
# Marker genes shown as heatmap columns, in display order.
canonicalMarkers <- c(
    'CD2', 'CD3D', 'TRAC', 'CD4', 'CD40LG', 'CD8A', 'CD8B', 'TRGC1', 'TRDC',
    'ZBTB16', 'KLRF1', 'CMC1', 'LST1', 'RORC', 'CD19', 'MS4A1', 'CD79A',
    'CD27', 'MZB1', 'KIT', 'CTSG', 'CPA3', 'CD1E', 'CLEC4C', 'LAMP3', 'CD163',
    'C1QA', 'FCN1', 'VCAN', 'S100A12', 'FCGR3B', 'HCAR2', 'SEMA3G', 'VWF',
    'ACKR1', 'COL4A1', 'RGS5', 'NOTCH3', 'COL1A2', 'COL10A1', 'MYH11', 'S100B',
    'NRXN1', 'CEACAM1', 'KRT20', 'KRT8'
)
canonicalMarkers

In [None]:
# Mid-level cell types in the order they appear in the lookup table.
.mid <- unique(knn_mid_renamed$knn_mid_renamed)
.mid

In [None]:
# contrast x marker matrix of log2 fold changes
.temp <- effects_marginal %>%
    filter(feature %in% canonicalMarkers) %>%
    select(contrast, feature, logFC) %>%
    pivot_wider(names_from = feature, values_from = logFC) %>%
    column_to_rownames('contrast') %>%
    as.matrix()
dim(.temp)

In [None]:
# One annotation row per mid-level cell type: its lineage and its cell count,
# ordered like `.mid`.
.rowAnno <- metadata %>%
    select(knn_mid_renamed, knn_coarse) %>%
    group_by(knn_mid_renamed, .drop = FALSE) %>%
    mutate(n = n()) %>%
    ungroup() %>%
    distinct() %>%
    na.omit() %>%
    as.data.frame()
rownames(.rowAnno) <- .rowAnno$knn_mid_renamed
.rowAnno <- .rowAnno[.mid, ]
.rowAnno

In [None]:
# contrast x marker matrix of significance marks:
# '*' where FDR < 0.05 and log2 fold change > 2, otherwise ''.
.pval <- effects_marginal %>%
    filter(feature %in% canonicalMarkers) %>%
    select(contrast, feature, fdr, logFC) %>%
    mutate(fdr = ifelse(fdr < 0.05 & logFC > 2, yes = '*', no = '')) %>%
    select(!logFC) %>%
    pivot_wider(names_from = feature, values_from = fdr) %>%
    column_to_rownames('contrast') %>%
    as.matrix()
dim(.pval)

In [None]:
# Which markers were tested at all?
canonicalMarkers %in% effects_marginal$feature

In [None]:
# Markers missing from the fold-change matrix (the subsetting below needs none).
canonicalMarkers[!canonicalMarkers %in% colnames(.temp)]

In [None]:
# Lineage of every heatmap row, named by cell type.
.knn_coarse <- setNames(.rowAnno$knn_coarse, .rowAnno$knn_mid_renamed)

# Same row and column order for the fold changes and the significance marks.
.temp <- .temp[.rowAnno$knn_mid_renamed, canonicalMarkers]
.pval <- .pval[.rowAnno$knn_mid_renamed, canonicalMarkers]

In [None]:
# Marker heatmap: rows are mid-level cell types, columns are canonical markers,
# colour is the log2 fold change and '*' marks the entries flagged in `.pval`.
options(repr.plot.res = 500, repr.plot.width = 12, repr.plot.height = 5)
set.seed(1)
# Left annotation: lineage colour strip.
ha1 <- HeatmapAnnotation(
    which = 'row',
    Lineage = .knn_coarse,
    col = list(Lineage = c(
        'Epi' = '#CA49FC',
        'Strom' = '#00D2D0',
        'Myeloid' = '#FFB946',
        'Mast' = '#F4ED57',
        'Plasma' = '#61BDFC',
        'B' = '#0022FA',
        'TNKILC' = '#FF3420'
    )),
    annotation_legend_param = list(Lineage = list(nrow = 3, direction = 'horizontal'))
)
# Right annotation: log10 number of cells per cell type.
ha2 <- HeatmapAnnotation(
    `log10(Count)` = anno_barplot(log10(.rowAnno$n)),
    annotation_name_rot = 0,
    which = 'row'
)
# White from the minimum up to 0, then white -> muted navy up to the maximum.
# Earlier diverging alternative:
#col_fun = colorRamp2(c(min(.temp), 0, max(.temp)), c(scales::muted('blue'), "white", scales::muted('red')))
col_fun <- colorRamp2(c(min(.temp), 0, max(.temp)), c('white', 'white', scales::muted('navyblue')))
h1 <- ComplexHeatmap::Heatmap(
    matrix = .temp,
    name = 'logFC',
    col = col_fun,
    heatmap_legend_param = list(direction = 'horizontal'),
    cluster_rows = TRUE,
    cluster_columns = FALSE,
    left_annotation = ha1,
    right_annotation = ha2,
    # Text colour in grid is `col`; `fontcolor` is not a graphical parameter,
    # so the stars were previously drawn in the default colour instead of red.
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.text(.pval[i, j], x, y, gp = gpar(col = 'red', fontsize = 10))
    },
    column_names_side = 'top',
    show_column_dend = FALSE,
    show_row_dend = FALSE,
    row_names_side = 'left'
)
draw(
    h1,
    merge_legend = TRUE,
    heatmap_legend_side = "bottom",
    annotation_legend_side = "bottom"
)

In [None]:
require(grid)
h1

In [None]:
# PDF copy of the heatmap, 12 x 5 in, legends merged at the bottom.
pdf('figure_1c.pdf', width = 12, height = 5)
draw(
    h1,
    annotation_legend_side = "bottom",
    heatmap_legend_side = "bottom",
    merge_legend = TRUE
)
dev.off()

In [None]:
# PNG copy at the same size, 500 dpi.
png('figure_1c.png', width = 12, height = 5, units = 'in', res = 500)
draw(
    h1,
    annotation_legend_side = "bottom",
    heatmap_legend_side = "bottom",
    merge_legend = TRUE
)
dev.off()

## representative sample in space

In [None]:
require(ggspatial)

In [None]:
options(repr.plot.res = 500, repr.plot.width = 8, repr.plot.height = 8)

require(scattermore)
rotation <- 140
# Cells whose sample ID contains "G4423" (the object is named `g4669`).
# The join has no `by`, so it matches on all shared column names.
g4669 <- metadata %>%
    filter(grepl(pattern = "G4423", x = orig.ident)) %>%
    left_join(
        rename(select(pathology_regions, sample_cell, pathology_region), cell = sample_cell)
    ) %>%
    mutate(pathology_region = gsub(pattern = '_', replacement = ' ', x = pathology_region))
table(g4669$pathology_region) %>%
    as.data.frame() %>%
    filter(Freq > 0)

# Overview of the section coloured by lineage, with region labels, a scale bar
# and two rectangles: red = TLS window, black = stromal window.
G4423 <- ggplot(g4669) +
    geom_scattermore(aes(x = x, y = y, color = knn_coarse)) +
    coord_sf() +
    theme_void() +
    scale_color_manual(
        name = 'Lineage ',
        values = c(
            'Epi' = '#CA49FC',
            'Strom' = '#00D2D0',
            'Myeloid' = '#FFB946',
            'Mast' = '#F4ED57',
            'Plasma' = '#61BDFC',
            'B' = '#0022FA',
            'TNKILC' = '#FF3420'
        )
    ) +
    guides(
        color = guide_legend(
            title.position = "top",
            title.hjust = 0.5,
            ncol = 1,
            override.aes = list(shape = 16, size = 10)
        )
    ) +
    theme(legend.position = c(0.1, 0.75)) +
    annotate(geom = "text", x = 12000, y = 3000, label = "Luminal surface", angle = 45, size = 8) +
    annotate(geom = "text", x = 5000, y = 6500, label = "Mucosa", angle = 45, size = 8) +
    annotate(geom = "text", x = 7500, y = 2500, label = "Tumor", angle = 45, size = 8) +
    annotation_scale(location = 'tr') +
    annotate('rect', xmin = 8250, xmax = 10000, ymin = 8500, ymax = 9800, color = "red", fill = NA) +
    annotate('rect', xmin = 9500, xmax = 11250, ymin = 5000, ymax = 6200, color = "black", fill = NA)
G4423

### rotated

In [None]:
require(scattermore)
# Angle (degrees) of the viewport the plot is drawn in; text is counter-rotated
# by the same amount so it reads upright.
rotation <- 140
g4669 <- metadata %>%
    filter(grepl(pattern = "G4423", x = orig.ident)) %>%
    left_join(
        rename(select(pathology_regions, sample_cell, pathology_region), cell = sample_cell)
    ) %>%
    mutate(pathology_region = gsub(pattern = '_', replacement = ' ', x = pathology_region))
table(g4669$pathology_region) %>%
    as.data.frame() %>%
    filter(Freq > 0)
G4423_rotate <- ggplot(g4669) +
    geom_scattermore(aes(x = x, y = y, color = knn_coarse)) +
    coord_sf() +
    theme_void() +
    scale_color_manual(
        name = 'Cell Lineage ',
        values = c(
            'Epi' = '#CA49FC',
            'Strom' = '#00D2D0',
            'Myeloid' = '#FFB946',
            'Mast' = '#F4ED57',
            'Plasma' = '#61BDFC',
            'B' = '#0022FA',
            'TNKILC' = '#FF3420'
        )
    ) +
    guides(
        color = guide_legend(
            title.position = "top",
            title.hjust = 0.5,
            ncol = 3,
            override.aes = list(shape = 16, size = 10)
        )
    ) +
    theme(legend.position = 'none') +
    annotate(geom = "text", x = 12000, y = 3000, label = "Luminal surface", angle = -rotation, size = 6) +
    annotate(geom = "text", x = 5000, y = 6500, label = "Non-neoplastic mucosa", angle = -rotation, size = 6) +
    annotate(geom = "text", x = 7500, y = 2500, label = "Tumor", angle = -rotation, size = 6) +
    theme(text = element_text(angle = -rotation)) +
    annotation_scale(location = 'tr') +
    annotate('rect', xmin = 8250, xmax = 10000, ymin = 8500, ymax = 9800, color = "red", fill = NA) +
    annotate('rect', xmin = 9500, xmax = 11250, ymin = 5000, ymax = 6200, color = "black", fill = NA)
require(grid)
options(repr.plot.res = 500, repr.plot.width = 8, repr.plot.height = 8)
# Drawn twice, exactly as before: print() with `vp` does not start a new page.
print(G4423_rotate, vp = viewport(angle = rotation))
print(G4423_rotate, vp = viewport(angle = rotation))

# TLS and stromal network

In [None]:
# Keep s2 switched off, then read the segmentation polygons of sample G4423
# without promoting them to multi-geometries.
sf::sf_use_s2(FALSE)
G4423_cells <- sf::st_read(
    '/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH/Pelka_Baysor_segmentation/data_and_ingest/G4423/baysor_res_merged/segmentation_polygons.geojson',
    promote_to_multi = FALSE
)

## vasculature

In [None]:
# Cells inside the window 8250 < x < 10000, 8500 < y < 9800 (the red rectangle
# on the overview plot); lineage is made a factor before the rows are filtered.
tls_cells <- g4669 %>%
    select(x, y, knn_coarse, knn_renamed_cell_states, cell_id) %>%
    mutate(knn_coarse = as.factor(knn_coarse)) %>%
    filter(x > 8250, x < 10000, y > 8500, y < 9800)
head(tls_cells)

## TLS

In [None]:
tls_cells <- g4669 %>%
    select(x, y, knn_coarse, knn_renamed_cell_states, cell_id) %>%
    mutate(knn_coarse = as.factor(knn_coarse)) %>%
    filter(x > 8250, x < 10000, y > 8500, y < 9800)
head(tls_cells)

In [None]:
# Disabled alternative that derived the list from the mid-level lookup table:
# tls_types_to_show = knn_mid_renamed %>%
#     filter(knn_mid_renamed %in% c('B', 'CD4+ T', 'CD8+ T')) %>%
#     pull(knn_renamed_cell_states)
# length(tls_types_to_show)
# Colour per highlighted cell state; everything else is drawn as "Other".
color_scale <- c(
    "Other" = 'lightgrey',
    "Bgc-GPR183" = '#fd6f30',
    "Tcd4-IL7R" = '#eb1e2c',
    "B" = '#5fbb68',
    "Bgc" = '#bbc9e5',
    "Tcd4-TFH" = '#64cdcc',
    "Bgc-CD40-prolif" = '#f9d23c',
    "Bgc-CD40" = '#f9a729'
)
color_scale
tls_types_to_show <- names(color_scale)
length(tls_types_to_show)

In [None]:
# Final TLS table: same window, states outside the palette relabelled "Other",
# plus a factor copy of the lineage in `ClusterTop`.
tls_cells <- g4669 %>%
    filter(x > 8250, x < 10000, y > 8500, y < 9800) %>%
    mutate(
        knn_renamed_cell_states = if_else(
            knn_renamed_cell_states %in% tls_types_to_show,
            knn_renamed_cell_states,
            "Other"
        )
    ) %>%
    select(x, y, knn_coarse, knn_renamed_cell_states, cell_id) %>%
    mutate(ClusterTop = as.factor(knn_coarse))
head(tls_cells)

In [None]:
# Polygons of the TLS cells (POLYGON-type geometries only) with their labels.
.temp <- G4423_cells %>%
    filter(cell_id %in% tls_cells$cell_id) %>%
    filter(grepl("POLYGON", st_geometry_type(geometry))) %>%
    left_join(tls_cells)
.temp %>%
    slice_sample(n = 20) %>%
    as.data.frame()
dim(.temp)

In [None]:
# Cell polygons inside the TLS window, filled and outlined by lineage.
options(repr.plot.height = 10, repr.plot.width = 10, repr.plot.res = 300)
cell_lineages_tls <- G4423_cells %>%
    # Filter first: st_make_valid() works geometry by geometry, so repairing
    # only the TLS polygons gives the same result without processing every
    # polygon of the sample.
    filter(cell_id %in% tls_cells$cell_id) %>%
    st_make_valid() %>%
    left_join(., tls_cells) %>%
    ggplot() +
    geom_sf(aes(color = knn_coarse, fill = knn_coarse)) +
    theme_void(base_size = 28) +
    ggtitle('Cell Lineages in the TLS') +
    # red frame, same colour as the TLS rectangle on the overview plot
    theme(legend.position = 'bottom',
          title = element_text(size = 20),
          panel.border = element_rect(colour = "red", fill=NA, linewidth=1)) +
    guides(fill = guide_legend(ncol = 2, override.aes = list(size=10, shape = 16))) +
    scale_color_manual(name = 'Cell Lineage ', values = c('Epi' = '#CA49FC',
        'Strom' = '#00D2D0',
        'Myeloid' = '#FFB946',
        'Mast' = '#F4ED57',
        'Plasma' = '#61BDFC',
        'B' = '#0022FA',
        'TNKILC' = '#FF3420'
        )) +
    scale_fill_manual(name = 'Cell Lineage ', values = c('Epi' = '#CA49FC',
        'Strom' = '#00D2D0',
        'Myeloid' = '#FFB946',
        'Mast' = '#F4ED57',
        'Plasma' = '#61BDFC',
        'B' = '#0022FA',
        'TNKILC' = '#FF3420'
        ))
cell_lineages_tls

In [None]:
.temp$knn_renamed_cell_states %>% unique %>% sort

In [None]:
options(repr.plot.height = 10, repr.plot.width = 10, repr.plot.res = 300)

# Cell polygons inside the TLS window, coloured by the highlighted T/B states.
# The state column is left untouched so its values always match the names of
# `color_scale` and the `breaks`; wrapping to 10 characters is applied to the
# legend labels only. Wrapping the data values could insert line breaks
# (stringr < 1.5 also breaks at hyphens), which silently unmatched the colours.
cell_states_tls <- G4423_cells %>%
    filter(cell_id %in% tls_cells$cell_id) %>%
    left_join(., tls_cells) %>%
    ggplot() +
    geom_sf(aes(color = knn_renamed_cell_states, fill = knn_renamed_cell_states)) +
    theme_void(base_size = 20) +
    scale_color_manual(
        values = color_scale,
        name = "Cell States",
        breaks = c(tls_types_to_show[tls_types_to_show != 'Other'], 'Other'),
        labels = function(x) str_wrap(x, width = 10)
    ) +
    scale_fill_manual(
        values = color_scale,
        name = "Cell States",
        breaks = c(tls_types_to_show[tls_types_to_show != 'Other'], 'Other'),
        labels = function(x) str_wrap(x, width = 10)
    ) +
    ggtitle('T and B Cell States in the TLS') +
    theme(legend.position = 'bottom', title = element_text(size = 28)) +
    guides(fill = guide_legend(ncol = 2, override.aes = list(size=10, shape = 16)))
cell_states_tls

In [None]:
options(repr.plot.height = 10, repr.plot.width = 20, repr.plot.res = 300)
# Adding two ggplots side by side needs patchwork, which the notebook only
# attaches in a later cell; load it here so the cell runs top to bottom.
library(patchwork)
cell_lineages_tls + cell_states_tls

## stromal network

In [None]:
# Cells inside the window 9500 < x < 11250, 5000 < y < 6200 (the black
# rectangle on the overview plot). Lineage is made a factor before filtering.
# Window tried earlier: x 9000-10750, y 5000-6300.
strom_cells <- g4669 %>%
    select(x, y, knn_coarse, knn_renamed_cell_states, cell_id) %>%
    mutate(knn_coarse = as.factor(knn_coarse)) %>%
    filter(x > 9500, x < 11250, y < 6200, y > 5000)
head(strom_cells)
dim(strom_cells)

In [None]:
# Polygons of those cells (POLYGON-type geometries only) with their labels.
.temp <- G4423_cells %>%
    filter(cell_id %in% strom_cells$cell_id) %>%
    filter(grepl("POLYGON", st_geometry_type(geometry))) %>%
    left_join(strom_cells)
.temp %>%
    slice_sample(n = 20) %>%
    as.data.frame()
dim(.temp)

In [None]:
options(repr.plot.height = 10, repr.plot.width = 10, repr.plot.res = 300)
# Cell polygons of the stromal window, filled and outlined by lineage; the
# black frame is the same colour as the rectangle on the overview plot.
cell_lineages_strom <- .temp %>%
    left_join(strom_cells) %>%
    ggplot() +
    geom_sf(aes(color = knn_coarse, fill = knn_coarse)) +
    theme_void(base_size = 28) +
    ggtitle('Cell Lineages in the invasive border') +
    theme(
        legend.position = 'bottom',
        title = element_text(size = 20),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)
    ) +
    guides(fill = guide_legend(ncol = 2, override.aes = list(size = 10, shape = 16))) +
    scale_color_manual(
        name = 'Cell Lineage ',
        values = c(
            'Epi' = '#CA49FC',
            'Strom' = '#00D2D0',
            'Myeloid' = '#FFB946',
            'Mast' = '#F4ED57',
            'Plasma' = '#61BDFC',
            'B' = '#0022FA',
            'TNKILC' = '#FF3420'
        )
    ) +
    scale_fill_manual(
        name = 'Cell Lineage ',
        values = c(
            'Epi' = '#CA49FC',
            'Strom' = '#00D2D0',
            'Myeloid' = '#FFB946',
            'Mast' = '#F4ED57',
            'Plasma' = '#61BDFC',
            'B' = '#0022FA',
            'TNKILC' = '#FF3420'
        )
    ) +
    coord_sf()
cell_lineages_strom

In [None]:
# Cell states present in the stromal window, most frequent first.
.temp$knn_renamed_cell_states %>%
    table() %>%
    as.data.frame() %>%
    filter(Freq > 0) %>%
    arrange(desc(Freq))

In [None]:
colnames(.temp)

In [None]:
# Re-read the lookup table and add the mid-level labels to the polygons
# (no `by`: the join uses all shared column names).
knn_mid_renamed <- fread('knn_cell_state_labels.csv')
knn_mid_renamed
.temp <- .temp %>% left_join(knn_mid_renamed)

In [None]:
table(.temp$knn_mid_renamed)

In [None]:
# Mid-level types with more than 65 cells in the window.
top_cell_mid <- .temp %>%
    as.data.frame() %>%
    group_by(knn_mid_renamed) %>%
    summarise(n = n()) %>%
    filter(n > 65) %>%
    pull(knn_mid_renamed)
top_cell_mid

In [None]:
options(repr.plot.height = 10, repr.plot.width = 10, repr.plot.res = 300)
# Cell polygons of the stromal window coloured by mid-level cell type; types
# that are not in `top_cell_mid` are set to NA.
# NOTE(review): the title still says 'Cell Lineages' although the colours show
# mid-level types — confirm whether it should be reworded.
knn_mid_strom <- .temp %>%
    left_join(., strom_cells) %>%
    # Keep each cell's OWN label when it is one of the frequent types. The
    # previous `yes = top_cell_mid` recycled the short vector of kept labels
    # across rows, so cells were coloured with labels unrelated to their type.
    mutate(knn_mid_renamed = ifelse(knn_mid_renamed %in% top_cell_mid, knn_mid_renamed, NA)) %>%
    ggplot() +
        geom_sf(aes(color = knn_mid_renamed, fill = knn_mid_renamed)) +
        theme_void(base_size = 28) +
        ggtitle('Cell Lineages in the invasive border') +
        theme(legend.position = 'bottom', title = element_text(size = 20)) +
        guides(fill = guide_legend(ncol = 2, override.aes = list(size=10, shape = 16))) +
        # Tableau palette is used because the lineage palette has no entries
        # for mid-level types
        scale_color_tableau('Tableau 10') +
        scale_fill_tableau('Tableau 10') +
        coord_sf()
knn_mid_strom

# Lay out panels

In [None]:
require(patchwork)

In [None]:
options(repr.plot.height = 10, repr.plot.width = 16, repr.plot.res = 300)
# Overview on the left (2/3 width); TLS over stromal window on the right,
# both without legend and title.
fig1e <- (
    G4423 |
        (
            (cell_lineages_tls + theme(legend.position = 'none') + ggtitle('')) /
                (cell_lineages_strom + theme(legend.position = 'none') + ggtitle(''))
        )
) + plot_layout(widths = c(2, 1))
fig1e

In [None]:
ggsave(filename = 'fig1e.pdf', plot = fig1e, width = 16, height = 10, units = 'in')

In [None]:
options(repr.plot.height = 10, repr.plot.width = 16, repr.plot.res = 300)
# Same layout with the rotated overview.
G4423_panel <- (
    G4423_rotate |
        (
            (cell_lineages_tls + theme(legend.position = 'none') + ggtitle('')) /
                (cell_lineages_strom + theme(legend.position = 'none') + ggtitle(''))
        )
) + plot_layout(widths = c(2, 1))

In [None]:
# Zoom panels turned by 180 degrees: shown in the notebook, then written to PDF.
print(
    (
        (cell_lineages_tls + theme(legend.position = 'none') + ggtitle('')) /
            (cell_lineages_strom + theme(legend.position = 'none') + ggtitle(''))
    ) + theme(legend.position = 'none', text = element_text(size = 20)),
    vp = viewport(angle = 180)
)
pdf('fig1d_rotate_panel_2.pdf', width = 3, height = 6)
print(
    (
        (cell_lineages_tls + theme(legend.position = 'none') + ggtitle('')) /
            (cell_lineages_strom + theme(legend.position = 'none') + ggtitle(''))
    ) + theme(legend.position = 'none', text = element_text(size = 20)),
    vp = viewport(angle = 180)
)
dev.off()

In [None]:
options(repr.plot.height = 6, repr.plot.width = 10, repr.plot.res = 300)
# Overview turned by 140 degrees: shown in the notebook, then written to PDF.
print(
    G4423_rotate + theme(legend.position = 'none', text = element_text(size = 20)),
    vp = viewport(angle = 140)
)
pdf('fig1d_rotate_panel_1.pdf', width = 10, height = 6)
print(
    G4423_rotate + theme(legend.position = 'none', text = element_text(size = 20)),
    vp = viewport(angle = 140)
)
dev.off()

# Compare numbers in MERFISH vs scRNA datasets

## load raw scrna data

In [None]:
# Reference scRNA-seq object; its cell metadata is read from `@meta.data`.
scrna <- readr::read_rds('/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH/Pelka_reference_cleaning/complete_pelka_dataset.rds')
scrna

In [None]:
writeLines(colnames(scrna@meta.data))

In [None]:
# Number of distinct values in the candidate sample / lineage columns.
length(unique(scrna@meta.data$sampleID))
length(unique(scrna@meta.data$batchID))
head(unique(scrna@meta.data$batchID))
length(unique(scrna@meta.data$orig.ident))
length(unique(scrna@meta.data$clTopLevel))

## load merfish data

In [None]:
# Fresh copy of the MERFISH metadata as stored on disk.
merfish <- readr::read_rds('/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH_niches/labeled_seurat_objects/renamed_cell_states/metadata_complete.rds')
sample_n(merfish, 20)

In [None]:
writeLines(colnames(merfish))

## stacked barplot

In [None]:
# Cells per sample and lineage for both technologies, stacked into one table
# with columns orig.ident, coarse, n and technology. For scRNA the sample is
# `batchID` and the lineage is `clTopLevel`.
lineage_counts <- rbind(
    scrna@meta.data %>%
        group_by(batchID, clTopLevel) %>%
        summarise(n = n()) %>%
        mutate(technology = 'scRNA') %>%
        rename(orig.ident = batchID, coarse = clTopLevel) %>%
        ungroup(),
    merfish %>%
        group_by(knn_coarse, orig.ident) %>%
        summarise(n = n()) %>%
        mutate(technology = 'MERFISH') %>%
        rename(coarse = knn_coarse) %>%
        ungroup()
)

sample_n(lineage_counts, 20)

In [None]:
unique(lineage_counts$coarse[lineage_counts$technology == 'MERFISH'])

In [None]:
# Percent of each sample's cells per lineage, summarised per technology and
# lineage as median and standard deviation across samples.
# Sample totals are computed within technology, so a sample ID that occurred
# in both datasets could not have its cells pooled into one denominator.
# NOTE(review): samples with zero cells of a lineage have no row in
# `lineage_counts`, so they do not enter that lineage's median — confirm this
# is intended.
lineage_percent <- lineage_counts %>%
    group_by(technology, orig.ident, .drop = FALSE) %>%
    mutate(total_in_sample = sum(n)) %>%
    mutate(percent_of_sample = 100*n/total_in_sample) %>%
    ungroup() %>%
    group_by(technology, coarse) %>%
    summarize(
        median_percent_of_sample = median(percent_of_sample),
        sd_percent_of_sample = sd(percent_of_sample),
        # same grouping of the result as the default, stated explicitly
        .groups = 'drop_last'
    )
lineage_percent

In [None]:
options(repr.plot.height = 6, repr.plot.width = 4, repr.plot.res = 300)
# Stacked bars, one per technology: median lineage percentages rescaled to
# fill the bar (position = "fill"), with the y axis labelled in percent.
fig1c <- ggplot(
    lineage_percent,
    aes(x = technology, y = median_percent_of_sample, fill = coarse)
) +
    geom_bar(position = "fill", stat = "identity") +
    ggpubr::theme_pubr(base_size = 16) +
    theme(legend.position = 'right') +
    xlab('Dataset') +
    ylab('Median percent of cells') +
    theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5, hjust = 0.5)) +
    scale_fill_manual(
        name = 'Lineage',
        values = c(
            'Epi' = '#CA49FC',
            'Strom' = '#00D2D0',
            'Myeloid' = '#FFB946',
            'Mast' = '#F4ED57',
            'Plasma' = '#61BDFC',
            'B' = '#0022FA',
            'TNKILC' = '#FF3420'
        )
    ) +
    scale_y_continuous(
        name = "Median percent of cells",
        labels = scales::label_percent(accuracy = 1)
    )
fig1c
ggsave(filename = 'fig1c.pdf', plot = fig1c, width = 4, height = 6)


## list cell states

In [None]:
# Lineage -> colour lookup, used for the frames of the cell-state lists.
lineage_colors <- c(
    'Epi' = '#CA49FC',
    'Strom' = '#00D2D0',
    'Myeloid' = '#FFB946',
    'Mast' = '#F4ED57',
    'Plasma' = '#61BDFC',
    'B' = '#0022FA',
    'TNKILC' = '#FF3420'
)
lineage_colors[['Plasma']]

In [None]:
options(repr.plot.height = 5, repr.plot.width = 2, repr.plot.res = 300)
# Lineage (Var1) x cell-state (Var2) pairs that occur in the MERFISH metadata.
# Built once here instead of once per lineage inside the loop.
.lineage_states <- table(merfish$knn_coarse, merfish$knn_renamed_cell_states) %>%
    as.data.frame() %>%
    filter(Freq > 0) %>%
    select(!Freq)
# Plain character names: a factor value used inside `[[ ]]` would index
# `lineage_colors` by its level code instead of by its label.
.lineages <- as.character(unique(merfish$knn_coarse))
# One small panel per lineage: its cell states listed as text, one per line,
# inside a frame drawn in the lineage colour.
plotList <- lapply(.lineages, function(lineage){
    .data <- .lineage_states %>%
        filter(Var1 == lineage) %>%
        arrange(Var2) %>%
        # Var1 is constant here, so rank(..., 'first') numbers the rows 1..n
        mutate(y = rank(Var1, ties.method = 'first'), x = 0)
    ggplot(.data) +
        geom_text(aes(x = x, y = y, label = Var2), size = 3) +
        theme_void() +
        theme(panel.border = element_rect(colour = lineage_colors[[lineage]], fill=NA, linewidth=1)) +
        ylim(min(.data$y-0.5), max(.data$y+0.5)) +
        xlim(-0.05, 0.05) +
        ggtitle(lineage)
})
names(plotList) <- .lineages
plotList[['Strom']]

In [None]:
options(repr.plot.height = 4, repr.plot.width = 10, repr.plot.res = 300)
require(patchwork)

In [None]:
names(plotList)

In [None]:
options(repr.plot.height = 4, repr.plot.width = 8.5, repr.plot.res = 300)

# Arrange the per-lineage lists with a layout design: the letters A-H stand for
# the plots in the order they are added, the spacer being the eighth (H).
# The margin theme is applied to every panel via `&`.
cellStates <- wrap_elements(
    plotList[['TNKILC']] +
        plotList[['B']] +
        plotList[['Plasma']] +
        plotList[['Mast']] +
        plotList[['Myeloid']] +
        plotList[['Strom']] +
        plotList[['Epi']] +
        plot_spacer() +
        plot_layout(
            design = 'ABEFG\nACEFG\nADEFH',
            widths = c(1,0.75, 1.5, 1, 1.15)
        ) &
        theme(plot.margin = unit(c(0,5,0,0), 'point'))
)
cellStates
ggsave(filename = 'fig1c_cellStates.pdf', plot = cellStates, width = 8.5, height = 4)

# complete figure

In [None]:
options(repr.plot.height = 16, repr.plot.width = 16, repr.plot.res = 300)
# Text placeholders for the two panels that are produced outside this notebook.
fig1a <- ggplot() +
    theme_void() +
    geom_text(aes(x = 0, y = 0, label = 'Cohort heatmap')) +
    ylim(-0.5, 0.5) +
    xlim(-0.5, 0.5)
fig1b_1 <- ggplot() +
    theme_void() +
    geom_text(aes(x = 0, y = 0, label = 'Cell typing flowchart')) +
    ylim(-0.5, 0.5) +
    xlim(-0.5, 0.5)
fig1b_2 <- cellStates
# The bar plot loses its legend in the assembled figure.
fig1c <- fig1c + theme(legend.position = 'none')
# Capture the drawn ComplexHeatmap as a grob so patchwork can place it.
fig1d <- wrap_elements(
    grid.grabExpr(
        draw(
            h1,
            merge_legend = TRUE,
            heatmap_legend_side = "bottom",
            annotation_legend_side = "bottom"
        ),
        wrap = TRUE,
        wrap.grobs = TRUE
    )
)
# Three-row layout; letters A-F stand for the panels in the order added.
complete_figure_1 <- fig1a + fig1b_1 + fig1b_2 + fig1c + fig1d + fig1e +
    plot_layout(nrow = 3, design = 'ABBBCC\nDEEEEE\nFFFFFF', heights = c(1, 1, 1)) +
    plot_annotation(tag_levels = c('A', '1')) &
    theme(plot.tag = element_text(face = 'bold', size = 12))
ggsave(filename = 'complete_figure_1.pdf', plot = complete_figure_1, width = 16, height = 16)