# figure s1 vlz

- kernel: r_env, R 4.1.3

## load

In [8]:
library(tidyverse)
library(tools)
library(logging)
library(ggpubr)
library(ggsci)
library(patchwork)
library(igraph)
library(ggraph)
library(Seurat)
library(ComplexHeatmap)

# source('../../stage2/scripts/r_funcs.r')

theme_set(theme_pubr())
logging::basicConfig()
options(warn = -1)

outdir <- '../figures/figs1'
create_dir(outdir)

ERROR: Error in library(ComplexHeatmap): there is no package called ‘ComplexHeatmap’


In [2]:
f_pat_gp <- '../tables/patient_info_v2.tsv'
gp <- 'response'

gp_comp_map$response <- list(
  c('R-pre', 'R-post'),
  c('PR-pre', 'PR-post'),
  c('R-pre', 'PR-pre'),
  c('R-post', 'PR-post')
)
gp_lvls$response <- c('R', 'PR')
comb_order <- c('R-pre', 'R-post', 'PR-pre', 'PR-post')

ERROR: Error in gp_comp_map$response <- list(c("R-pre", "R-post"), c("PR-pre", : object 'gp_comp_map' not found


## clinical info: s1a

In [16]:
f_sc_clin <- '../../assets/clinical/sc_merge_clinical-230925.xlsx'

In [17]:
df <- readxl::read_xlsx(f_sc_clin, sheet = 'clinical_info') %>% 
    select(patient, treatment_sum, mandard_score) %>% 
    add_clin_info(ftsv = f_pat_gp, columns = c('response', 'response_v2'), merge_by = 'patient')
# %>%
#     mutate(treatment_sum = if_else(treatment_sum == 'NA', 'unknown', treatment_sum),
#            mandard_score = if_else(mandard_score == 'NA', 'unknown', mandard_score),
#            response = if_else(is.na(response), 'unknown', response),
#            response_v2 = if_else(is.na(response_v2), 'Unknown', response))
loginfo('total %g records', nrow(df))

[0m2024-02-21 10:08:45 INFO::these clinial info will be added: response, response_v2[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m
[0m2024-02-21 10:08:45 INFO::total 52 records[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [18]:
p1 <- df %>% 
    count(treatment_sum, .data[[gp]]) %>% 
    mutate(pat_gp = factor(.data[[gp]], gp_lvls[[gp]])) %>%
    ggbarplot(x = 'treatment_sum', y = 'n', fill = 'pat_gp', 
              label = T, lab.vjust = -0.1, alpha = 0.7,
              position = position_dodge(0.7),
              order = c('chemo', 'chemo+immune', 'NA')) +
    scale_fill_jco(na.value = 'gray50') +
    labs(y = 'Number of patients', fill = 'Response') +
    theme(axis.title.x = element_blank(), legend.position = 'right', 
          axis.text.x = element_text(angle = 60, hjust = 0.98))
p2 <- df %>% 
    count(mandard_score, .data[[gp]]) %>% 
    mutate(pat_gp = factor(.data[[gp]], gp_lvls[[gp]])) %>%
    ggbarplot(x = 'mandard_score', y = 'n', fill = 'pat_gp', 
              label = T, lab.vjust = -0.1, alpha = 0.7) +
    scale_fill_jco(na.value = 'gray50') +
    labs(y = 'Number of patients', fill = 'Response') +
    theme(axis.title.x = element_blank(), legend.position = 'right')

In [19]:
ggsave(filename = str_glue('{outdir}/figs1a-bar-clin_summary.pdf'), width = 7, height = 5,
       plot = p1 + p2 + plot_layout(guides = 'collect') & theme(legend.position = 'right'))

## major cell type markers: s1b

In [5]:
# f_obj <- '../../stage4/a01_data/seuobj/whole.rds'
f_obj <- '../../seuobj/whole.rds'
markers <- list(
    'T+NK cells' = c('CD3D', 'CD3E', 'CD3G', 'NKG7'),
    'B cells' = c('CD19', 'MS4A1', 'CD79A', 'CD79B'),
    'Plasma' = c('JCHAIN', 'MZB1', 'XBP1'),
    'Myeloid' = c('CD68', 'CD14', 'LYZ'),
    'Epithelial' = c('EPCAM', 'KRT14', 'KRT15'),
    'Endothelial' = c('PECAM1', 'ICAM1', 'CLDN5'), 
    'Fibroblast' = c('COL1A1', 'COL1A2', 'C1R')
)
ctype_map <- list(
    'T cells' = c('T'),
    'B & Plasma cells' = c('B', 'Plasma')
)
ctype_order <- c('T+NK cells', 'B & Plasma cells', 'Myeloid', 'Epithelial', 'Endothelial', 'Fibroblast')

In [4]:
scrna <- readRDS(f_obj)

In [6]:
scrna$cellgp <- scrna$cell_type_correct
for (nm in names(ctype_map)) {
    scrna$cellgp[scrna$cell_type_correct %in% ctype_map[[nm]]] <- nm
}
scrna$cellgp <- factor(scrna$cellgp, levels = rev(ctype_order))

In [None]:
p <- DotPlot(object = scrna, features = markers, group.by = 'cellgp') +
    scale_color_gsea() +
    RotatedAxis() +
    theme(axis.title = element_blank()) 
# ggsave(filename = str_glue('{outdir}/figs1b-dot-major_markers.pdf'), plot = p, width = 11, height = 4)

In [10]:
ggsave(filename = str_glue('../figs1/figs1b-v3-dot-major_markers.pdf'), plot = p, width = 11, height = 4)

## batch effect heatmap: s1c

In [3]:
f_cell_info <- '../../stage4/a01_data/h5ad/whole_obs.csv'

In [4]:
df <- read_csv(f_cell_info, show_col_types = F)
loginfo('%g cells', nrow(df))

[1m[22mNew names:
[36m•[39m `` -> `...1`


[0m2024-02-20 21:58:19 INFO::750132 cells[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [11]:
# merge B & plasma cells
df <- df %>%
    mutate(
        macrotype = case_match(
            cell_type_correct,
            c('B', 'Plasma') ~ 'B & Plasma cells',
            'T' ~ 'T cells',
            .default = cell_type_correct
        ),
        subtype = if_else(subtype == 'T_Prolif', 'T_MKI67', subtype),
        sample_type = case_match(
            sample_type,
            'Baseline' ~ 'pre',
            'Treat' ~ 'post',
            .default = sample_type
        )
    )
df %>% count(macrotype, subtype) %>% dim

In [13]:
mat <- df %>%
    mutate(sample = paste(patient, sample_type, sep = '-')) %>%
    count(sample, subtype, name = 'n_cell') %>% 
    add_count(subtype, name = 'n_cell_total', wt = n_cell) %>% 
    mutate(pct = 100 * n_cell / n_cell_total) %>% 
    pivot_wider(id_cols = 'sample', names_from = 'subtype', 
                values_from = 'pct', values_fill = 0) %>% 
    column_to_rownames('sample')
mat %>% dim

In [14]:
macrotype_colors <- pal_nejm()(6)
names(macrotype_colors) <- c(
    'T cells', 'B & Plasma cells', 'Myeloid', 'Endothelial', 'Fibroblast', 'Epithelial'
)

In [15]:
set.seed(123)
col_anno <- HeatmapAnnotation(
    'Major cell type' = (count(df, macrotype, subtype) %>% pull(macrotype, name = 'subtype'))[colnames(mat)],
    col = list('Major cell type' = macrotype_colors),
    # show_legend = FALSE,
    which = 'column', height = unit(1, 'cm'), na_col = 'gray50'
)
treat_heatmap(
    mat = mat, cluster_columns = T, cluster_rows = F, name = 'Percent',
    row_names_gp = gpar(fontsize = 8), top_annotation = col_anno, fig_size = c(9, 8), 
    outfile = str_glue('{outdir}/figs1c-heat-sample_pct_in_subtype.pdf')
)

The automatically generated colors map from the 1^st and 99^th of the
values in the matrix. There are outliers in the matrix whose patterns
might be hidden by this color mapping. You can manually set the color
to `col` argument.

Use `suppressMessages()` to turn off this message.



## todos

## s1c: umap of major cell types

In [None]:
theme_set(theme_pubr())
outdir <- '../figures/fig1/'
title_map <- c(
    'fibro' = 'Fibroblast',
    'endo' = 'Endothelial',
    'b_plasma' = 'B & Plasma cells',
    't' = 'T cells',
    'myeloid' = 'Myeloid',
    'epi' = 'Epithelial',
    'whole' = 'Major cell type'
)
obj_path <- c(
    'fibro' = '../../stage4/a01_data/seuobj/fibro.rds',
    'endo' = '../../stage4/a01_data/seuobj/endo.rds',
    'b_plasma' = '../../stage4/a01_data/seuobj/b_plasma.rds',
    't' = '../../stage4/a01_data/seuobj/t.rds',
    'myeloid' = '../../stage4/a01_data/seuobj/myeloid.rds',
    'epi' = '../../stage4/a01_data/seuobj/epi.rds',
)

In [None]:
for (nm in names(obj_path)) {
    scrna <- readRDS(obj_path[nm])
    if (nm == 't') {
        scrna$subtype[scrna$subtype == 'T_Prolif'] <- 'T_MKI67'
    }
    color_by <- if_else(nm == 't', 'subtype', if_else(nm == 'whole', 'cell_type_correct', 'cell_type'))
    p <- DimPlot(object = scrna, reduction = 'umap', group.by = color_by, raster = T, shuffle = T,
                 label = T, label.size = 4, cols = scanpy_pal$zeileis_28, repel = T) +
        labs(title = title_map[nm]) +
        theme(axis.line = element_blank(), 
              axis.ticks = element_blank(),
              axis.text = element_blank(),
              axis.title = element_blank(),
              legend.position = 'none')
    ggsave(filename = str_glue('{outdir}/fig1-umap_celltype-{nm}.pdf'), plot = p, width = 5, height = 4)
    rm(scrna)
    gc()
}

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`



In [None]:
subdata <- subset(x = scrna, downsample = 10000)
p <- DimPlot(subdata, group.by = 'cellgp', pt.size = 0.8, label = T) +
    NoLegend() + 
    scale_color_jama() +
    theme(plot.title = element_blank())
ggsave('../fig1/fig1-umap_celltype-whole-v3.pdf', plot=p, height = 4, width = 5)