# figure S4

- kernel: r_env, R 4.1.3
- date: 2024-01-19
- desc: supplementary figure 4

## load

In [None]:
library(tidyverse)
library(logging)
library(ggpubr)
library(ggsci)
library(patchwork)

# Project-shared helper functions. Helpers that are called later in this
# notebook but never defined in it (e.g. create_dir, add_clin_info,
# cell_comp_boxplot) presumably come from this file -- TODO confirm.
source('../../stage2/scripts/r_funcs.r')

# Make the ggpubr publication theme the default for every ggplot drawn below.
theme_set(theme_pubr())
# Set up the `logging` package with its default configuration.
logging::basicConfig()
# NOTE(review): warn = -1 silences ALL warnings for the rest of the session,
# including ones that would flag data problems (NA coercion, dropped rows).
options(warn = -1)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.0
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [None]:
# Basic information: output directory and the patient-level clinical table.
outdir <- '../figures/figs6'
# create_dir() is not defined in this notebook; presumably it comes from the
# sourced r_funcs.r and creates the directory when missing -- TODO confirm.
create_dir(outdir)
# Patient table handed to add_clin_info() below to attach clinical columns.
f_pat_gp <- '../tables/patient_info_v2.tsv'

In [None]:
# Clinical grouping variable used for every comparison below.
gp <- 'response'

# Pairs of x-axis groups to test; passed to
# stat_compare_means(comparisons = ...) when plotting.
# NOTE(review): gp_comp_map and gp_lvls are not created in this notebook;
# presumably they are lists defined in the sourced r_funcs.r -- confirm.
gp_comp_map$response <- list(
  c('R-pre', 'R-post'),
  c('PR-pre', 'PR-post'),
  c('R-pre', 'PR-pre'),
  c('R-post', 'PR-post')
)
# Order of the response groups (passed as fill_order when plotting).
gp_lvls$response <- c('R', 'PR')
# Left-to-right order of the combined groups on the x axis (passed as xorder).
comb_order <- c('R-pre', 'R-post', 'PR-pre', 'PR-post')

## virus detection status in major cell type

- desc: a cell is called virus-detected when more than 0 reads map to viruses

In [None]:
# Per-cell metadata (RDS); the code below reads its `cell_type_correct`,
# `sample` and `Viruses` columns.
f_obs <- '../../stage4/a24_microbe/domain_lvl/whole_obs_with_domain_logrpm.rds'
# A cell counts as virus-detected when Viruses > cutoff.
cutoff <- 0
# Fine-grained labels to merge into one major cell type
# (name = merged label, value = original labels). Labels not listed here keep
# their original name.
ctype_map <- list(
    'B & Plasma' = c('B', 'Plasma'),
    'T cells' = c('T')
)
# Display order of the major cell types in the plots.
ctype_order <- c('B & Plasma', 'Myeloid', 'T cells', 'Endothelial', 'Epithelial', 'Fibroblast')

In [None]:
# Load the per-cell metadata and derive the major cell type column `celltype`.
meta <- readRDS(f_obs)
# as.character(): if `cell_type_correct` were stored as a factor, assigning a
# label that is not an existing level would yield NA with only a warning --
# and warnings are switched off in this session. A no-op for character input.
meta$celltype <- as.character(meta$cell_type_correct)
# Merge the fine-grained labels listed in ctype_map; all other labels are
# kept unchanged.
for (nm in names(ctype_map)) {
    meta$celltype[meta$cell_type_correct %in% ctype_map[[nm]]] <- nm
}

# Sanity check: cells per major cell type by detection status (NA = no virus
# value). Uses the same `> cutoff` rule as the figures below instead of a
# hard-coded `!= 0`, so the table cannot drift from the plotted definition.
table(meta$celltype, meta$Viruses > cutoff, useNA = 'ifany')

             
               FALSE   TRUE   <NA>
  B & Plasma   83532  19649     31
  Endothelial  34688   9349     18
  Epithelial   51237  23750     53
  Fibroblast   87797  23322     45
  Myeloid      89475  24917    244
  T cells     269138  32360    527

### overview: s6a

In [None]:
# rpm to binary, show proportion in cell types
df <- meta %>%
    mutate(bi_type = case_when(
        is.na(Viruses) ~ NA_character_,
        Viruses > cutoff ~ 'Virus-detected',
        Viruses <= cutoff ~ 'Non-detected',
        TRUE ~ 'others'
    )) %>%
    count(celltype, bi_type, name = 'n_cell')
df %>% write_tsv(str_glue('{outdir}/fig6a-virus_detection_in_macrotype-bi-cell_cnt.tsv'))

In [None]:
# plot
p <- read_tsv(str_glue('{outdir}/fig6a-virus_detection_in_macrotype-bi-cell_cnt.tsv'), show_col_types = F) %>%
    mutate(bi_type = factor(bi_type, c('Virus-detected', 'Non-detected')),
           celltype = factor(celltype, ctype_order)) %>%
    ggbarplot(x = 'celltype', y = 'n_cell', fill = 'bi_type', position = position_fill()) +
    scale_fill_nejm(na.value = 'gray50') +
    labs(y = 'Cell proportion', fill = '') +
    theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 0.98))
ggsave(filename = str_glue('{outdir}/figs6a-bar-virus_dist_in_macrotype.pdf'), width = 5, height = 4, plot = p)

### sample level: s6b

- desc: require at least 50 cells per sample per major cell type

In [None]:
# Minimum number of cells a (sample, major cell type) pair must have to be kept.
min_cell <- 50

# Binarise the virus signal per cell, then compute for every sample and major
# cell type the percentage of cells in each detection status.
# The former `TRUE ~ 'others'` fallback was unreachable (NA, > cutoff and
# <= cutoff cover every value) and has been removed.
df <- meta %>%
    mutate(bi_type = case_when(
        is.na(Viruses) ~ NA_character_,
        Viruses > cutoff ~ 'Virus-detected',
        Viruses <= cutoff ~ 'Non-detected'
    )) %>%
    count(celltype, sample, bi_type, name = 'n_cell') %>%
    # Denominator: all cells of this cell type in this sample, including cells
    # whose virus value is NA.
    add_count(celltype, sample, wt = n_cell, name = 'n_cell_per_sample_ctype') %>%
    filter(n_cell_per_sample_ctype >= min_cell) %>%
    mutate(pct = 100 * n_cell / n_cell_per_sample_ctype) %>%
    # One row per sample x cell type, one column per detection status; a
    # status that is absent from a group is filled with 0 %.
    # (The stray trailing comma inside c() -- an empty argument -- was removed.)
    pivot_wider(id_cols = c('sample', 'celltype'),
                names_from = 'bi_type', values_from = 'pct', values_fill = 0) %>%
    # Split the sample label at '-' into patient and sample_type, keeping
    # the original `sample` column.
    separate(col = 'sample', into = c('patient', 'sample_type'), sep = '-', remove = FALSE) %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post')) %>%
    # add_clin_info() is not defined in this notebook; presumably it joins the
    # requested clinical columns from the patient table by `patient` -- TODO confirm.
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient')
# Save the plotted numbers next to the figure.
df %>% write_tsv(str_glue('{outdir}/fig6b-virus_detection_in_macrotype-bi-sample_lvl-pct.tsv'))

[0m2024-02-22 15:13:45 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Box plots of the per-sample percentage of virus-detected cells, faceted by
# major cell type and grouped by response x sample type.
# The table is re-read from the TSV written by the previous cell.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
p <- read_tsv(str_glue('{outdir}/fig6b-virus_detection_in_macrotype-bi-sample_lvl-pct.tsv'), show_col_types = FALSE) %>%
    # Drop samples whose patient has no value for the grouping variable.
    filter(!is.na(.data[[gp]])) %>%
    mutate(celltype = factor(celltype, ctype_order)) %>%
    # cell_comp_boxplot() is not defined in this notebook; presumably a
    # plotting helper from the sourced r_funcs.r -- TODO confirm its contract.
    cell_comp_boxplot(x = c(gp, 'sample_type'), y = 'Virus-detected', pt_fill = gp, pair_by = 'patient',
                      xorder = comb_order, fill_order = gp_lvls[[gp]], facet_by = 'celltype', ncol = 3) +
    # Pairwise tests for the group pairs listed in gp_comp_map[[gp]].
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    labs(y = 'Percent of virus-detected cells', fill = 'Response') +
    theme(axis.title.x = element_blank())
ggsave(filename = str_glue('{outdir}/figs6b-box_cell_pct-virus_detected_cells.pdf'), width = 8, height = 7, plot = p)

In [3]:
# T cell state mapping
df <- read_tsv(f_cell_state, show_col_types = F)

cell_state_order <- c('cytotoxic', 'exhausted', 'dying', 'others', 'NK')
cell_state_map <- split(df$node, df$node_type)
cell_state_map$NK <- c('NK', 'NK_CX3CR1')
cell_state_map <- cell_state_map[cell_state_order]

cell_state_color <- c('exhausted' = '#023fa5', 'cytotoxic' = 'red', 
                      'dying' = '#bb7784', 'others' = '#ff9639',
                      'NK' = 'gray50')

In [4]:
# Print the state -> node mapping built in the previous cell for inspection.
cell_state_map