# figure S5

- kernel: r_env, R 4.1.3
- date: 2024-02-22
- desc: supplementary figure 5

## load

In [1]:
library(tidyverse)
library(logging)
library(ggpubr)
library(ggsci)
library(patchwork)

# Load the project's shared R helper script. Names used later in this notebook
# but not defined in it (e.g. create_dir, add_clin_info, cell_comp_boxplot,
# gp_comp_map, gp_lvls) are presumably provided by this file -- TODO confirm.
source('../../stage2/scripts/r_funcs.r')

# Make the ggpubr theme the default theme for every ggplot created below.
theme_set(theme_pubr())
# Set up the default configuration of the `logging` package (used by loginfo()).
logging::basicConfig()
# NOTE(review): warn = -1 silences every warning for the rest of the session,
# including ones from readr/ggplot2/stat tests -- confirm this is intended.
options(warn = -1)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.1     [32m✔[39m [34mtibble   [39m 3.2.0
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [2]:
# Basic information: output directory, cluster renaming map and input tables.
outdir <- '../figures/figs5'
create_dir(outdir)

# patient-level table and per-node (cluster) cell-state table
f_pat_gp <- '../tables/patient_info_v2.tsv'
f_cell_state <- '../figures/fig3/fig3-net_tcr_sharing-node_info.tsv'

# name to use in figures -> original subtype label(s) it replaces
celltype_map <- list(T_MKI67 = 'T_Prolif')

In [3]:
# Grouping variable used for every comparison in this notebook.
gp <- 'response'

# x-axis order of the group x time-point combinations
comb_order <- c('R-pre', 'R-post', 'PR-pre', 'PR-post')

# order of the group levels (passed as fill_order to the boxplot helper)
gp_lvls[['response']] <- c('R', 'PR')

# pairwise comparisons handed to stat_compare_means()
gp_comp_map[['response']] <- list(
    c('R-pre', 'R-post'),
    c('PR-pre', 'PR-post'),
    c('R-pre', 'PR-pre'),
    c('R-post', 'PR-post')
)

In [4]:
# T cell state mapping
# Read the node-info table; its `node` and `node_type` columns are used below.
df <- read_tsv(f_cell_state, show_col_types = FALSE)

# display order of the cell states
cell_state_order <- c('cytotoxic', 'exhausted', 'dying', 'others', 'NK')
# named list: node_type -> vector of the nodes having that type
cell_state_map <- split(df$node, df$node_type)
# the NK state is set by hand
cell_state_map$NK <- c('NK', 'NK_CX3CR1')
# reorder (and restrict) the list to the states in cell_state_order
cell_state_map <- cell_state_map[cell_state_order]

# fill colour for each cell state
cell_state_color <- c('exhausted' = '#023fa5', 'cytotoxic' = 'red',
                      'dying' = '#bb7784', 'others' = '#ff9639',
                      'NK' = 'gray50')

In [5]:
# display the state -> clusters mapping for a visual check
cell_state_map

## s5a: vln of exhau/cyto score in clusters

- re-run at 2024-02-22

In [None]:
# per-cell score table (CSV) read in the next cell
f_score <- '../../stage4/miscellaneous/scores/ex_cyto_score_old.csv'
# file-name prefix of this panel's outputs
# NOTE(review): the prefix says 'figs4a' while this section is headed s5a -- confirm the intended name
prefix <- 'figs4a'

In [None]:
# Load the per-cell scores and log how many cells were read.
df <- read_csv(f_score, show_col_types = FALSE)
loginfo('%g cells', nrow(df))

# rename cell types: subtypes listed in celltype_map take the map's name,
# all other subtypes are kept as they are
df$celltype <- df$subtype
for (nm in names(celltype_map)) {
    df$celltype[df$subtype %in% celltype_map[[nm]]] <- nm
}
# add cell state info: cell types listed in cell_state_map take the state name,
# anything not listed keeps its cell type as its state
df$cell_state <- df$celltype
for (nm in names(cell_state_map)) {
    df$cell_state[df$celltype %in% cell_state_map[[nm]]] <- nm
}

# cell level info: the barcode is the first CSV column, which has an empty
# header and is therefore auto-named `...1` by readr
df <- select(df, barcode = '...1', celltype, cell_state,
             ex_score = Exhaustion, cyto_score = Cytotoxicity)
df %>% write_tsv(str_glue('{outdir}/{prefix}-exhau_cyto_score.tsv'))

[1m[22mNew names:
[36m•[39m `` -> `...1`


[0m2024-02-22 09:34:26 INFO::302025 cells[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# score column -> y-axis label
ycol_map <- list('ex_score' = 'Exhaustion score', 'cyto_score' = 'Cytotoxicity score')
# Reload the cell-level table written above; clusters are ordered by the
# flattened cell_state_map and states by cell_state_order.
dp <- read_tsv(str_glue('{outdir}/{prefix}-exhau_cyto_score.tsv'), show_col_types = FALSE) %>%
    mutate(celltype = factor(celltype, unlist(cell_state_map)),
           cell_state = factor(cell_state, cell_state_order))
# one violin panel per score: clusters on x, filled by cell state
p_ls <- lapply(names(ycol_map), function(ycol) {
    ggplot(aes(x = celltype, y = .data[[ycol]]), data = dp) +
    geom_violin(aes(fill = cell_state), scale = 'width', alpha = 0.6) +
    scale_fill_manual(values = cell_state_color) +
    labs(y = ycol_map[[ycol]]) +
    theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 0.96))
})
# panels side by side in one row with a single collected legend
ggsave(filename = str_glue('{outdir}/{prefix}-vln-exhau_cyto_score-cluster.pdf'),
       plot = wrap_plots(p_ls, nrow = 1, guides = 'collect'), width = 14, height = 4.5)

## s5d: CXCL13 expression in T cells

- date: 2024-02-22
- desc: use CD8 T cells (cyto/exhau/others) + NK + Treg, pseudobulk expression (min 50)

In [None]:
# pseudobulk count table: a `symbol` column plus one column per sample/cell group
# ("min50" in the file name is presumably a minimum-cell cutoff -- TODO confirm)
fpbulk <- '../../stage4/miscellaneous/exprs_box/chemokine_receptor_in_t_state-pbulk_count-min50.tsv'
# gene(s) to extract from the table
genes <- 'CXCL13'
# factor level order of the cell groups (`cellgp`) in the plot
ctype_order <- c('cytotoxic CD8', 'exhausted CD8', 'other CD8', 'Treg', 'NK')

In [None]:
# Build a long table of log1p(CPM) values for `genes` per pseudobulk sample.
exprs_pbulk <- read_tsv(fpbulk, show_col_types = FALSE) %>%
    column_to_rownames('symbol') %>%
    # per column: scale to counts per million of that column's total, then log1p
    mutate(across(everything(), ~ log1p(1e6 * .x / sum(.x)))) %>%
    # keep only the rows of the requested genes
    slice(which(rownames(.) %in% genes)) %>%
    # transpose: one row per pseudobulk column, one column per gene
    t() %>%
    as.data.frame() %>%
    rownames_to_column('sample_cellgp') %>%
    # column names are parsed as <patient>-<sample_type>_<cellgp>
    extract(col = 'sample_cellgp', into = c('patient', 'sample_type', 'cellgp'), regex = '(.+)-(.+)_(.+)') %>%
    pivot_longer(cols = all_of(genes), names_to = 'gene', values_to = 'logcpm') %>%
    # attach the grouping column from the patient table (project helper)
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    # recode time points to the labels used in comb_order
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))
exprs_pbulk %>% write_tsv(str_glue('{outdir}/figs4c-pbulk_exprs_in_t_state-CXCL13.tsv'))

[0m2024-02-22 11:00:32 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Reload the CXCL13 table, drop samples without a group label and plot
# logcpm by group x time point, one facet per cell group.
p <- read_tsv(str_glue('{outdir}/figs4c-pbulk_exprs_in_t_state-CXCL13.tsv'), show_col_types = FALSE) %>%
    filter(!is.na(.data[[gp]])) %>%
    # fix the facet order of the cell groups
    mutate(cellgp = factor(cellgp, levels = ctype_order)) %>%
    # project helper (not defined in this notebook)
    cell_comp_boxplot(x = c(gp, 'sample_type'), xorder = comb_order, y = 'logcpm',
                      pt_fill = gp, fill_order = gp_lvls[[gp]],
                      facet_by = 'cellgp', wrap_free = 'free', ncol = 10, xangle = 60) +
    labs(fill = 'Response', y = 'CXCL13 expression (log1p CPM)') +
    # pairwise comparisons configured in gp_comp_map
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    theme(axis.title.x = element_blank())
ggsave(str_glue('{outdir}/figs4c-box_pbulk_exprs-CXCL13_in_t_state.pdf'), plot = p, width = 9, height = 5)

## s5e: T cell composition

- date: 2024-02-22
- desc: use selected clusters

In [None]:
# cell-composition table written under the fig3 output directory
f_cc <- '../figures/fig3/fig3c-t_cc.tsv'
# clusters to show, in plotting (factor level) order
used_clusters <- c('CD8_FOS', 'CD8_CX3CR1', 'CD8_KLRB1', 'Treg_LTB', 'Treg_TNFRSF4', 'T_Mito')

In [None]:
# plot
p <- read_tsv(f_cc, show_col_types = F) %>% 
    filter(!is.na(.data[[gp]])) %>% 
    filter(cell_type %in% used_clusters) %>% 
    mutate(cell_type = factor(cell_type, used_clusters)) %>%
    cell_comp_boxplot(x = c(gp, 'sample_type'), y = 'pct', pt_fill = gp, xangle = 60, 
                          xorder = comb_order, fill_order = gp_lvls[[gp]], ncol = 7) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    labs(y = 'Percent in T+NK cells', fill = 'Response')
ggsave(filename = str_glue('{outdir}/figs4d-box_cc-selected_t_clusters.pdf'), plot = p, width = 12, height = 5)

## s5f: expanded cell percent in subtype

In [None]:
# expansion table (CSV) read in the next cell
f_expand <- '../../stage4/a03_tcr/expansion/expand_cell_pct.csv'
# rows with n_cell_per_sample_subtype below this value are dropped
min_cell <- 10

In [None]:
# update
df <- read_csv(file = f_expand, show_col_types = F) %>% 
    filter(n_cell_per_sample_subtype >= min_cell) %>%
    select(patient, sample_type, subtype, n_expand_cell_per_sample_stype, n_cell_per_sample_subtype,
           n_cell_per_sample, pct_by_subtype, n_cell_per_sample) %>% 
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))

# rename
for (nm in names(celltype_map)) {
    df$subtype[df$subtype %in% celltype_map[[nm]]] <- nm
}
df %>% write_tsv(str_glue('{outdir}/fig3d-t_expand_in_subtype-min{min_cell}.tsv'))
loginfo('%g combinations of sample & cell type for expansion results', nrow(df))

[0m2024-02-05 10:40:59 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m
[0m2024-02-05 10:40:59 INFO::1349 combinations of sample & cell type for expansion results[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# plot
df <- read_tsv(str_glue('{outdir}/fig3d-t_expand_in_subtype-min{min_cell}.tsv'), show_col_types = F)
p_ls <- lapply(names(use_ctypes), FUN = function(nm) {
    df %>% 
    filter(subtype %in% use_ctypes[[nm]]) %>% 
    filter(!is.na(.data[[gp]])) %>% 
    cell_comp_boxplot(x = c(gp, 'sample_type'), y = 'pct_by_subtype', pt_fill = gp, facet_by = 'subtype',
                      xorder = comb_order, fill_order = gp_lvls[[gp]],  xangle = 60, size = 2, ncol = 7) +
    stat_compare_means(comparisons = comp_ls) +
    labs(y = 'Expanded cell percent in cluster', fill = 'Response', title = nm) +
    theme(plot.background = element_rect(color = 'black'), # add border
          axis.title.x = element_blank(),  # blank don't occupy space
          plot.title = element_text(hjust = 0.5))
})
# remove y labs expcept for first plot
for (i in 2:length(p_ls)) {
    p_ls[[i]] <- p_ls[[i]] + theme(axis.title.y = element_blank())
}
# combine plots
p <- wrap_plots(p_ls, nrow = 1, widths = c(3, 3), guides = 'collect') & 
    theme(legend.margin = margin(-6, 0, -1, 0))
# save
ggsave(filename = str_glue('{outdir}/fig3d-box_expand-selected_t_subtypes.pdf'), plot = p, width = 13, height = 5)