# figure S6

- kernel: r_env, R 4.1.3
- date: 2024-02-22
- desc: supplementary figure 6

## load

In [None]:
library(tidyverse)
library(logging)
library(ggpubr)
library(ggsci)
library(patchwork)

# Load the project-local helper script. create_dir(), add_clin_info(),
# cell_comp_boxplot(), gp_comp_map and gp_lvls are used below but are not
# defined in this notebook -- presumably they come from this file (TODO confirm).
source('../scripts/r_funcs.r')

# Make the ggpubr publication theme the default for every ggplot drawn below.
theme_set(theme_pubr())
# Configure the `logging` package with its default settings.
logging::basicConfig()
# NOTE(review): a negative `warn` makes R ignore every warning for the rest of
# the session; real problems (e.g. parsing or coercion warnings) stay hidden.
options(warn = -1)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.0
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# basic information: where the outputs of this notebook are written, and the
# table that add_clin_info() reads its clinical columns from
outdir <- "../figures/figs6"
create_dir(outdir)
f_pat_gp <- "../tables/patient_info_v2.tsv"

In [3]:
# Clinical column that defines the groups in every panel of this notebook.
gp <- "response"

# Order of the combined "<group>-<time point>" labels on the x axis.
comb_order <- c("R-pre", "R-post", "PR-pre", "PR-post")

# Group levels, passed to the plots as `fill_order`.
gp_lvls$response <- c("R", "PR")

# Pairs handed to stat_compare_means(): pre vs post within each group first,
# then R vs PR at each time point.
gp_comp_map$response <- list(
    c("R-pre", "R-post"),
    c("PR-pre", "PR-post"),
    c("R-pre", "PR-pre"),
    c("R-post", "PR-post")
)

## s6a: chemokine receptor expression in T cells

- desc: use CD8 T cells (cytotoxic/exhausted), pseudobulk expression (min 50)

In [None]:
# Inputs for panel s6a.
# Pseudobulk count table; its columns are later parsed as
# "<patient>-<sample_type>_<cellgp>".
fpbulk <- "../../stage4/miscellaneous/exprs_box/chemokine_receptor_in_t_state-pbulk_count-min50.tsv"

# Chemokine receptor genes to plot (one facet per gene).
genes <- c(
    "ACKR3", "CCR2", "CCR4", "CCR5",
    "CCR6", "CXCR1", "CXCR2", "CXCR3"
)

# T-cell states to plot (one row of the figure per state).
used_ctypes <- c("cytotoxic CD8", "exhausted CD8")

In [None]:
# Build the long-format expression table for panel s6a: one row per
# (patient, sample_type, cellgp, gene) with a log1p(CPM) value, annotated with
# the clinical column named in `gp`.
# NOTE(review): the output file name carries a 'figs5a' prefix although it is
# written into the figs6 directory -- confirm before renaming anything.
exprs_pbulk <- read_tsv(fpbulk, show_col_types = FALSE) %>%
    column_to_rownames('symbol') %>%
    # log1p(CPM) per column. The column total is taken over all rows of the
    # table, so this must run before the rows are subset to `genes`.
    mutate(across(everything(), ~ log1p(1e6 * .x / sum(.x)))) %>%
    slice(which(rownames(.) %in% genes)) %>%
    # transpose: genes become columns, the original columns become rows
    t() %>%
    as.data.frame() %>%
    rownames_to_column('sample_cellgp') %>%
    # `extract` is a name several packages export; qualify it so a later
    # library() call cannot silently swap in a different function.
    tidyr::extract(col = 'sample_cellgp', into = c('patient', 'sample_type', 'cellgp'),
                   regex = '(.+)-(.+)_(.+)') %>%
    # all_of() fails loudly if any requested gene is absent from the table
    pivot_longer(cols = all_of(genes), names_to = 'gene', values_to = 'logcpm') %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    # relabel time points to match `comb_order`; any other value becomes NA
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))
exprs_pbulk %>% write_tsv(str_glue('{outdir}/figs5a-pbulk_exprs_in_t_state-chemokine_receptors.tsv'))

[0m2024-02-22 13:38:52 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Panel s6a: for each T-cell state in `used_ctypes`, draw gene-wise boxplots of
# logcpm across the response x time-point groups, then stack the per-state
# plots vertically and save them as one PDF.
p_ls <- map(used_ctypes, function(nm) {
    # rows of this cell state that have a non-missing clinical group
    dat <- filter(exprs_pbulk, cellgp == nm, !is.na(.data[[gp]]))

    cell_comp_boxplot(
        dat,
        x = c(gp, "sample_type"), xorder = comb_order, y = "logcpm",
        pt_fill = gp, fill_order = gp_lvls[[gp]],
        facet_by = "gene", wrap_free = "free", ncol = 10, xangle = 60
    ) +
        labs(fill = "Response", y = str_glue('Expression in {nm} T clusters')) +
        stat_compare_means(comparisons = gp_comp_map[[gp]]) +
        theme(legend.position = "none", axis.title.x = element_blank())
})

ggsave(
    filename = str_glue('{outdir}/figs5a-box_pbulk_exprs-chemokine_receptor_in_t_state.pdf'),
    plot = wrap_plots(p_ls, ncol = 1),
    width = 17,
    height = 8
)

## s6b: chemokine expression in sample

In [None]:
# Inputs for panel s6b.
# Per-sample pseudobulk table with a `symbol` column plus one column per sample.
f_pbulk <- "../../stage4/a06_de/cntbulk_whole_min50/bulk_by_samp.tsv"

# Chemokine ligand genes; this order is reused as the facet order of the plot.
genes <- c(
    "CCL2", "CCL4", "CCL5", "CXCL6", "CCL17", "CCL20", "CCL22",
    "CXCL2", "CXCL5", "CXCL9", "CXCL10", "CXCL11", "CXCL12"
)

In [None]:
# Build the long-format expression table for panel s6b: one row per
# (symbol, sample) with a log1p(CPM) value, annotated with the clinical column
# named in `gp`.
# NOTE(review): the output file name carries a 'figs5b' prefix although it is
# written into the figs6 directory; the plotting cell reads the same path back.
exprs_pbulk <- read_tsv(f_pbulk, show_col_types = FALSE) %>%
    # log1p(CPM) per numeric column. The column total is taken over all rows,
    # so normalise before filtering down to `genes`.
    mutate(across(where(is.numeric), ~ log1p(1e6 * .x / sum(.x)))) %>%
    filter(symbol %in% genes) %>%
    pivot_longer(cols = -c('symbol'), names_to = 'sample', values_to = 'logcpm') %>%
    # sample names are parsed as '<patient>-<sample_type>'. `extract` is
    # namespace-qualified because other packages export the same name.
    tidyr::extract(col = 'sample', into = c('patient', 'sample_type'),
                   regex = '(.+)-(.+)', remove = FALSE) %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    # relabel time points to match `comb_order`; any other value becomes NA
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))
exprs_pbulk %>% write_tsv(str_glue('{outdir}/figs5b-pbulk_exprs_in_whole-chemokine_ligands.tsv'))

[0m2024-02-22 14:01:46 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Panel s6b: re-read the table written by the previous cell and draw one boxplot
# facet per chemokine ligand across the response x time-point groups.
p <- read_tsv(str_glue('{outdir}/figs5b-pbulk_exprs_in_whole-chemokine_ligands.tsv'),
              show_col_types = FALSE) %>%
    # facet order follows the order of `genes`
    mutate(symbol = factor(symbol, levels = genes)) %>%
    # drop samples without a clinical group
    filter(!is.na(.data[[gp]])) %>%
    cell_comp_boxplot(x = c(gp, 'sample_type'), xorder = comb_order, y = 'logcpm',
                      pt_fill = gp, fill_order = gp_lvls[[gp]],
                      facet_by = 'symbol', wrap_free = 'free', ncol = 7, xangle = 60) +
    labs(fill = 'Response', y = str_glue('Expression in whole sample')) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    theme(legend.position = 'none', axis.title.x = element_blank())
ggsave(filename = str_glue('{outdir}/figs5b-box_pbulk_exprs-chemokine_ligand_in_whole.pdf'),
       plot = p, width = 17, height = 8)

## s6c: CXCL12 expression in major cell type

In [None]:
# Inputs for panel s6c.
# Directory holding one sub-directory per major cell type.
indir <- "../../stage4/a06_de/cntbulk_major_min50/"
# Gene whose expression is plotted.
my_gene <- "CXCL12"

# Sub-directory name -> display label; the label order is reused as the facet
# order of the plot.
major_cell_map <- list(
    b_plasma = "B & Plasma",
    myeloid = "Myeloid",
    t = "T cells",
    endo = "Endothelial",
    epi = "Epithelial",
    fibro = "Fibroblast"
)

In [None]:
# obtain expression
# For every major cell type, read its per-sample pseudobulk table, convert each
# numeric column to log1p(CPM), keep only `my_gene`, and reshape to one row per
# sample. The per-cell-type tables are built as a list and bound once instead
# of growing a data frame inside a loop.
# rev(): the previous loop prepended each new table, so rows ended up in the
# reverse order of `major_cell_map`; iterating in reverse keeps the row order
# of the written TSV unchanged.
exprs_pbulk <- lapply(rev(names(major_cell_map)), function(nm) {
    read_tsv(str_glue('{indir}/{nm}/bulk_by_samp.tsv'), show_col_types = FALSE) %>%
        # normalise over all rows of the table before filtering to one gene
        mutate(across(where(is.numeric), ~ log1p(1e6 * .x / sum(.x)))) %>%
        filter(symbol == my_gene) %>%
        pivot_longer(cols = -c('symbol'), names_to = 'sample', values_to = 'logcpm') %>%
        mutate(celltype = major_cell_map[[nm]])
}) %>%
    bind_rows() %>%
    # sample names are parsed as '<patient>-<sample_type>'. `extract` is
    # namespace-qualified because other packages export the same name.
    tidyr::extract(col = 'sample', into = c('patient', 'sample_type'),
                   regex = '(.+)-(.+)', remove = FALSE) %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    # relabel time points to match `comb_order`; any other value becomes NA
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))

# NOTE(review): 'publk' (sic) and the 'figs5c' prefix are kept as-is because
# the plotting cell reads this exact path back.
exprs_pbulk %>% write_tsv(str_glue('{outdir}/figs5c-publk_exprs_in_major-{my_gene}.tsv'))

[0m2024-02-22 14:28:13 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Panel s6c: re-read the table written by the previous cell and draw one boxplot
# facet per major cell type for `my_gene` across the response x time-point
# groups.
p <- read_tsv(str_glue('{outdir}/figs5c-publk_exprs_in_major-{my_gene}.tsv'),
              show_col_types = FALSE) %>%
    # drop samples without a clinical group
    filter(!is.na(.data[[gp]])) %>%
    # facet order follows the order of the labels in `major_cell_map`
    mutate(celltype = factor(celltype, levels = unlist(major_cell_map))) %>%
    cell_comp_boxplot(x = c(gp, 'sample_type'), xorder = comb_order, y = 'logcpm',
                      pt_fill = gp, fill_order = gp_lvls[[gp]],
                      facet_by = 'celltype', wrap_free = 'free_x', ncol = 8, xangle = 60) +
    labs(fill = 'Response', y = str_glue('{my_gene} Expression')) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    theme(legend.position = 'bottom', axis.title.x = element_blank())
ggsave(filename = str_glue('{outdir}/figs5c-box_pbulk_exprs-{my_gene}_in_major.pdf'),
       width = 10, height = 5, plot = p)