# figure s8

- kernel: r_env, R 4.1.3
- date: 2024-02-22

## load

In [None]:
library(tidyverse)
library(logging)
library(ggpubr)
library(ggsci)
library(patchwork)
library(Seurat)

# Load the project's shared helper functions before anything below relies on them.
source('../scripts/r_funcs.r')

# Session-wide settings: logging handler, warnings silenced, ggpubr theme as default.
logging::basicConfig()
options(warn = -1)
theme_set(theme_pubr())

# Directory that receives the figures and tables written by this notebook.
outdir <- '../figures/figs8'
create_dir(outdir)

In [14]:
# Clinical annotation table and the grouping column used by the group-wise panels.
f_pat_gp <- '../tables/patient_info_v2.tsv'
gp <- 'response'

# Level order of the response groups.
gp_lvls[['response']] <- c('R', 'PR')

# Order of the combined "<response>-<time point>" categories on the x axis.
comb_order <- c('R-pre', 'R-post', 'PR-pre', 'PR-post')

# Pairwise comparisons between the combined categories.
gp_comp_map[['response']] <- list(
    c('R-pre', 'R-post'),
    c('PR-pre', 'PR-post'),
    c('R-pre', 'PR-pre'),
    c('R-post', 'PR-post')
)

# Comparison restricted to the pre-treatment categories.
gp_comp_map_pre[['response']] <- list(c('R-pre', 'PR-pre'))

# Comparison between the two response groups (used for the delta plot).
gp_comp_diff_map[['response']] <- list(c('R', 'PR'))

## s8a: CD8_CXCL13 analysis

### delta expanded percent in subtype

min10 cells

In [22]:
# Input table for the s8a delta plot; it is produced under the fig5 output directory and re-used here.
f_expand_diff <- '../figures/fig5/fig5a-expand_in_subtype-post_pre_delta.tsv'  # this is based on min10 cells results

In [24]:
# Rows for CD8_CXCL13 only, dropping records without a value in the grouping column.
plot_df <- read_tsv(f_expand_diff, show_col_types = FALSE) %>%
    filter(subtype == 'CD8_CXCL13', !is.na(.data[[gp]]))

# Two-line y-axis title: Greek delta plus description, with "(post - pre)" underneath.
y_title <- bquote(atop(Delta~'expanded cell percent in CD8_CXCL13', '(post - pre)'))

# Box plot of the delta per group, with the between-group test on top.
p <- plot_df %>%
    cell_comp_boxplot(
        x = gp, y = 'pct_expand_diff', pt_fill = gp,
        pair_by = NULL, facet_by = NULL,
        xorder = gp_lvls[[gp]], fill_order = gp_lvls[[gp]], xangle = 60
    ) +
    stat_compare_means(comparisons = gp_comp_diff_map[[gp]]) +
    labs(y = y_title, fill = 'Response') +
    theme(legend.justification = c(1, 0))

ggsave(
    filename = str_glue('{outdir}/figs8a-box_expand_diff-CD8_CXCL13.pdf'),
    plot = p, width = 3, height = 4.5
)

### shannon index

min 10 cells

In [25]:
# CSV with per-sample, per-subtype Shannon index (columns read below: subtype, patient, sample_type, shannon).
f_diversity <- '../../stage4/a03_tcr/diversity/shannon_way2_min10.csv'

In [27]:
# Shannon index of CD8_CXCL13, reduced to the columns needed for plotting.
cxcl13_shannon <- read_csv(f_diversity, show_col_types = FALSE) %>%
    filter(subtype == 'CD8_CXCL13') %>%
    select(patient, sample_type, shannon)

# Attach the clinical grouping per patient, then relabel the time points
# (Baseline -> pre, Treat -> post).
df <- cxcl13_shannon %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))

loginfo('%g samples for CD8_CXCL13', nrow(df))

# Persist the table; the plotting cell reads it back from disk.
write_tsv(df, str_glue('{outdir}/figs8a-diversity-CD8_CXCL13-min10.tsv'))

[0m2024-02-23 00:07:27 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m
[0m2024-02-23 00:07:27 INFO::76 samples for CD8_CXCL13[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [28]:
# Re-read the saved diversity table and keep samples with a known group.
plot_df <- read_tsv(
    str_glue('{outdir}/figs8a-diversity-CD8_CXCL13-min10.tsv'),
    show_col_types = FALSE
) %>%
    filter(!is.na(.data[[gp]]))

# Shannon index per "<group>-<time point>" category with pairwise tests.
p <- plot_df %>%
    cell_comp_boxplot(
        x = c(gp, 'sample_type'), y = 'shannon', pt_fill = gp, facet_by = NULL,
        xorder = comb_order, fill_order = gp_lvls[[gp]], xangle = 45
    ) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    labs(y = 'Shannon index', fill = 'Response')

ggsave(
    filename = str_glue('{outdir}/figs8a-box_diversity-CD8_CXCL13.pdf'),
    plot = p, width = 3, height = 4.5
)

## s8b: clone fraction: shared with CD8_CX3CR1 vs shared with non-CD8_CX3CR1

- sharing at sample level
- re-calculate clone fraction (based on RNA & TCR data)
- exclude CD8_CX3CR1 when plotting

In [None]:
# Per-cell clone sharing table used to recompute clone fractions.
f_clone_info <- '../../stage4/a05_clone_share/clone_share_old/clone_sharing_info_per_cell.csv'

# Display labels for each sharing type; the names also fix the level/legend order.
share_type_label_map <- c(
    'CD8_CX3CR1_shared' = 'shared with\nCD8_CX3CR1',
    'otherT_shared'     = 'shared with\nnon-CD8_CX3CR1',
    'specific'          = 'unshared'
)

# Pairwise comparisons between sharing types for the significance brackets.
comp_ls <- list(
    c('CD8_CX3CR1_shared', 'otherT_shared'),
    c('otherT_shared', 'specific'),
    c('CD8_CX3CR1_shared', 'specific')
)

In [None]:
# Re-calculate the clone fraction at sample level.
# Cell counts per (sample, clonotype) and per sample are taken on the full
# per-cell table, i.e. before CD8_CX3CR1 rows are dropped further down.
clone_counts <- read_csv(f_clone_info, show_col_types = FALSE) %>%
    add_count(sample, clonotype, name = 'n_cell_per_sample_clone') %>%
    add_count(sample, name = 'n_cell_per_sample') %>%
    select(sample, patient, sample_type, subtype, clonotype, share_type,
           n_cell_per_sample_clone, n_cell_per_sample) %>%
    distinct()

clone_frac <- clone_counts %>%
    mutate(
        clone_frac_new = n_cell_per_sample_clone / n_cell_per_sample,
        # relabel time points: Baseline -> pre, Treat -> post
        sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post')
    ) %>%
    filter(subtype != 'CD8_CX3CR1')  # no CD8_CX3CR1

# Rename cell types: every subtype listed under an entry of celltype_map
# is replaced by that entry's name (entries are applied in order).
for (new_name in names(celltype_map)) {
    is_member <- clone_frac$subtype %in% celltype_map[[new_name]]
    clone_frac$subtype[is_member] <- new_name
}

# Add cell state info; subtypes not covered by cell_state_map stay 'unknown'.
clone_frac$cell_state <- 'unknown'
for (state in names(cell_state_map)) {
    in_state <- clone_frac$subtype %in% cell_state_map[[state]]
    clone_frac$cell_state[in_state] <- state
}
is_cytotoxic <- clone_frac$cell_state == 'cytotoxic'
clone_frac$cell_state[is_cytotoxic] <- 'other cytotoxic'

write_tsv(
    clone_frac,
    str_glue('{outdir}/fig5d-tcr_share_inter_subtype-clone_frac_new.tsv'),
    quote = 'needed'
)
loginfo('%g records of clonotypes in each sample, each subtype', nrow(clone_frac))

[0m2024-02-05 18:06:34 INFO::131179 records of clonotypes in each sample, each subtype[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Plot: clone fraction by sharing type, one facet per cell state.
share_types <- names(share_type_label_map)
state_lvls <- c('other cytotoxic', 'exhausted', 'dying', 'others')

# Read the saved table back, fix factor orders and drop NK subtypes.
plot_df <- read_tsv(
    str_glue('{outdir}/fig5d-tcr_share_inter_subtype-clone_frac_new.tsv'),
    show_col_types = FALSE
) %>%
    mutate(
        cell_state = factor(cell_state, levels = state_lvls),
        share_type = factor(share_type, levels = share_types)
    ) %>%
    filter(!grepl('^NK', subtype))

p <- plot_df %>%
    ggviolin(
        x = 'share_type', y = 'clone_frac_new', fill = 'share_type',
        facet.by = 'cell_state', ncol = 9, add = 'boxplot'
    ) +
    stat_compare_means(comparisons = comp_ls) +
    scale_fill_jco(breaks = share_types, labels = share_type_label_map) +
    scale_x_discrete(labels = share_type_label_map) +
    # log10 axis; the rank-based Wilcoxon p value is unaffected by it
    scale_y_continuous(trans = 'log10') +
    labs(y = 'Clonotype fraction at sample level\n(RNA & TCR data)',
         fill = 'TCR sharing type') +
    theme(axis.text.x = element_text(angle = 45, hjust = 0.95),
          axis.title.x = element_blank())

ggsave(
    filename = str_glue('{outdir}/fig5d-vln_tcr_share_inter_subtype-clone_frac_new.pdf'),
    plot = p, width = 14, height = 6
)

## s8d: endo markers expression

In [None]:
# Serialized object holding the endothelial cells.
f_obj <- '../../stage4/a01_data/seuobj/endo.rds'

# Marker genes to display, grouped into named panels; the list names become
# the panel labels when the list is handed to the dot plot.
markers <- list(
    'endo'          = c('PECAM1', 'PLVAP'),
    'endo_vasc'     = c('ICAM1', 'TEK'),
    'endo_lymph'    = c('LYVE1', 'PDPN', 'PROX1', 'FLT4'),
    'endo_inflam'   = c('VWF', 'SELP', 'VCAM1', 'CX3CL1'),
    'junct'         = c('CDH5'),
    'pericyte'      = c('ACTA2', 'RGS5'),
    'endo_progen'   = c('FLT1', 'KDR', 'MKI67'),
    'endoMT_transi' = c('DCN', 'COL1A1', 'COL1A2')
)

In [None]:
# Load the endothelial object from f_obj; it is passed to DotPlot() in the next cell.
scrna <- readRDS(f_obj)

In [None]:
# Dot plot of the endothelial marker panels (one panel per entry of `markers`)
# across the `cell_type` annotation of the endothelial object.
# `features` is spelled out in full: the previous `feature =` only reached the
# right parameter through R's partial argument matching, which is fragile.
p <- DotPlot(scrna, features = markers, group.by = 'cell_type') +
    RotatedAxis() +
    scale_color_gsea() +  # replaces DotPlot's default colour scale
    theme(axis.title = element_blank())
ggsave(filename = str_glue('{outdir}/figs8d-dot_exprs-endo_markers.pdf'),
       plot = p, width = 14, height = 6)

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.


## CX3CL1 expression

### s8c: dotplot in whole data

In [None]:
# Load the whole-dataset object; this overwrites `scrna`, which previously held the endothelial object.
scrna <- readRDS('../../stage4/a01_data/seuobj/whole.rds')

In [None]:
# Build a display label per cell from `cell_type_correct`:
# B and Plasma are pooled into one label, and T is renamed.
orig_type <- scrna$cell_type_correct
merged_type <- orig_type
merged_type[orig_type %in% c('B', 'Plasma')] <- 'B & Plasma cells'
merged_type[orig_type %in% c('T')] <- 'T cells'
scrna$celltype <- merged_type

# Cell counts per merged label.
table(scrna$celltype)


B & Plasma cells      Endothelial       Epithelial       Fibroblast 
          103212            44055            75040           111164 
         Myeloid          T cells 
          114636           302025 

In [None]:
# CX3CL1 expression across the merged cell type labels of the whole dataset.
cx3cl1_dot <- Seurat::DotPlot(scrna, features = 'CX3CL1', group.by = 'celltype')

# Swap in the GSEA colour scale and drop both axis titles.
p <- cx3cl1_dot +
    scale_color_gsea() +
    theme(axis.title = element_blank())

ggsave(
    filename = str_glue('{outdir}/fig5e-dot_exprs-cx3cl1-whole.pdf'),
    plot = p, width = 3.5, height = 4
)

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.


### weighted expression in Endo: s8e-1

min 50 cells

In [40]:
# Table with the weighted CX3CL1 expression in Endo (column wt_cx3cl1_exprs_unlog is read below).
f_pbulk <- '../../stage4/a04_cx3cl1/endo_wt_cx3cl1_exprs_min50/cx3cl1_weighted_exprs_vs_t_cc.tsv'

In [50]:
# One row per distinct (sample, sample_type, patient, expression) record.
wt_exprs <- read_tsv(f_pbulk, show_col_types = FALSE) %>%
    select(sample, sample_type, patient, wt_cx3cl1_exprs_unlog) %>%
    distinct()

# log1p-transform the expression, relabel the time points
# (Baseline -> pre, Treat -> post) and attach the clinical grouping.
df <- wt_exprs %>%
    mutate(logcpm = log1p(wt_cx3cl1_exprs_unlog)) %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post')) %>%
    add_clin_info(f_pat_gp, columns = gp, merge_by = 'patient')

write_tsv(df, str_glue('{outdir}/figs8c-pbulk_exprs-weighted_CX3CL1_in_endo.tsv'))

[0m2024-02-23 00:27:11 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [54]:
# Box plot of the weighted CX3CL1 expression in Endo (log1p scale) per
# "<response>-<time point>" category. Only samples whose group is one of the
# configured levels in gp_lvls[[gp]] are shown.
# The significance brackets come from gp_comp_map[[gp]] alone. A leftover
# stat_compare_means(comparisons = mandard_group_comp) layer was removed: that
# variable is not defined in this notebook, and the plot is grouped by `gp`.
p <- df %>%
    filter(.data[[gp]] %in% gp_lvls[[gp]]) %>%
    cell_comp_boxplot(x = c(gp, 'sample_type'), y = 'logcpm', pt_fill = gp, xangle = 60,
                      xorder = comb_order, fill_order = gp_lvls[[gp]], facet_by = NULL) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    labs(y = 'Weighted CX3CL1 expression in Endo', fill = 'Response')
ggsave(filename = str_glue('{outdir}/figs8c-box_pbulk_exprs-weighted_CX3CL1_in_endo.pdf'),
       plot = p, width = 4, height = 6)

### expression in sample: s8e-2

In [None]:
# Per-sample CX3CL1 expression table (columns read below: sample, cx3cl1_exprs_unlog).
f_exprs <- '../../stage4/a04_cx3cl1/tme_exprs/cx3cl1_tme_exprs.tsv'

In [None]:
# Split the sample id "<patient>-<sample_type>" into its two parts (keeping
# the original column) and log1p-transform the expression.
tme_exprs <- read_tsv(f_exprs, show_col_types = FALSE) %>%
    separate(col = 'sample', into = c('patient', 'sample_type'), sep = '-', remove = FALSE) %>%
    mutate(cx3cl1_exprs_log = log1p(cx3cl1_exprs_unlog))

# Add clinical info, then relabel the time points (Baseline -> pre, Treat -> post).
df <- tme_exprs %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))

write_tsv(df, str_glue('{outdir}/fig5f-pbulk_exprs-cx3cl1_in_TME.tsv'))

[0m2024-02-05 17:19:07 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [None]:
# Re-read the saved per-sample expression table; keep samples with a known group.
plot_df <- read_tsv(
    str_glue('{outdir}/fig5f-pbulk_exprs-cx3cl1_in_TME.tsv'),
    show_col_types = FALSE
) %>%
    filter(!is.na(.data[[gp]]))

# Expression per "<group>-<time point>" category with pairwise tests.
p <- plot_df %>%
    cell_comp_boxplot(
        x = c(gp, 'sample_type'), y = 'cx3cl1_exprs_log', pt_fill = gp, facet_by = NULL,
        xorder = comb_order, fill_order = gp_lvls[[gp]], xangle = 60
    ) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    labs(y = 'CX3CL1 expression in sample', fill = 'Response')

ggsave(
    filename = str_glue('{outdir}/fig5f-box_exprs-cx3cl1_pbulk_in_TME.pdf'),
    plot = p, height = 5, width = 4
)

### s8f: weighted CX3CL1 expression in Endo vs CD8_CX3CR1 Shannon index

In [84]:
# Table pairing the weighted CX3CL1 expression with per-subtype Shannon index (columns read below: wt_cx3cl1_exprs_unlog, subtype, shannon).
f_data <- '../../stage4/a04_cx3cl1/endo_wt_cx3cl1_exprs_min50/cx3cl1_weighted_exprs_vs_t_diversity.tsv'

In [85]:
# CD8_CX3CR1 rows only, with log1p-transformed weighted expression and the
# Shannon index renamed to cd8_cx3cr1_shannon.
cx3cr1_div <- read_tsv(f_data, show_col_types = FALSE) %>%
    filter(subtype == 'CD8_CX3CR1') %>%
    mutate(wt_cx3cl1_exprs_log1p = log1p(wt_cx3cl1_exprs_unlog)) %>%
    select(sample, patient, sample_type, wt_cx3cl1_exprs_log1p,
           cd8_cx3cr1_shannon = shannon)

# Attach the clinical grouping, then relabel the time points
# (Baseline -> pre, Treat -> post).
df <- cx3cr1_div %>%
    add_clin_info(f_pat_gp, columns = gp, merge_by = 'patient') %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))

write_tsv(df, str_glue('{outdir}/figs8e-weighted_CX3CL1_exprs-CD8_CX3CR1_diversity.tsv'))

[0m2024-02-23 01:09:52 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [87]:
# Colour points by response group; values outside gp_lvls[[gp]] become NA
# and are drawn in gray via na.value.
plot_df <- df %>%
    mutate(color_by = factor(.data[[gp]], levels = gp_lvls[[gp]]))

p <- plot_df %>%
    scatter_with_fit(
        x = 'wt_cx3cl1_exprs_log1p',
        y = 'cd8_cx3cr1_shannon',
        color = 'color_by'
    ) +
    scale_color_nejm(na.value = 'gray50') +
    ylim(0, NA) +  # y axis starts at 0, upper limit chosen automatically
    labs(
        x = 'Weighted CX3CL1 expression in Endo',
        y = 'CD8_CX3CR1 clone diversity\n(shannon index)',
        color = 'Response'
    )

ggsave(
    filename = str_glue('{outdir}/figs8e-scatter_corr-weighted_CX3CL1_exprs-CD8_CX3CR1_diversity.pdf'),
    plot = p, width = 5, height = 4
)

### s8g: CD8_CX3CR1 expanded pct in T+NK, grouped by weighted CX3CL1 expression in Endo

In [90]:
# Table pairing the weighted CX3CL1 expression with clonal expansion (columns read below: wt_cx3cl1_exprs_unlog, subtype, expand_pct_in_TNK).
# NOTE(review): unlike the other weighted-expression inputs in this notebook, this path has no `_min50` suffix; confirm this is intended.
f_data <- '../../stage4/a04_cx3cl1/endo_wt_cx3cl1_exprs/cx3cl1_weighted_exprs_vs_clonoExpand.tsv'

In [95]:
# CD8_CX3CR1 rows only, with log1p-transformed weighted expression and the
# expansion percent renamed to cd8_cx3cr1_expand_pct_in_TNK.
cx3cr1_expand <- read_tsv(file = f_data, show_col_types = FALSE) %>%
    filter(subtype == 'CD8_CX3CR1') %>%
    mutate(wt_cx3cl1_exprs_log1p = log1p(wt_cx3cl1_exprs_unlog)) %>%
    select(sample, patient, wt_cx3cl1_exprs_log1p,
           cd8_cx3cr1_expand_pct_in_TNK = expand_pct_in_TNK)

# Attach the clinical grouping per patient.
df <- cx3cr1_expand %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient')

write_tsv(df, str_glue('{outdir}/figs8f-weighted_CX3CL1_exprs-CD8_CX3CR1_expand_pct_in_TNK.tsv'))

[0m2024-02-23 01:14:50 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [100]:
# The 50% quantile (median) of the weighted expression is the Low/High cutoff.
cutoff <- quantile(df$wt_cx3cl1_exprs_log1p, probs = 0.5)
loginfo('expression cutoff: %g', cutoff)

# Samples at or below the cutoff are 'Low', the rest 'High'.
plot_df <- df %>%
    mutate(
        exprs_type = if_else(wt_cx3cl1_exprs_log1p > cutoff, 'High', 'Low'),
        color_by = factor(.data[[gp]], levels = gp_lvls[[gp]])
    )

# Axis ticks for the transformed y axis.
y_breaks <- c(0:5, 10)

p <- plot_df %>%
    cell_comp_boxplot(
        x = 'exprs_type', y = 'cd8_cx3cr1_expand_pct_in_TNK',
        pt_fill = 'color_by', facet_by = NULL, pair_by = NULL,
        xorder = NULL, fill_order = gp_lvls[[gp]]
    ) +
    stat_compare_means(comparisons = list(c('High', 'Low'))) +
    scale_y_continuous(trans = lognp_trans(n = 1), breaks = y_breaks) +
    labs(y = 'Expanded CD8_CX3CR1 cells in T+NK', fill = 'Response')

ggsave(
    filename = str_glue('{outdir}/figs8f-CD8_CX3CR1_expand_pct_in_TNK-by_wt_cx3cl1_exprs_median.pdf'),
    plot = p, width = 4, height = 6
)

[0m2024-02-23 01:19:36 INFO::expression cutoff: 0.840079[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


### s8h: endo cell composition

Only 3 samples have <= 50 endo cells (minimum 34), so samples are not filtered by cell count.

In [107]:
# Endothelial cell composition table (read below as CSV; columns used: sample, patient, sample_type, cell_type, pct).
f_cc <- '../../stage4/a02_cc/composition/endo_cell_comp.csv'
# Cell types kept for the composition plot; all other rows are filtered out.
valid_ctypes <- c('Endo_ACKR1', 'Endo_CCL21')

In [118]:
# Composition rows for the selected endothelial cell types only.
endo_cc <- read_csv(f_cc, show_col_types = FALSE) %>%
    filter(cell_type %in% valid_ctypes) %>%
    select(sample, patient, sample_type, cell_type, pct)

# Attach the clinical grouping, then relabel the time points
# (Baseline -> pre, Treat -> post).
df <- endo_cc %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    mutate(sample_type = case_match(sample_type, 'Baseline' ~ 'pre', 'Treat' ~ 'post'))

write_tsv(df, str_glue('{outdir}/figs8g-cc_endo.tsv'))

[0m2024-02-23 01:30:03 INFO::these clinial info will be added: response[0m[22m[23m[24m[27m[28m[29m[39m[49m[0m[0m[22m[23m[24m[27m[28m[29m[39m[49m


In [131]:
# Keep samples whose group is one of the configured levels.
plot_df <- df %>%
    filter(.data[[gp]] %in% gp_lvls[[gp]])

# Cell percent per "<group>-<time point>" category with pairwise tests;
# facet_by is left at the helper's default here.
p <- plot_df %>%
    cell_comp_boxplot(
        x = c(gp, 'sample_type'), y = 'pct', pt_fill = gp, xangle = 60,
        xorder = comb_order, fill_order = gp_lvls[[gp]]
    ) +
    stat_compare_means(comparisons = gp_comp_map[[gp]]) +
    labs(y = 'Cell percent in Endo', fill = 'Response') +
    theme(axis.title.x = element_blank(), legend.position = 'right')

ggsave(
    filename = str_glue('{outdir}/figs8g-box_cc-endo.pdf'),
    plot = p, width = 6, height = 5
)