# figure s7

- kernel: r_env, R 4.1.3
- date: 2024-02-22

## load

In [35]:
library(tidyverse)
library(logging)
library(ggpubr)
library(ggsci)
library(patchwork)

# Load project-local helper code. Functions called later in this notebook that
# are not defined in any cell here (e.g. create_dir, add_clin_info,
# cal_edge_weight_diff) presumably come from this script -- TODO confirm.
source('../../stage2/scripts/r_funcs.r')

# Make the ggpubr publication theme the default for the ggplots built below.
theme_set(theme_pubr())
# Apply the default configuration of the `logging` package (used by loginfo).
logging::basicConfig()
# NOTE(review): warn = -1 silences every warning for the rest of the session.
options(warn = -1)

# Output directory; every write_tsv()/ggsave() call below writes under it.
outdir <- '../figures/figs7'
# Presumably creates the directory when it is missing -- helper not shown here.
create_dir(outdir)

In [36]:
# Patient grouping used by the analyses below: treatment response (R vs PR).
f_pat_gp <- "../tables/patient_info_v2.tsv"
gp <- "response"

# Register the response-specific settings on lookup lists that are not created
# in this notebook (presumably defined in r_funcs.r -- TODO confirm).
gp_lvls[["response"]] <- c("R", "PR")
gp_comp_map[["response"]] <- list(
    c("R-pre", "R-post"),
    c("PR-pre", "PR-post"),
    c("R-pre", "PR-pre"),
    c("R-post", "PR-post")
)
gp_comp_diff_map[["response"]] <- list(c("R", "PR"))
gp_comp_map_pre[["response"]] <- list(c("R-pre", "PR-pre"))

# Display order of the response x timepoint combinations.
comb_order <- c("R-pre", "R-post", "PR-pre", "PR-post")

In [37]:
# New cell type name -> original subtype label(s) that it replaces.
celltype_map <- list(T_MKI67 = c("T_Prolif"))

# Cell state -> member cell types.
cell_state_map <- list(
    cytotoxic = c(
        "CD8_ANXA1", "CD8_CCL5", "CD8_CX3CR1", "CD8_FOS", "CD8_GZMK",
        "CD8_KLRB1"
    ),
    exhausted = c(
        "CD4_CXCL13", "CD8_CXCL13", "CD8_TYMS", "T_MKI67", "Treg_TNFRSF4"
    ),
    dying = c("T_Mito"),
    others = c(
        "CD4_KLRB1", "CD8_CD74", "CD8_IFIT1", "T_IL7R", "T_Ribo", "Treg_LTB"
    )
)

# Display order of the states; identical to the order of the map above.
cell_state_order <- names(cell_state_map)

# Fill colour per cell state.
cell_state_color <- c(
    exhausted = "#023fa5",
    cytotoxic = "red",
    dying = "#bb7784",
    others = "#ff9639"
)

## s7a: TCR sharing diff (R vs PR)

In [None]:
# Inputs for the TCR-sharing network comparison: one node table plus four edge
# tables (net_good / net_poor x Baseline / Treat, per the file names).
f_node <- '../figures/fig3/fig3b-net_tcr_sharing-node_info.tsv'
f_edge_r_pre <- '../../stage4/a03_tcr/net_vlz_old/net_good_Baseline_edge.csv'
f_edge_r_post <- '../../stage4/a03_tcr/net_vlz_old/net_good_Treat_edge.csv'
f_edge_pr_pre <- '../../stage4/a03_tcr/net_vlz_old/net_poor_Baseline_edge.csv'
f_edge_pr_post <- '../../stage4/a03_tcr/net_vlz_old/net_poor_Treat_edge.csv'

# node info
# node_type becomes a factor whose levels follow cell_state_order, so the cell
# states keep a fixed order wherever node_type is mapped in a plot.
df_node <- read_tsv(f_node, show_col_types = FALSE) %>%
    mutate(node_type = factor(node_type, levels = cell_state_order))

In [None]:
# Edge-weight differences between the R network (a) and the PR network (b),
# computed once per timepoint with the project helper cal_edge_weight_diff.
# wtype is 'R > PR' when weight.diff is positive (presumably a - b; confirm in
# the helper).
# NOTE(review): an edge with weight.diff == 0, if the helper can return one,
# is labelled 'R < PR'.

# pre: R vs PR
df <- cal_edge_weight_diff(a = read_csv(f_edge_r_pre, show_col_types = FALSE),
                           b = read_csv(f_edge_pr_pre, show_col_types = FALSE)) %>%
    mutate(wtype = if_else(weight.diff > 0, 'R > PR', 'R < PR'),
           comp_type = 'Pre-treatment: R vs PR')
# add post R vs PR (post-treatment rows end up ahead of the pre-treatment rows)
df <- cal_edge_weight_diff(a = read_csv(f_edge_r_post, show_col_types = FALSE),
                           b = read_csv(f_edge_pr_post, show_col_types = FALSE)) %>%
    mutate(wtype = if_else(weight.diff > 0, 'R > PR', 'R < PR'),
           comp_type = 'Post-treatment: R vs PR') %>%
    rbind.data.frame(df)
# NOTE(review): the file name says 'figs7d' although this section is headed
# 's7a'; kept as is because the next cell reads the same path.
df %>% write_tsv(str_glue('{outdir}/figs7d-edge_info-r_vs_pr.tsv'))

2024-02-22 18:12:38 INFO::a(25) - b(30) => 31 diff edges.
2024-02-22 18:12:38 INFO::a(24) - b(45) => 47 diff edges.


In [None]:
# Draw one network per comparison (pre- and post-treatment) from the saved edge
# table and write both panels side by side into a single PDF.
df <- read_tsv(str_glue('{outdir}/figs7d-edge_info-r_vs_pr.tsv'), show_col_types = FALSE)
p_ls <- lapply(X = c('Pre-treatment: R vs PR', 'Post-treatment: R vs PR'), FUN = function(ctype) {
    # Edges of this comparison only; the node table is shared by both panels.
    df_edge <- filter(df, comp_type == ctype)
    # exhau_cyto_netplot2 is a project helper: nodes are filled by cell state,
    # edges coloured by the direction of the R vs PR difference (wtype).
    exhau_cyto_netplot2(df_node = df_node, df_edge = df_edge, pt_fill = 'node_type', edge_color = 'wtype') +
        scale_fill_manual(values = cell_state_color) +
        labs(fill = 'Cell state', title = ctype, color = '') +
        theme_pubr(base_size = 14) +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = 'right',
              legend.text = element_text(size = 14),
              legend.title = element_text(size = 14))
})
# Keep the legend on the right-hand panel only.
p_ls[[1]] <- p_ls[[1]] + theme(legend.position = 'none')
ggsave(filename = str_glue('{outdir}/figs7d-net_tcr_sharing-r_vs_pr.pdf'),
       plot = wrap_plots(p_ls), width = 18, height = 6)

## s7b: select clones in umap space

### prepare

In [None]:
# RDS file holding the single-cell object loaded in the next cell.
f_obj <- "../../stage4/a01_data/seuobj/t.rds"
# Per-cell clone expansion table. It includes all RNA cells; cells without
# paired TCR data carry an NA clonotype.
f_expand <- "../../stage4/a03_tcr/expansion/clone_expand_info_per_cell.csv"

In [None]:
# Load the single-cell object; it is passed to DimPlot() and queried through
# @meta.data in the cells below.
scrna <- readRDS(file = f_obj)

In [None]:
# rename cell types
# Start from the original subtype labels, then relabel every subtype listed in
# celltype_map with the map's key (here 'T_Prolif' becomes 'T_MKI67').
scrna$celltype <- scrna$subtype
for (nm in names(celltype_map)) {
    scrna$celltype[scrna$subtype %in% celltype_map[[nm]]] <- nm
}
# Print the number of cells per (renamed) cell type.
table(scrna$celltype)
# rename sample type
# Map 'Baseline' -> 'pre' and 'Treat' -> 'post', then store timepoint as a
# factor with levels in the order pre, post; any value that is neither 'pre'
# nor 'post' after the mapping becomes NA.
scrna$timepoint <- scrna$sample_type
scrna$timepoint[scrna$sample_type == 'Baseline'] <- 'pre'
scrna$timepoint[scrna$sample_type == 'Treat'] <- 'post'
scrna$timepoint <- factor(scrna$timepoint, levels = c('pre', 'post'))


  CD4_CXCL13    CD4_KLRB1    CD8_ANXA1     CD8_CCL5     CD8_CD74   CD8_CX3CR1 
       13152        20635        15789        17401         4820         5319 
  CD8_CXCL13      CD8_FOS     CD8_GZMK    CD8_IFIT1    CD8_KLRB1     CD8_TYMS 
       32900        20473        24466         5853         5067         8936 
          NK    NK_CX3CR1       T_IL7R       T_Mito      T_MKI67       T_Ribo 
       12992         5385        27242        16101        11065         8102 
    Treg_LTB Treg_TNFRSF4 
       15682        30645 

In [None]:
# Per-cell clone table restricted to cells that have a clonotype, i.e. cells
# with both RNA and TCR data; the number of kept cells is logged.
cell_clone_info <- read_csv(f_expand, show_col_types = FALSE) %>%
    filter(!is.na(clonotype))  # RNA & TCR cells
loginfo('total %g RNA & TCR cells', nrow(cell_clone_info))

2024-02-28 20:42:52 INFO::total 205824 RNA & TCR cells


### contracted exhausted, R

span more to less

In [None]:
# Clone 1: cell types whose cluster labels stay visible on the UMAP, the
# clonotype to highlight, and the plot title.
# 'CD8_CXCL13' was listed twice; the duplicate is dropped (the vector is only
# used for %in% membership tests, so the plot is unchanged).
show_ctypes <- c('T_MKI67', 'CD8_CXCL13', 'CD8_TYMS', 'T_Mito')
select_clone <- 'TRAV12-2;TRBV9_;TRBD1_TRAJ23;TRBJ1-5_TRA:CAVTDNQGGKLIF;TRB:CASSVTGGRGQPQHF_P21'
title_str <- 'Clone 1 (R, contracted in exhausted clusters)'

In [None]:
# Barcodes of every cell that carries the selected clonotype; which() skips
# rows whose comparison is NA.
cells <- cell_clone_info$barcode[which(cell_clone_info$clonotype == select_clone)]
loginfo("total %g cells of selected clone", length(cells))

2024-02-28 20:59:46 INFO::total 32 cells of selected clone


In [None]:
# UMAP split by timepoint with the selected clone highlighted.
# Only the cell types in show_ctypes keep a label; all others get a blank one.
scrna$mylabel <- scrna$celltype
scrna$mylabel[!(scrna$celltype %in% show_ctypes)] <- ' '
p <- DimPlot(object = scrna, cells.highlight = cells, group.by = 'mylabel', split.by = 'timepoint', label = TRUE)
# Attach the cell type to the plot data so the clone's cells can be coloured by it.
p$data$celltype <- scrna@meta.data[rownames(p$data), 'celltype']
# Overlay the clone's cells. scale_color_manual replaces the colour scale that
# DimPlot already added (hence the "Scale for colour is already present" note).
# NOTE(review): ctype_colors is assigned in the later "previous" section of
# this notebook; that cell has to be run before this one.
p <- p +
    geom_point(data = p$data[cells, ], aes(UMAP_1, UMAP_2, color = celltype), size = 2, alpha = 0.8) +
    scale_color_manual(values = ctype_colors, na.value = 'lightgray') +
    ggtitle(title_str) +
    theme(legend.position = 'none')
# NOTE(review): the file name starts with 'fig4g' although it is written to the
# figs7 directory under section s7b -- confirm the intended name.
ggsave(filename = str_glue('{outdir}/fig4g-exhau_contract-R-umap-selected_clone.pdf'),
       plot = p, width = 7, height = 4)

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.


### contracted exhausted, PR

In [None]:
# Clone 2: plot title, clonotype to highlight, and the cell types whose
# cluster labels stay visible on the UMAP.
title_str <- "Clone 2 (PR, contracted in exhausted clusters)"
select_clone <- "TRAV27;TRBV20-1_;TRBD2_TRAJ49;TRBJ2-3_TRA:CAVFTGNQFYF;TRB:CSALAGGIDTQYF_P03"
show_ctypes <- c(
    "T_MKI67",
    "CD8_CXCL13",
    "CD8_TYMS"
)

In [None]:
# Barcodes of every cell that carries the selected clonotype; which() skips
# rows whose comparison is NA.
cells <- cell_clone_info$barcode[which(cell_clone_info$clonotype == select_clone)]
loginfo("total %g cells of selected clone", length(cells))

2024-02-28 21:00:19 INFO::total 113 cells of selected clone


In [None]:
# UMAP split by timepoint with the selected clone highlighted.
# Only the cell types in show_ctypes keep a label; all others get a blank one.
scrna$mylabel <- scrna$celltype
scrna$mylabel[!(scrna$celltype %in% show_ctypes)] <- ' '
p <- DimPlot(object = scrna, cells.highlight = cells, group.by = 'mylabel', split.by = 'timepoint', label = TRUE)
# Attach the cell type to the plot data so the clone's cells can be coloured by it.
p$data$celltype <- scrna@meta.data[rownames(p$data), 'celltype']
# Overlay the clone's cells. scale_color_manual replaces the colour scale that
# DimPlot already added (hence the "Scale for colour is already present" note).
# NOTE(review): ctype_colors is assigned in the later "previous" section of
# this notebook; that cell has to be run before this one.
p <- p +
    geom_point(data = p$data[cells, ], aes(UMAP_1, UMAP_2, color = celltype), size = 2, alpha = 0.8) +
    scale_color_manual(values = ctype_colors, na.value = 'lightgray') +
    ggtitle(title_str) +
    theme(legend.position = 'none')
# NOTE(review): the file name starts with 'fig4g' although it is written to the
# figs7 directory under section s7b -- confirm the intended name.
ggsave(filename = str_glue('{outdir}/fig4g-exhau_contract-PR-umap-selected_clone.pdf'),
       plot = p, width = 7, height = 4)

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.


### expanded cytotoxic, R

In [None]:
# Clone 3: plot title, clonotype to highlight, and the cell types whose
# cluster labels stay visible on the UMAP.
title_str <- "Clone 3 (R, expanded in cytotoxic clusters)"
select_clone <- "TRAV12-1;TRBV9_;_TRAJ39;TRBJ2-6_TRA:CVVNHAGNMLTF;TRB:CASSVEASGANVLTF_P26"
show_ctypes <- c(
    "CD8_ANXA1",
    "CD8_CCL5",
    "CD8_CX3CR1",
    "CD8_FOS",
    "CD8_GZMK"
)

In [None]:
# Barcodes of every cell that carries the selected clonotype; which() skips
# rows whose comparison is NA.
cells <- cell_clone_info$barcode[which(cell_clone_info$clonotype == select_clone)]
loginfo("total %g cells of selected clone", length(cells))

2024-02-28 21:00:25 INFO::total 90 cells of selected clone


In [None]:
# UMAP split by timepoint with the selected clone highlighted.
# Only the cell types in show_ctypes keep a label; all others get a blank one.
scrna$mylabel <- scrna$celltype
scrna$mylabel[!(scrna$celltype %in% show_ctypes)] <- ' '
p <- DimPlot(object = scrna, cells.highlight = cells, group.by = 'mylabel', split.by = 'timepoint', label = TRUE)
# Attach the cell type to the plot data so the clone's cells can be coloured by it.
p$data$celltype <- scrna@meta.data[rownames(p$data), 'celltype']
# Overlay the clone's cells. scale_color_manual replaces the colour scale that
# DimPlot already added (hence the "Scale for colour is already present" note).
# NOTE(review): ctype_colors is assigned in the later "previous" section of
# this notebook; that cell has to be run before this one.
p <- p +
    geom_point(data = p$data[cells, ], aes(UMAP_1, UMAP_2, color = celltype), size = 2, alpha = 0.8) +
    scale_color_manual(values = ctype_colors, na.value = 'lightgray') +
    ggtitle(title_str) +
    theme(legend.position = 'none')
# NOTE(review): the file name starts with 'fig4g' although it is written to the
# figs7 directory under section s7b -- confirm the intended name.
ggsave(filename = str_glue('{outdir}/fig4g-cyto_expand-R-umap-selected_clone.pdf'),
       plot = p, width = 7, height = 4)

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.


### expanded cytotoxic, PR

In [None]:
# Clone 4: plot title, clonotype to highlight, and the cell types whose
# cluster labels stay visible on the UMAP. The label set mixes cell types that
# cell_state_map lists as cytotoxic with ones it lists as exhausted.
title_str <- "Clone 4 (PR, expanded in cytotoxic clusters)"
select_clone <- "TRAV20;TRBV4-2_;_TRAJ43;TRBJ2-7_TRA:CAVRGDNNDMRF;TRB:CASSHGTYSYEQYF_P20"
show_ctypes <- c(
    "CD8_ANXA1",
    "CD8_CCL5",
    "CD8_FOS",
    "CD8_GZMK",
    "CD8_CXCL13",
    "CD8_TYMS",
    "T_MKI67"
)

In [None]:
# Barcodes of every cell that carries the selected clonotype; which() skips
# rows whose comparison is NA.
cells <- cell_clone_info$barcode[which(cell_clone_info$clonotype == select_clone)]
loginfo("total %g cells of selected clone", length(cells))

2024-02-28 21:00:31 INFO::total 149 cells of selected clone


In [None]:
# UMAP split by timepoint with the selected clone highlighted.
# Only the cell types in show_ctypes keep a label; all others get a blank one.
scrna$mylabel <- scrna$celltype
scrna$mylabel[!(scrna$celltype %in% show_ctypes)] <- ' '
p <- DimPlot(object = scrna, cells.highlight = cells, group.by = 'mylabel', split.by = 'timepoint', label = TRUE)
# Attach the cell type to the plot data so the clone's cells can be coloured by it.
p$data$celltype <- scrna@meta.data[rownames(p$data), 'celltype']
# Overlay the clone's cells. scale_color_manual replaces the colour scale that
# DimPlot already added (hence the "Scale for colour is already present" note).
# NOTE(review): ctype_colors is assigned in the later "previous" section of
# this notebook; that cell has to be run before this one.
p <- p +
    geom_point(data = p$data[cells, ], aes(UMAP_1, UMAP_2, color = celltype), size = 2, alpha = 0.8) +
    scale_color_manual(values = ctype_colors, na.value = 'lightgray') +
    ggtitle(title_str) +
    theme(legend.position = 'none')
# NOTE(review): the file name starts with 'fig4g' although it is written to the
# figs7 directory under section s7b -- confirm the intended name.
ggsave(filename = str_glue('{outdir}/fig4g-cyto_expand-PR-umap-selected_clone.pdf'),
       plot = p, width = 7, height = 4)

Rasterizing points since number of points exceeds 100,000.
To disable this behavior set `raster=FALSE`

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.


# previous

## clone change type (pre vs post): s7a-s7c

In [38]:
# Input tables ---------------------------------------------------------------
# Cell source of the 4 clone categories.
f_cell_source <- "../../stage4/a05_clone_share/pre_post_share/clone_frac_in_sample/clone_cell_source-cell_state-clone_change_type.tsv"
# Clone sharing at the intra-cell-state level (has a share_type column).
f_share_type <- "../../stage4/a05_clone_share/pre_post_share/pre_post_share-per_clone-cell_state.tsv"
# Merged clone fraction / clone change type (sample level) and clone sharing
# info (no share_type column).
f_clone_change <- "../../stage4/a05_clone_share/pre_post_share/clone_frac_in_sample/clone_pair_patient-clone_change_type-chisq.tsv"

# Clone change types: display order and colours -------------------------------
clone_type_lvl <- c("Expanded", "Contracted", "Persistent")
clone_type_color_map <- c(
    Expanded = "#EE0000FF",
    Contracted = "#3B4992FF",
    Persistent = "gray60"
)

# One colour per cell type, listed in the same grouping as cell_state_map -----
ctype_colors <- c(
    # cytotoxic
    CD8_ANXA1 = "#8C0439",
    CD8_CCL5 = "#A12253",
    CD8_CX3CR1 = "#B6416D",
    CD8_FOS = "#CC6088",
    CD8_GZMK = "#E17FA2",
    CD8_KLRB1 = "#F79EBD",
    # exhausted
    CD4_CXCL13 = "#003CA5",
    CD8_CXCL13 = "#2F5DB1",
    CD8_TYMS = "#5E7FBD",
    T_MKI67 = "#8DA1C9",
    Treg_TNFRSF4 = "#BDC3D6",
    # dying
    T_Mito = "#7B7D7B",
    # others
    CD4_KLRB1 = "#DF9608",
    CD8_CD74 = "#E2A22E",
    CD8_IFIT1 = "#E5AE54",
    T_IL7R = "#E8BA7A",
    T_Ribo = "#EBC6A0",
    Treg_LTB = "#EFD3C6"
)

### scatter show clone type: s7a

- desc: only pre-post shared clones (clone sharing defined intra cell state)

In [43]:
# Cell states kept for the scatter panels, in facet-column order.
used_states <- c("cytotoxic", "exhausted", "dying", "others")
# Axis breaks shared by the x and y axes of the clone-fraction scatter.
xy_tick_labels <- c(1e-3, 1e-2, 1e-1)

In [47]:
# Clone change table for the scatter plot, limited to clones that are shared
# between pre and post within a cell state.
# Step 1: drop NK states, keep used_states, drop three clinical columns and
# re-attach the grouping column `gp` with the project helper add_clin_info.
df <- read_tsv(f_clone_change, show_col_types = FALSE) %>%
    filter(!grepl('NK', cell_state)) %>%  # no NK cells
    filter(cell_state %in% used_states) %>%
    select(-c('mandard_group', 'response_degree', 'treatment_group')) %>%
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient')
# Step 2: keep only (patient, clonotype, cell_state) combinations flagged as
# 'pre_post_share'; merge() with default arguments keeps matching rows only.
df <- read_tsv(f_share_type, show_col_types = FALSE) %>%
    filter(share_type == 'pre_post_share') %>%
    select(patient, patient_id, clonotype, cell_state, share_type) %>%
    distinct() %>%
    merge(df, by = c('patient', 'patient_id', 'clonotype', 'cell_state'))
df %>% write_tsv(str_glue('{outdir}/figs7a-clone_change_type-clone_frac_pre_vs_post-by_state-only_share.tsv'))

loginfo('%g records', nrow(df))

2024-02-22 16:19:58 INFO::these clinial info will be added: response
2024-02-22 16:19:59 INFO::4240 records


In [48]:
# scatter show expand type use clone fraction
# Pre vs post clone fraction per clone, coloured by clone change type and
# faceted by response group (rows) and cell state (columns). The black line is
# y = x; both axes use the project helper lognp_trans via coord_trans.
p <- read_tsv(str_glue('{outdir}/figs7a-clone_change_type-clone_frac_pre_vs_post-by_state-only_share.tsv'), show_col_types = FALSE) %>%
    mutate(clone_change_type = factor(clone_change_type, levels = clone_type_lvl),
           frow = factor(.data[[gp]], gp_lvls[[gp]]),
           fcol = factor(cell_state, used_states)) %>%
    ggscatter(x = 'clone_frac_in_sample_pre', y = 'clone_frac_in_sample_post', color = 'clone_change_type', alpha = 0.7, cor.coef = TRUE) +
    geom_abline(slope = 1, intercept = 0) +
    facet_grid(frow ~ fcol)  +
    scale_x_continuous(breaks = xy_tick_labels) +
    scale_y_continuous(breaks = xy_tick_labels) +
    scale_color_manual(values = clone_type_color_map) +
    # Enlarge the legend keys only; plotted points keep their size.
    guides(color = guide_legend(override.aes = list(size = 4))) +
    coord_trans(x = lognp_trans(n = 0.001), y = lognp_trans(n = 0.001)) +
    labs(x = 'Clone fraction in Baseline (RNA & TCR)', y = 'Clone fraction in Treat (RNA & TCR)',
         color = 'Clone type') +
    theme(legend.position = 'right')
# NOTE(review): 'clone_chage_type' in the file name looks like a typo of
# 'clone_change_type'; kept so existing output paths do not change.
ggsave(filename = str_glue('{outdir}/figs7a-scatter-clone_chage_type-clone_frac_pre_vs_post-by_state-only_share.pdf'),
       plot = p, width = 15, height = 5)

### scatter show clone type-v2: s7a

- desc: only pre-post shared clones (pre/post fraction != 0)

In [32]:
# Cell states kept for the scatter panels, in facet-column order.
used_states <- c("cytotoxic", "exhausted", "dying", "others")
# Axis breaks shared by the x and y axes of the clone-fraction scatter.
xy_tick_labels <- c(1e-3, 1e-2, 1e-1)

In [33]:
# v2 of the clone change table: "shared" is defined here by a non-zero clone
# fraction at both timepoints rather than by the share_type table.
df <- read_tsv(f_clone_change, show_col_types = FALSE) %>%
    filter(!grepl('NK', cell_state)) %>%  # no NK cells
    filter(cell_state %in% used_states) %>%
    select(-c('mandard_group', 'response_degree', 'treatment_group')) %>%
    # Re-attach the grouping column `gp` with the project helper add_clin_info.
    add_clin_info(ftsv = f_pat_gp, columns = gp, merge_by = 'patient') %>%
    filter(clone_frac_in_sample_pre != 0,
           clone_frac_in_sample_post != 0)
df %>% write_tsv(str_glue('{outdir}/figs7a-clone_change_type-clone_frac_pre_vs_post-by_state-only_share-v2.tsv'))

loginfo('%g records', nrow(df))

2024-02-22 18:30:44 INFO::these clinial info will be added: response
2024-02-22 18:30:44 INFO::8327 records


In [34]:
# scatter show expand type use clone fraction
# Same scatter as the first version, drawn from the v2 table: pre vs post
# clone fraction per clone, coloured by clone change type and faceted by
# response group (rows) and cell state (columns). The black line is y = x.
p <- read_tsv(str_glue('{outdir}/figs7a-clone_change_type-clone_frac_pre_vs_post-by_state-only_share-v2.tsv'), show_col_types = FALSE) %>%
    mutate(clone_change_type = factor(clone_change_type, levels = clone_type_lvl),
           frow = factor(.data[[gp]], gp_lvls[[gp]]),
           fcol = factor(cell_state, used_states)) %>%
    ggscatter(x = 'clone_frac_in_sample_pre', y = 'clone_frac_in_sample_post', color = 'clone_change_type', alpha = 0.7, cor.coef = TRUE) +
    geom_abline(slope = 1, intercept = 0) +
    facet_grid(frow ~ fcol)  +
    scale_x_continuous(breaks = xy_tick_labels) +
    scale_y_continuous(breaks = xy_tick_labels) +
    scale_color_manual(values = clone_type_color_map) +
    # Enlarge the legend keys only; plotted points keep their size.
    guides(color = guide_legend(override.aes = list(size = 4))) +
    coord_trans(x = lognp_trans(n = 0.001), y = lognp_trans(n = 0.001)) +
    labs(x = 'Clone fraction in Baseline (RNA & TCR)', y = 'Clone fraction in Treat (RNA & TCR)',
         color = 'Clone type') +
    theme(legend.position = 'right')
# NOTE(review): 'clone_chage_type' in the file name looks like a typo of
# 'clone_change_type'; kept so existing output paths do not change.
ggsave(filename = str_glue('{outdir}/figs7a-scatter-clone_chage_type-clone_frac_pre_vs_post-by_state-only_share-v2.pdf'),
       plot = p, width = 15, height = 5)

### subtype source of interested clones: s7b

Contracted clones in cytotoxic clusters, Expanded clones in exhausted clusters

In [41]:
# Clone categories shown in the cell-source bar plot (one facet each).
used_cat <- c(
    "Expanded clones in exhausted clusters",
    "Contracted clones in cytotoxic clusters"
)

In [42]:
# plot
# Cell-type composition of the selected clone categories: cells are counted
# per response group x sample type x clone category x cell type and drawn as
# proportion-filled bars, one facet per clone category.
p <- read_tsv(f_cell_source, show_col_types = FALSE) %>%
    filter(!grepl('NK', celltype)) %>%
    filter(clone_cat %in% used_cat) %>%
    count(.data[[gp]], sample_type, clone_cat, celltype, name = 'n_cell') %>%
    # Bars follow comb_order; cell types follow the order of ctype_colors.
    mutate(response_stype = factor(paste0(.data[[gp]], '-', sample_type), comb_order),
           celltype = factor(celltype, names(ctype_colors))) %>%
    ggbarplot(x = 'n_cell', y = 'response_stype', fill = 'celltype', position = position_fill()) +
    facet_wrap(~ clone_cat) +
    scale_fill_manual(values = ctype_colors) +
    guides(fill = guide_legend(nrow = 3)) +
    labs(x = 'Cell proportion', y = '', fill = '')

ggsave(filename = str_glue('{outdir}/figs7b-bar-cell_source-exhauExpand_cytoContract.pdf'),
       plot = p, width = 12, height = 5)