In [None]:
library(tidyverse)
library(Seurat)

In [None]:
# Per-neighbourhood differential-abundance results; the cells below rely on its
# `Nhood` and `restored_grouping` columns.
da_results_nhg = qs::qread('_targets/objects/da_results_nhg_Agrp___all.obob5v5__v__all.obobBL6')
da_results_nhg %>% head

In [None]:
# Cell x neighbourhood membership table: it is row-summed per cell and
# column-indexed by neighbourhood further down.
nhm = qs::qread('_targets/objects/nhm_Agrp___obob5v5')
nhm %>% head

In [None]:
# NOTE(review): presumably the Seurat object of the Agrp subset (it is passed to
# FeaturePlot() at the end of the notebook) — confirm. The variable shares its
# name with base::exp.
exp = qs::qread('_targets/objects/obj_Agrp')
exp

In [None]:
# Neighbourhood groups to pull cells from, and the neighbourhoods assigned to them
selected_groups = c("none")
selected_nhoods = da_results_nhg %>% filter(restored_grouping %in% selected_groups) %>% pull(Nhood)

In [None]:
# Cells that belong to at least one of the selected neighbourhoods.
# drop = FALSE keeps the subset two-dimensional, so rowSums() still works when
# only a single neighbourhood is selected.
selected_cells = nhm[, selected_nhoods, drop = FALSE] %>%
    rowSums %>%
    enframe(name="cell_id", value="nhood_count") %>%
    filter(nhood_count > 0) %>%
    distinct(cell_id) %>%
    pull(cell_id)

In [None]:
dim(nhm)

In [None]:
# Return the ids of all cells that belong to at least one neighbourhood of a group.
#
# da_results_nhg: table with one row per neighbourhood and the columns `Nhood`
#                 and `restored_grouping`.
# nhm:            cell x neighbourhood membership table (cell ids as row names);
#                 its columns are indexed with the selected `Nhood` values.
# selected_group: single `restored_grouping` value to select.
#
# Returns a character vector of cell ids.
get_nhg_cells = function(da_results_nhg, nhm, selected_group){
    selected_nhoods = da_results_nhg %>%
        filter(restored_grouping == selected_group) %>%
        pull(Nhood)
    # drop = FALSE keeps the subset two-dimensional, so rowSums() also works
    # when the group consists of a single neighbourhood.
    nhm[, selected_nhoods, drop = FALSE] %>%
        rowSums %>%
        enframe(name="cell_id", value="nhood_count") %>%
        filter(nhood_count > 0) %>%
        distinct(cell_id) %>%
        pull(cell_id)
}

In [None]:
# Number of cells that sit in at least one 'none' neighbourhood
length(get_nhg_cells(da_results_nhg, nhm, 'none'))

In [None]:
# List the neighbourhoods that contain the given cell(s).
#
# nhm:  cell x neighbourhood membership table (one column per neighbourhood).
# cell: row index or row name of the cell(s) to look up. Defaults to the first
#       row, which is the row this function always inspected before.
#
# Returns the names of the columns whose summed membership over `cell` is > 0.
get_cell_nh = function(nhm, cell = 1){
    # drop = FALSE keeps the selection two-dimensional, so colSums() also works
    # for matrices and for tables with a single neighbourhood column.
    nhm[cell, , drop = FALSE] %>%
        colSums %>%
        enframe %>%
        filter(value > 0) %>%
        pull(name)
}

In [None]:
# Neighbourhoods containing the first cell of nhm
get_cell_nh(nhm[1,])

In [None]:
# Build the neighbourhood -> group lookup table.
#
# Keeps the unique (Nhood, restored_grouping) pairs of `da_results_nhg` and
# stores Nhood as character.
annotate_nhg = function(da_results_nhg){
    nhood_groups = distinct(select(da_results_nhg, Nhood, restored_grouping))
    mutate(nhood_groups, Nhood = as.character(Nhood))
}

In [None]:
# Neighbourhood -> group lookup for the loaded DA results, with a preview
nhg_annotation = annotate_nhg(da_results_nhg)
head(nhg_annotation)

In [None]:
# Count the neighbourhoods per restored_grouping (`n`) and add each group's
# share of all neighbourhoods (`frac_nhoods`).
summarise_nhg_annotation = function(nhg_annotation){
    by_group = group_by(nhg_annotation, restored_grouping)
    nhoods_per_group = ungroup(summarise(by_group, n = n()))
    mutate(nhoods_per_group, frac_nhoods = n/sum(n))
}

In [None]:
# Neighbourhood count and fraction per group for the lookup table built above
grouping_summary = summarise_nhg_annotation(nhg_annotation)
grouping_summary

In [None]:
# Assign every cell to a single neighbourhood group.
#
# nhm:            cell x neighbourhood membership data frame (cell ids as row
#                 names, one column per neighbourhood, non-zero = member).
# da_results_nhg: per-neighbourhood results with `Nhood` and
#                 `restored_grouping` columns.
#
# For each cell, every group it touches gets the weight
#   (cell's neighbourhoods in the group) * (1 - frac_nhoods)
#       / (all of the cell's neighbourhoods)
# where frac_nhoods is the group's share of all neighbourhoods, so groups with
# many neighbourhoods are down-weighted. The group with the highest weight is
# kept. Cells that are in no neighbourhood do not appear in the result.
#
# Returns one row per cell with the columns `rowname` (cell id),
# `restored_grouping` (factor) and `fgf1_grouping`: "pos" / "neg" when the
# group label contains "pos" / "neg" and does not contain "BL6", else "none".
nhg2cell = function(nhm, da_results_nhg) {
    nhg_annotation = annotate_nhg(da_results_nhg)
    grouping_summary = summarise_nhg_annotation(nhg_annotation)
    nhg_tib = nhm %>%
        rownames_to_column(var = "rowname") %>%
        # Pivot every neighbourhood column, excluding only the id column by
        # name. (A name pattern such as contains("row") would also skip any
        # neighbourhood whose name happens to contain "row".)
        pivot_longer(cols = -rowname, names_to = "Nhood", values_to = "value") %>%
        # keep actual memberships only
        filter(value != 0) %>%
        select(-value) %>%
        left_join(nhg_annotation, by = "Nhood") %>%
        left_join(grouping_summary, by="restored_grouping") %>%
        select(-n, -Nhood) %>%
        mutate(restored_grouping = as.factor(restored_grouping)) %>%
        # the cell's neighbourhoods in this group, down-weighted by group size
        group_by(rowname, restored_grouping) %>%
        mutate(group_weight = n() * (1-frac_nhoods)) %>%
        ungroup() %>%
        # all neighbourhoods of the cell
        group_by(rowname) %>%
        mutate(total_count = n()) %>%
        ungroup() %>%
        # collapse to one row per (cell, group)
        distinct %>%
        mutate(weight = group_weight/total_count) %>%
        # best group first, then keep the first row of every cell
        arrange(desc(weight)) %>%
        distinct(rowname, .keep_all = TRUE) %>%
        select(-group_weight, -total_count, -weight, -frac_nhoods) %>%
        mutate(fgf1_grouping = case_when(
            str_detect(restored_grouping, "pos") & !str_detect(restored_grouping, "BL6") ~ "pos",
            str_detect(restored_grouping, "neg") & !str_detect(restored_grouping, "BL6") ~ "neg",
            TRUE ~ "none"
        ))
    nhg_tib
}


# Preview of the per-cell group assignment
nhg2cell(nhm, da_results_nhg) %>% head

In [None]:
nhgc = nhg2cell(nhm, da_results_nhg)
# Group labels other than "pos_restored" and "none" that were assigned to at least one cell
nhgc %>% filter(!(restored_grouping %in% c("pos_restored", "none"))) %>% pull(restored_grouping) %>% as.character %>% unique

In [41]:
# Same labels joined with '.', the separator get_seurat_nhg_markers() splits its group arguments on
nhgc %>% filter(!(restored_grouping %in% c("pos_restored", "none"))) %>% pull(restored_grouping) %>% as.character %>% unique %>% paste0(collapse='.')

In [49]:
# NOTE(review): the error recorded below this cell mentions qs::qread("") rather
# than this path, so the stored output looks stale — re-run to confirm.
qs::qread("_targets/objects/obj_D")

ERROR: Error in qs::qread(""): Failed to open . Check file path.


In [48]:
# Differential expression between the cells of two sets of neighbourhood groups.
#
# seurat_obj:   object handed to Seurat::FindMarkers().
# nhgc:         per-cell group table (needs a `rowname` column with cell ids
#               and the column named by `grouping_col`).
# grouping_col: name of the column of `nhgc` that holds the group labels.
# group_a:      group label(s) of the first cell set; several labels may be
#               given as a vector and/or joined with '.' ("pos_FGF1.pos_away").
# group_b:      group label(s) of the second cell set, same format. The default
#               '' means "every other non-missing group".
# tag:          label copied into the `tag` column of the result.
#
# Returns the FindMarkers() table (SCT assay, data slot, logfc.threshold = 0,
# both directions) with an extra `tag` column. Stops with an error when either
# cell set is empty.
get_seurat_nhg_markers = function(seurat_obj, nhgc, grouping_col, group_a, group_b='', tag=''){
    # '.'-joined labels are split into individual labels
    split_labels = function(labels) {
        unlist(stringr::str_split(labels, pattern = stringr::fixed('.')))
    }
    cell_groups = as.character(nhgc[[grouping_col]])
    group_a = split_labels(group_a)
    # Vector-safe test for "group_b not supplied": a bare `group_b == ''`
    # inside if() errors for input of length > 1 in R >= 4.2.
    if (length(group_b) == 0 || all(!nzchar(as.character(group_b)))) {
        # Take the complement directly instead of pasting the labels together
        # with '.' and splitting them again, which mangles labels containing '.'.
        group_b = setdiff(unique(cell_groups[!is.na(cell_groups)]), group_a)
    } else {
        group_b = split_labels(group_b)
    }
    cells_a = nhgc[['rowname']][cell_groups %in% group_a]
    cells_b = nhgc[['rowname']][cell_groups %in% group_b]
    if (length(cells_a) == 0 || length(cells_b) == 0) {
        stop("get_seurat_nhg_markers: no cells found for group_a and/or group_b in column '",
             grouping_col, "'", call. = FALSE)
    }
    markers = Seurat::FindMarkers(seurat_obj, ident.1=cells_a, ident.2=cells_b, slot="data", assay="SCT", verbose=TRUE,
                                  min.cells.group = 10,
                                  min.cells.feature = 10,
                                  min.pct = 0.01,
                                  logfc.threshold = 0,
                                  only.pos = FALSE)
    markers['tag'] = tag
    markers
}

In [43]:
# Add an Ensembl gene id and a signed GSEA ranking score to a marker table.
#
# sm_results: marker data frame with gene symbols as row names and the columns
#             `p_val_adj` and `avg_log2FC` (the shape Seurat::FindMarkers()
#             returns in this notebook).
#
# Gene symbols are converted with gprofiler2::gconvert() (organism
# "mmusculus", target "ENSG", at most one mapping per gene), which queries the
# g:Profiler service.
#
# Returns the input columns plus `GeneID`, `ensmusg` and `gsea_sort_score`
# (= -log10(p_val_adj) * avg_log2FC), one row per `ensmusg`, sorted by
# decreasing score.
add_gsea_cols_to_seurat_marker_results = function(sm_results){
    sm_results = sm_results %>%
        rownames_to_column(var = "GeneID")
    converted = gprofiler2::gconvert(query = sm_results$GeneID,
                                     organism = "mmusculus",
                                     target = "ENSG",
                                     mthreshold = 1,
                                     filter_na = FALSE) %>%
                    transmute(GeneID = input, ensmusg = target)
    # Adjusted p-values of exactly 0 would give -log10(0) = Inf (and NaN for a
    # zero fold change), which breaks the ranking; clamp them to the smallest
    # positive double instead.
    min_p = .Machine$double.xmin
    sm_results %>%
        left_join(converted, by = "GeneID") %>%
        # NOTE(review): with filter_na = FALSE, genes without a mapping
        # presumably share one missing `ensmusg` value, so only the first of
        # them survives this distinct() — confirm that is intended.
        distinct(ensmusg, .keep_all=TRUE) %>%
        mutate(gsea_sort_score = -log10(pmax(p_val_adj, min_p)) * avg_log2FC) %>%
        arrange(desc(gsea_sort_score))
}

In [44]:
# Markers of the cells labelled "pos" in fgf1_grouping versus the cells of all
# other groups (group_b is left at its default).
# exp_neuron is loaded in a cell further down (In [27]), i.e. that cell ran before this one.
markers = get_seurat_nhg_markers(exp_neuron, nhgc, 'fgf1_grouping', c('pos'))

In [45]:
markers

GeneID,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,tag
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Cntn5,5.169331e-69,1.0138835,0.966,0.858,1.390860e-64,
Tcf4,3.497004e-65,-0.6977552,0.923,0.981,9.409038e-61,
Grik2,1.298251e-64,0.5743450,1.000,1.000,3.493075e-60,
Enox1,1.005230e-50,-0.5525478,0.975,0.992,2.704671e-46,
Fam189a1,4.541651e-43,0.4849314,0.978,0.930,1.221977e-38,
Syt1,3.037669e-42,0.6642320,0.984,0.945,8.173152e-38,
Galntl6,2.954599e-37,0.8774001,0.910,0.844,7.949644e-33,
Cadm1,1.751590e-36,0.3975767,1.000,0.999,4.712827e-32,
Plcb4,6.338584e-36,0.4312180,0.988,0.974,1.705459e-31,
Pde3a,8.876403e-36,0.8548929,0.486,0.283,2.388285e-31,


In [47]:
# Scratch check: c('a', 'b', NULL) has length 2 because c() drops NULL, so tibble()
# rejects the size mismatch with column `a` (see the error recorded below).
tibble(a = c(1,2,3), b=c('a', 'b', NULL))

ERROR: Error:
! Tibble columns must have compatible sizes.
• Size 3: Existing data.
• Size 2: Column `b`.
ℹ Only values of size one are recycled.


In [30]:
# Rank the markers for GSEA: score = -log10(adjusted p) * log2 fold change, largest first
arrange(
    mutate(
        rownames_to_column(markers, var = "GeneID"),
        gsea_sort_score = -log10(p_val_adj) * avg_log2FC
    ),
    desc(gsea_sort_score)
)

GeneID,p_val,avg_log2FC,pct.1,pct.2,p_val_adj,tag,gsea_sort_score
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
Cntn5,3.145759e-69,1.0131910,0.970,0.854,6.764012e-65,,65.016262
Grik2,1.126015e-64,0.5741145,1.000,1.000,2.421157e-60,,34.226398
Nrg1,7.015045e-34,1.1658969,0.914,0.877,1.508375e-29,,33.602886
Schip1,6.444850e-31,1.1936295,0.927,0.925,1.385772e-26,,30.865239
Galntl6,3.133208e-37,0.8777898,0.908,0.839,6.737023e-33,,28.239841
Pde3a,7.189954e-36,0.8524434,0.486,0.283,1.545984e-31,,26.264458
Syt1,6.761176e-42,0.6641989,0.983,0.943,1.453788e-37,,24.467426
Thsd7b,3.465978e-26,1.1037375,0.379,0.224,7.452546e-22,,23.319430
Nell1,8.932171e-28,0.9327506,0.460,0.286,1.920595e-23,,21.188889
St18,7.775017e-33,0.7244896,0.582,0.388,1.671784e-28,,20.124017


In [26]:
# Process id of the current R session
Sys.getpid()

In [50]:
# Per-cell group assignment (rowname, restored_grouping, fgf1_grouping)
nhgc

rowname,restored_grouping,fgf1_grouping
<chr>,<fct>,<chr>
TCATTTGCACTCACTC_1_2,pos_restored,pos
ATCAGGTGTCGCGTCA_2_1,pos_FGF1,pos
ATTACCTAGACCTCAT_4_1,pos_FGF1,pos
GTGTAACTCTTCCCGA_1_2,pos_FGF1,pos
TGCGGGTAGAATTTGG_1_2,pos_FGF1,pos
ACAAGCTAGCCGAATG_4_2,pos_FGF1,pos
TACTTCAAGTGGACTG_2_3,pos_FGF1,pos
TCCTCGAGTACGAAAT_3_3,pos_FGF1,pos
TGTTCTAAGGACAGCT_2_1,pos_away,pos
TCACTCGGTTCATCTT_2_1,pos_away,pos


In [None]:
# NOTE(review): `nhm_long` is not defined anywhere in the visible notebook. It is
# used like the per-cell table returned by nhg2cell() (rowname / restored_grouping
# columns) — presumably an earlier name of `nhgc`; confirm before re-running.
# Number of cells per restored_grouping
nhm_long %>% group_by(restored_grouping) %>% summarise(n = n())

In [None]:
# NOTE(review): nhg2cell() names its derived column `fgf1_grouping`;
# `fgf_grouping` may be a stale name — confirm.
nhm_long %>% group_by(fgf_grouping) %>% summarise(n = n())

In [None]:
# Repeat of the per-group cell count above
nhm_long %>% group_by(restored_grouping) %>% summarise(n = n())

In [None]:
# Ids of the cells assigned to the 'none' group
none_cells = nhm_long %>% filter(restored_grouping == 'none') %>% pull(rowname)

In [27]:
# Object read from the 00_preprocessing targets store; used for the UMAP plots
# and marker tests below
exp_neuron = qs::qread('../00_preprocessing/_targets/objects/exp_labelled_neuron')
# exp_other = qs::qread('_targets/objects/obj_Agrp')

In [None]:
# Highlight the 'none' cells on the UMAP (15 x 10 notebook plot size)
options(repr.plot.width=15, repr.plot.height=10)
DimPlot(exp_neuron, reduction = "umap", cells.highlight = none_cells, label = TRUE,  repel = TRUE)

In [None]:
# Ids of the cells assigned to 'neg_restored'
neg_restored_cells = nhm_long %>% filter(restored_grouping == 'neg_restored') %>% pull(rowname)

In [None]:
# Highlight the 'neg_restored' cells on the UMAP
options(repr.plot.width=15, repr.plot.height=10)
DimPlot(exp_neuron, reduction = "umap", cells.highlight = neg_restored_cells, label = TRUE,  repel = TRUE)

In [None]:
# Ids of the cells assigned to 'pos_restored'
pos_restored_cells = nhm_long %>% filter(restored_grouping == 'pos_restored') %>% pull(rowname)

In [None]:
# Highlight the 'pos_restored' cells on the UMAP
options(repr.plot.width=15, repr.plot.height=10)
DimPlot(exp_neuron, reduction = "umap", cells.highlight = pos_restored_cells, label = TRUE,  repel = TRUE)

In [None]:
# Ids of the cells assigned to 'pos_FGF1'
pos_FGF1_cells = nhm_long %>% filter(restored_grouping == 'pos_FGF1') %>% pull(rowname)

In [None]:
# Highlight the 'pos_FGF1' cells on the UMAP
options(repr.plot.width=15, repr.plot.height=10)
DimPlot(exp_neuron, reduction = "umap", cells.highlight = pos_FGF1_cells, label = TRUE,  repel = TRUE)

In [None]:
# Ids of the cells assigned to 'pos_away'
pos_away_cells = nhm_long %>% filter(restored_grouping == 'pos_away') %>% pull(rowname)

In [None]:
# Highlight the 'pos_away' cells on the UMAP
options(repr.plot.width=15, repr.plot.height=10)
DimPlot(exp_neuron, reduction = "umap", cells.highlight = pos_away_cells, label = TRUE,  repel = TRUE)

In [None]:
# Markers of the 'pos_restored' cells versus the 'none' cells (SCT assay, data slot,
# otherwise FindMarkers() defaults)
pos_restored_markers = Seurat::FindMarkers(exp_neuron, ident.1=pos_restored_cells, ident.2=none_cells, slot="data", assay="SCT", verbose=TRUE)
pos_restored_markers

In [None]:
# First 9 genes with adjusted p < 0.05, in the row order of the marker table,
# shown as a 3-column grid on exp_neuron.
# NOTE(review): the variable is called top10g_markers but head(9) keeps nine genes.
top10g_markers = pos_restored_markers %>% filter(p_val_adj < 0.05) %>% rownames %>% head(9)
# Switches the default assay of the global exp_neuron object to "RNA" (persists for later cells)
DefaultAssay(exp_neuron) = "RNA"
options(repr.plot.width=15, repr.plot.height=15)
fp = FeaturePlot(exp_neuron,
            features = top10g_markers,
           pt.size=5,
           order=TRUE,
           min.cutoff="q01",
           max.cutoff="q99",
           raster=TRUE,
           raster.dpi=c(1024, 1024),
           ncol=3
           )
fp

In [None]:
# Same nine genes, plotted on `exp` (the object loaded from obj_Agrp) instead of exp_neuron
top10g_markers = pos_restored_markers %>% filter(p_val_adj < 0.05) %>% rownames %>% head(9)
# Switches the default assay of the global exp object to "RNA" (persists for later cells)
DefaultAssay(exp) = "RNA"
options(repr.plot.width=15, repr.plot.height=15)
fp = FeaturePlot(exp,
            features = top10g_markers,
           pt.size=5,
           order=TRUE,
           min.cutoff="q01",
           max.cutoff="q99",
           raster=TRUE,
           raster.dpi=c(1024, 1024),
           ncol=3
           )
fp