In [None]:
# Attach dependencies with library() so that a missing package stops the
# notebook right here; require() only warns and returns FALSE, which lets
# later cells fail with less obvious errors.
library(tidyverse)
library(ggthemes)
library(harmony)
library(Seurat)
library(future)
library(furrr)

# Evaluate future/furrr calls in 10 background R sessions.
plan(multisession, workers = 10)
# Fix the RNG seed so random steps below (e.g. slice_sample) are repeatable.
set.seed(1)

# load annotated pathology regions

In [None]:
# Read the pathology region table and preview 20 randomly chosen rows.
pathology_regions <- data.table::fread(
    '/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH/figure2/pathology_regions_postQC.csv'
)
pathology_regions %>% slice_sample(n = 20)

# merge merfish objects

In [None]:
# Collect the annotated per-sample .rds files (full paths).
merfish_files <- list.files(
    path = '/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH_niches/labeled_seurat_objects/renamed_cell_states/',
    pattern = "annotated_.*.rds",
    full.names = TRUE
)
# Keep only the files whose path contains one of the patient IDs present in
# the pathology table (IDs are OR-ed together into a single regex).
merfish_files <- grep(
    pattern = paste(unique(pathology_regions$PatientID), collapse = '|'),
    x = merfish_files,
    value = TRUE
)
# Report how many files survived the filter, then list them one per line.
length(merfish_files)
writeLines(merfish_files)

In [None]:
# Load one saved Seurat object and return its raw count matrix.
#
# f: path to an .rds file that holds a Seurat object.
# Returns: the 'counts' matrix extracted by GetAssayData().
getCounts <- function(f) {
    temp <- readr::read_rds(f)
    # Pass 'counts' by name. The second positional argument of GetAssayData()
    # is `slot` in SeuratObject 4 but `assay` in SeuratObject 5, so a bare
    # positional 'counts' is version-dependent. `slot =` is understood by both
    # (v5 accepts it as a deprecated alias of `layer =`).
    GetAssayData(temp, slot = 'counts')
}

In [None]:
# Read the count matrix of every file through the active future plan and
# time the whole operation.
system.time({
    counts <- future_map(.x = merfish_files, .f = getCounts)
})
# One list element per input file.
length(counts)

In [None]:
# Column-bind the per-file count matrices into a single matrix (timed).
system.time({
    if (length(counts) == 0) {
        stop("no count matrices were loaded; nothing to merge", call. = FALSE)
    }
    # cbind() glues matrices together by row position and never aligns on row
    # names, so refuse to merge unless every matrix lists the same features in
    # the same order; otherwise rows would be silently mismatched.
    if (!all(vapply(
        counts,
        function(m) identical(rownames(m), rownames(counts[[1]])),
        logical(1)
    ))) {
        stop(
            "count matrices do not share identical, identically ordered row names",
            call. = FALSE
        )
    }
    merged_counts <- do.call(cbind, counts)
})
dim(merged_counts)

In [None]:
# Free the per-file matrices now that they have been merged. `temp` is only
# ever assigned inside getCounts(), so a plain rm(temp) warns
# "object 'temp' not found" in a fresh session; remove whichever of the two
# names actually exists in the workspace instead.
rm(list = intersect(c('counts', 'temp'), ls()))
gc()

# create seurat object

In [None]:
# Build the Seurat object from the merged count matrix and print its summary.
merged_merfish <- CreateSeuratObject(counts = merged_counts)
merged_merfish

In [None]:
# Preview the object right after creation.
head(merged_merfish)

In [None]:
# Tag every cell with the assay technology.
merged_merfish@meta.data$technology <- 'MERFISH'

In [None]:
# Sample ID = cell name with everything from the first underscore onwards
# removed, stored as a factor.
merged_merfish@meta.data$orig.ident <- as.factor(
    gsub("_.*", "", rownames(merged_merfish@meta.data))
)

In [None]:
# Preview again to confirm the new metadata columns.
head(merged_merfish)

# collect metadata

In [None]:
# Read the per-cell annotation table and list its columns.
merfish_annotations <- data.table::fread(
    '/n/data1/bwh/medicine/korsunsky/lab/mup728/CRC_MERFISH_niches/labeled_seurat_objects/renamed_cell_states/renamed_merfish_cell_types.csv'
)

colnames(merfish_annotations)

In [None]:
# Preview the columns that will be carried into the Seurat metadata.
head(
    select(merfish_annotations, cell, orig.ident, ClusterTop, knn_renamed_cell_states)
)

In [None]:
# Plain data.frame with the annotation columns plus a constant technology
# label, indexed by cell name.
temp <- merfish_annotations %>%
    select(cell, orig.ident, ClusterTop, knn_renamed_cell_states) %>%
    mutate(technology = 'MERFISH') %>%
    as.data.frame()
rownames(temp) <- temp$cell

In [None]:
# Drop annotations for cells that are not in the merged object.
temp <- filter(temp, cell %in% rownames(merged_merfish@meta.data))
dim(temp)

In [None]:
tail(temp)

In [None]:
# Compare the number of annotated cells with the size of the object.
dim(temp)
dim(merged_merfish)

In [None]:
# Expose the cell name as a regular column so it can be used as a join key.
merged_merfish@meta.data$cell <- rownames(merged_merfish@meta.data)

In [None]:
# Attach the annotation columns from `temp` to the per-cell metadata.
# dplyr joins return a data frame whose row names are reset to 1..n, but the
# row names of meta.data are what ties each metadata row to its cell, so they
# are restored from the `cell` column before the slot is overwritten.
merged_merfish@meta.data <- local({
    meta <- merged_merfish@meta.data
    # Same keys a natural join would pick (all shared columns), stated
    # explicitly so the join is silent and its keys are visible.
    joined <- left_join(
        meta, temp,
        by = intersect(colnames(meta), colnames(temp))
    )
    # Duplicate cells in `temp` would multiply rows and desynchronise the
    # metadata from the assay data.
    if (nrow(joined) != nrow(meta)) {
        stop("annotation join changed the number of metadata rows", call. = FALSE)
    }
    rownames(joined) <- joined$cell
    joined
})

In [None]:
# Cells that received no annotation from the join are labelled 'Mast' in both
# annotation columns.
# NOTE(review): this treats every unannotated cell as a mast cell -- confirm.
merged_merfish@meta.data$ClusterTop <- replace(
    merged_merfish@meta.data$ClusterTop,
    is.na(merged_merfish@meta.data$ClusterTop),
    'Mast'
)
merged_merfish@meta.data$knn_renamed_cell_states <- replace(
    merged_merfish@meta.data$knn_renamed_cell_states,
    is.na(merged_merfish@meta.data$knn_renamed_cell_states),
    'Mast'
)

In [None]:
head(merged_merfish@meta.data)

In [None]:
# The annotation helper table is no longer needed.
rm(temp)
gc()

In [None]:
# How many pathology sample names appear among the object's sample IDs,
# followed by the number of distinct IDs on each side.
sum(unique(pathology_regions$sample_name) %in% merged_merfish@meta.data$orig.ident)
length(unique(pathology_regions$sample_name))
length(unique(merged_merfish@meta.data$orig.ident))

In [None]:
# Collapse the 'G4659-CP-MET' sample ID to 'G4659'.
merged_merfish@meta.data$orig.ident[
    which(merged_merfish@meta.data$orig.ident == 'G4659-CP-MET')
] <- 'G4659'

In [None]:
# Patient IDs in the pathology table that have no cells in the object ...
local({
    patient_ids <- unique(pathology_regions$PatientID)
    sample_ids <- unique(merged_merfish@meta.data$orig.ident)
    patient_ids[!patient_ids %in% sample_ids]
})

# ... and sample IDs in the object that are missing from the pathology table.
local({
    patient_ids <- unique(pathology_regions$PatientID)
    sample_ids <- unique(merged_merfish@meta.data$orig.ident)
    sample_ids[!sample_ids %in% patient_ids]
})

In [None]:
merged_merfish

# normalize, scale, pca

In [None]:
# Scale factor for normalization: the median, across samples, of each
# sample's median total counts per cell.
normFactor <- merged_merfish@meta.data %>%
    group_by(orig.ident) %>%
    summarise(medianCounts = median(nCount_RNA)) %>%
    pull(medianCounts) %>%
    median()
normFactor

In [None]:
# List the objects currently in the workspace.
ls()

In [None]:
# Raise the size limit for globals shipped to future workers
# (30000 * 1024^2 bytes), then switch to sequential evaluation.
options(future.globals.maxSize = 30000 * 1024^2)
plan(sequential)

# Log-normalize with the sample-derived scale factor, pick variable features,
# scale them and compute 50 principal components.
merged_merfish <- NormalizeData(
    merged_merfish,
    normalization.method = "LogNormalize",
    scale.factor = normFactor
) %>%
    FindVariableFeatures() %>%
    ScaleData() %>%
    RunPCA(npcs = 50)
    # alternative, not used:
    # singlecellmethods::RunBalancedPCA(weight.by = 'orig.ident', npcs=50)
merged_merfish

# cache

In [None]:
# Checkpoint the normalized / scaled / PCA object to the working directory.
# NOTE(review): the file name says "harmonized", but no harmony reduction has
# been attached to the object at this point; the same path is written again
# further down, after the 'harmony' reduction is added -- confirm the naming.
readr::write_rds(merged_merfish, 'harmonized_merfish_20241105.rds')

# harmonize

In [None]:
# Object summary before batch correction.
merged_merfish

In [None]:
# Standard deviation of each of the 50 PCs.
ElbowPlot(merged_merfish, ndims = 50)

In [None]:
# First rows of the PCA cell embeddings.
head(Embeddings(merged_merfish, 'pca'))

In [None]:
head(merged_merfish@meta.data)

In [None]:
# Number of distinct sample IDs (the variable used for batch correction).
length(unique(merged_merfish@meta.data$orig.ident))

In [None]:
# Attach the packages again (presumably so this cell can be run on its own
# after restarting the session). library() stops immediately if a package is
# missing, whereas require() would only warn and return FALSE.
library(tidyverse)
library(ggthemes)
library(harmony)
library(Seurat)
set.seed(1)

# merged_merfish = readr::read_rds('harmonized_merfish_20241105.rds')

# Harmony through the Seurat interface (kept for reference, not run):
# system.time({
# merged_merfish = merged_merfish %>%
#     harmony::RunHarmony(
#         "orig.ident", 
#         plot_convergence = TRUE, 
#         lambda = NULL,
#         max.iter = 10,
#         early_stop = F
# )})

# Harmony on the first 15 PCs with orig.ident as the batch variable; this is
# presumably the call that produced the cached embedding loaded below
# (kept for reference, not re-run):
# system.time({
# merfish_hpca = harmony::RunHarmony(data_mat = Embeddings(merged_merfish, 'pca')[,1:15],
#                                   meta_data = merged_merfish@meta.data,
#                                   vars_use = 'orig.ident', verbose = TRUE)})

# readr::write_rds(merfish_hpca, 'merfish_hpca_20250212.rds')

# Load the cached Harmony embedding from the working directory.
merfish_hpca <- readr::read_rds('merfish_hpca_20250212.rds')


In [None]:
# Size of the cached Harmony embedding matrix.
dim(merfish_hpca)

In [None]:
merged_merfish

In [None]:
# Store the Harmony embedding on the object as the 'harmony' reduction.
merged_merfish[['harmony']] <- Seurat::CreateDimReducObject(
    embeddings = merfish_hpca,
    key = 'HPCA_',
    assay = 'RNA',
    global = TRUE
)

In [None]:
# First rows of the stored Harmony embedding.
head(Embeddings(merged_merfish, 'harmony'))

In [None]:
require(scattermore)
# Rasterized scatter of Harmony components 1 vs 2.
ggplot() +
    geom_scattermost(
        xy = as.matrix(
            Embeddings(merged_merfish, 'harmony')[, c('HPCA_1', 'HPCA_2')]
        )
    ) +
    ggpubr::theme_pubr()


In [None]:
# Rasterized scatter of Harmony components 3 vs 4.
ggplot() +
    geom_scattermost(
        xy = as.matrix(
            Embeddings(merged_merfish, 'harmony')[, c('HPCA_3', 'HPCA_4')]
        )
    ) +
    ggpubr::theme_pubr()

## cache

In [None]:
# Overwrite the checkpoint written after PCA (same path), now that the object
# also carries the 'harmony' reduction.
readr::write_rds(merged_merfish, 'harmonized_merfish_20241105.rds')

In [None]:
# Show the working directory, i.e. where the relative .rds paths resolve.
getwd()

# umap

In [None]:
# Run UMAP on the Harmony embedding (timed); `ret_extra = 'fgraph'` makes
# umap() return a list that also contains the fuzzy graph.
system.time({
    U <- uwot::umap(
        Embeddings(merged_merfish, 'harmony'),
        min_dist = 0.05,
        spread = 0.30,
        ret_extra = 'fgraph',
        fast_sgd = TRUE
    )
})
# Label the two UMAP coordinates.
colnames(U$embedding) <- c('HUMAP1', 'HUMAP2')

In [None]:
# Name the rows/columns of the fuzzy graph after the cells. The names are
# taken from the Harmony embedding, which is the matrix umap() was run on, so
# their order is guaranteed to match the graph. rownames(meta.data) is not a
# reliable source here because dplyr joins do not keep row names.
rownames(U$fgraph) <- colnames(U$fgraph) <-
    rownames(Embeddings(merged_merfish, 'harmony'))

# Store the UMAP coordinates on the object as the 'humap' reduction.
merged_merfish[['humap']] <- Seurat::CreateDimReducObject(
    embeddings = U$embedding,
    assay = 'RNA',
    key = 'HUMAP_',
    global = TRUE
)

In [None]:
# Convert the UMAP fuzzy graph to a Seurat Graph, tie it to the object's
# default assay and store it as 'humap_fgraph'.
new_graph <- Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) <- DefaultAssay(merged_merfish)
merged_merfish[['humap_fgraph']] <- new_graph

In [None]:
# Drop the temporaries and reclaim memory.
rm(new_graph, U)
gc()

# cache

In [None]:
# Save the object with the harmony and humap reductions and the
# humap_fgraph graph to the working directory.
readr::write_rds(merged_merfish, 'merged_harmonized_merfish_20241018.rds')

# plot by technology

In [None]:
# Reload the cached object and print its summary.
merged_merfish <- readr::read_rds('merged_harmonized_merfish_20241018.rds')
merged_merfish

In [None]:
# UMAP colored and faceted by technology.
DimPlot(
    merged_merfish,
    reduction = 'humap',
    group.by = 'technology',
    split.by = 'technology',
    shuffle = TRUE,
    raster = TRUE
) +
    theme_bw()

# plot by cell lineage

In [None]:
# UMAP of the MERFISH cells with the default grouping.
DimPlot(
    object = subset(merged_merfish, technology == 'MERFISH'),
    reduction = 'humap',
    #group.by = 'knn_renamed_cell_states',
    label = FALSE,
    shuffle = TRUE,
    raster = TRUE
) +
    theme_bw()

In [None]:
# UMAP of the MERFISH cells colored and labelled by ClusterTop.
DimPlot(
    object = subset(merged_merfish, technology == 'MERFISH'),
    reduction = 'humap',
    group.by = 'ClusterTop',
    label = TRUE,
    shuffle = TRUE,
    raster = TRUE
) +
    theme_bw()

In [None]:
# Cell counts per knn_renamed_cell_states value among the MERFISH cells.
merged_merfish@meta.data$knn_renamed_cell_states[
    merged_merfish@meta.data$technology == 'MERFISH'
] %>% table

In [None]:
# UMAP of the MERFISH cells colored by knn_renamed_cell_states, legend hidden.
DimPlot(
    object = subset(merged_merfish, technology == 'MERFISH'),
    reduction = 'humap',
    group.by = 'knn_renamed_cell_states',
    raster = TRUE
) +
    theme_bw() +
    theme(legend.position = 'none')