In [None]:
library(Seurat)
library(ggplot2)
library(openxlsx)
library(Matrix)

# Load data

In [None]:
# Mouse batch 1
# Not included due to high fraction of Ttr
# (it is still loaded because its pool names are used further down to drop
# the matching pools from resequencing run 1)
mouse1 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/210420_DVC_seurat_obj.rds")

# Harmonise hashtag IDs: strip the "MIKG201201-"/"MIKG201202-" prefix and the
# literal "ID", then replace the long treatment names with short codes.
# The replacements are applied sequentially, in this order.
mouse1$hash.mcl.ID <- gsub("MIKG201201-|MIKG201202-", "", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("ID", "", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("AM1213-acute-DIO", "A1-A", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("AM1213-7-days-DIO", "A1-C", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("AM833-acute-DIO", "A8-A", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("AM833-7-days-DIO", "A8-C", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("Vehicle-acute-DIO", "V-A", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("Vehicle-7-days-DIO", "V-C", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("Vehicle-acute-lean", "L-A", mouse1$hash.mcl.ID)
mouse1$hash.mcl.ID <- gsub("VM-7-days-DIO", "WM-C", mouse1$hash.mcl.ID)

# Treatment label = hashtag ID with every "-<digits>" run removed
# (presumably the per-animal number; verify against the hashing sheet)
mouse1$treatment <- gsub("-[0-9]+", "", mouse1$hash.mcl.ID)
mouse1$pool <- mouse1$orig.ident
mouse1$run <- "mouse1"

# QC: percentage of counts from features matching "^mt-", then drop cells with
# more than 50,000 counts, fewer than 500 counts, or more than 2% "mt-" counts.
# The expression selects the cells to discard; invert = TRUE keeps the rest.
mouse1[["percent.mt"]] <- PercentageFeatureSet(mouse1, pattern = "^mt-")
mouse1 <- subset(mouse1, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)


# Mouse batch 2
mouse2 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/210518_DVC_seurat_obj.rds")

# Re-label ID26 from "Vehicle-acute-DIO" to "Vehicle-acute-lean". This must run
# before the generic "Vehicle-acute-DIO" replacement and before "ID" is
# stripped, because the pattern depends on both substrings still being present.
mouse2$hash.mcl.ID <- gsub("Vehicle-acute-DIO-ID26", "Vehicle-acute-lean-ID26", mouse2$hash.mcl.ID)

# Replace the long treatment names with short codes (applied sequentially)
mouse2$hash.mcl.ID <- gsub("AM1213-acute-DIO", "A1-A", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("AM1213-7-days-DIO", "A1-C", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("AM833-acute-DIO", "A8-A", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("AM833-7-days-DIO", "A8-C", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("Vehicle-acute-DIO", "V-A", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("Vehicle-7-days-DIO", "V-C", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("Vehicle-acute-lean", "L-A", mouse2$hash.mcl.ID)
mouse2$hash.mcl.ID <- gsub("VM-7-days-DIO", "WM-C", mouse2$hash.mcl.ID)
# Strip the literal "ID" last (see the ID26 fix above)
mouse2$hash.mcl.ID <- gsub("ID", "", mouse2$hash.mcl.ID)

# Treatment label = hashtag ID with every "-<digits>" run removed
mouse2$treatment <- gsub("-[0-9]+", "", mouse2$hash.mcl.ID)
mouse2$pool <- mouse2$orig.ident
mouse2$run <- "mouse2"

# QC: percentage of counts from features matching "^mt-", then drop cells with
# more than 50,000 counts, fewer than 500 counts, or more than 2% "mt-" counts.
# The expression selects the cells to discard; invert = TRUE keeps the rest.
mouse2[["percent.mt"]] <- PercentageFeatureSet(mouse2, pattern = "^mt-")
mouse2 <- subset(mouse2, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)


# Mouse batch 3
mouse3 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/210708_DVC_1_seurat_obj.rds")
# Treatment label = hashtag ID with every "-<digits>" run removed
mouse3$treatment <- gsub("-[0-9]+", "", mouse3$hash.mcl.ID)
mouse3$pool <- mouse3$orig.ident
mouse3$run <- "mouse3"
# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-". The expression selects the
# cells to discard; invert = TRUE keeps the rest.
mouse3[["percent.mt"]] <- PercentageFeatureSet(mouse3, pattern = "^mt-")
mouse3 <- subset(mouse3, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)


# Mouse batch 4
mouse4 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/210708_DVC_2_seurat_obj.rds")
# Treatment label = hashtag ID with every "-<digits>" run removed
mouse4$treatment <- gsub("-[0-9]+", "", mouse4$hash.mcl.ID)
mouse4$pool <- mouse4$orig.ident
mouse4$run <- "mouse4"
# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-". The expression selects the
# cells to discard; invert = TRUE keeps the rest.
mouse4[["percent.mt"]] <- PercentageFeatureSet(mouse4, pattern = "^mt-")
mouse4 <- subset(mouse4, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)


# Mouse batch 5
mouse5 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/210708_DVC_3_seurat_obj.rds")

# Barcode lists for pools SI-TT-A8 and SI-TT-C7 (headerless files; the first
# column, V1, is used as the barcode)
A8.barcodes <- read.csv("/projects/mludwig/DVC/data/mouse_snRNA/old_Dec22/SI-TT-A8_barcodes.txt", header = FALSE)
C7.barcodes <- read.csv("/projects/mludwig/DVC/data/mouse_snRNA/old_Dec22/SI-TT-C7_barcodes.txt", header = FALSE)
# Barcodes that occur in both lists
A8.C7.barcodes <- intersect(A8.barcodes$V1, C7.barcodes$V1)

# Remove SI-TT-C7 cells whose barcode (the part of the cell name after the
# last "_") is shared with SI-TT-A8. idx.remove holds column indices of the
# cells to discard; invert = TRUE keeps every other cell.
idx.remove <- which(gsub(".*_", "", colnames(mouse5)) %in% A8.C7.barcodes & mouse5$orig.ident == "SI-TT-C7")
mouse5 <- subset(mouse5, cells = idx.remove, invert = TRUE)

# Treatment label = hashtag ID with every "-<digits>" run removed
mouse5$treatment <- gsub("-[0-9]+", "", mouse5$hash.mcl.ID)
mouse5$pool <- mouse5$orig.ident
mouse5$run <- "mouse5"

# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-"
mouse5[["percent.mt"]] <- PercentageFeatureSet(mouse5, pattern = "^mt-")
mouse5 <- subset(mouse5, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)


# Mouse batch 6
mouse6 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/211015_DVC_seurat_obj.rds")
# Treatment label = hashtag ID with every "-<digits>" run removed
mouse6$treatment <- gsub("-[0-9]+", "", mouse6$hash.mcl.ID)
mouse6$pool <- mouse6$orig.ident
mouse6$run <- "mouse6"
# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-". The expression selects the
# cells to discard; invert = TRUE keeps the rest.
mouse6[["percent.mt"]] <- PercentageFeatureSet(mouse6, pattern = "^mt-")
mouse6 <- subset(mouse6, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)

# Resequencing 1
mouse7 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/220208_DVC-reseq_0141_seurat_obj.rds")
# Rename "VM" to "WM" in the hashtag IDs, matching the "WM" label used for
# batches 1 and 2
mouse7$hash.mcl.ID <- gsub("VM", "WM", mouse7$hash.mcl.ID)
# Treatment label = hashtag ID with every "-<digits>" run removed
mouse7$treatment <- gsub("-[0-9]+", "", mouse7$hash.mcl.ID)
mouse7$pool <- mouse7$orig.ident
mouse7$run <- "mouse7"
# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-". The expression selects the
# cells to discard; invert = TRUE keeps the rest.
mouse7[["percent.mt"]] <- PercentageFeatureSet(mouse7, pattern = "^mt-")
mouse7 <- subset(mouse7, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)
# Remove pools originating from mouse batch 1
# (any pool name still present in the QC-filtered mouse1 object)
mouse7 <- subset(mouse7, pool %in% unique(mouse1$pool), invert = TRUE)

# Resequencing 2
mouse8 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/220208_DVC-reseq_0142_seurat_obj.rds")
# Rename "VM" to "WM" in the hashtag IDs, matching the "WM" label used for
# batches 1 and 2
mouse8$hash.mcl.ID <- gsub("VM", "WM", mouse8$hash.mcl.ID)
# Treatment label = hashtag ID with every "-<digits>" run removed
mouse8$treatment <- gsub("-[0-9]+", "", mouse8$hash.mcl.ID)
mouse8$pool <- mouse8$orig.ident
mouse8$run <- "mouse8"
# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-". The expression selects the
# cells to discard; invert = TRUE keeps the rest.
mouse8[["percent.mt"]] <- PercentageFeatureSet(mouse8, pattern = "^mt-")
mouse8 <- subset(mouse8, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)

# Resequencing 3
mouse9 <- readRDS("/projects/mludwig/DVC/data/mouse_snRNA/220401_DVC_reseq_seurat_obj.rds")
# Treatment label = hashtag ID with every "-<digits>" run removed
mouse9$treatment <- gsub("-[0-9]+", "", mouse9$hash.mcl.ID)
mouse9$pool <- mouse9$orig.ident
mouse9$run <- "mouse9"
# QC: drop cells with more than 50,000 counts, fewer than 500 counts, or more
# than 2% of counts from features matching "^mt-". The expression selects the
# cells to discard; invert = TRUE keeps the rest.
mouse9[["percent.mt"]] <- PercentageFeatureSet(mouse9, pattern = "^mt-")
mouse9 <- subset(mouse9, nCount_RNA > 5e+04 | nCount_RNA < 500 | percent.mt > 2, invert = TRUE)

# Merge data

In [None]:
# Remove pools that have been resequenced
# Pool names are coerced to character before combining: `pool` is copied from
# orig.ident, which may be a factor, and c() on factors returns integer codes
# on R < 4.1, which would silently match nothing in the %in% test below.
resequenced.pools <- unique(c(as.character(mouse7$pool),
                              as.character(mouse8$pool),
                              as.character(mouse9$pool)))
# All mouse batch 1 are of bad quality
# All pools originating from mouse batch 3 have been resequenced
# All pools originating from mouse batch 4 have been resequenced
# -> only batches 2, 5 and 6 are carried forward from the original runs
mouse.list <- list(mouse2 = mouse2, mouse5 = mouse5, mouse6 = mouse6)

# Drop from each original batch the cells whose pool also appears in a
# resequencing run, so each pool is represented only once after merging.
# The expression selects the cells to discard; invert = TRUE keeps the rest.
for (i in seq_along(mouse.list)) {
  mouse.list[[i]] <- subset(mouse.list[[i]], pool %in% resequenced.pools, invert = TRUE)
}

# Append the resequencing runs unchanged
mouse.list[["mouse7"]] <- mouse7
mouse.list[["mouse8"]] <- mouse8
mouse.list[["mouse9"]] <- mouse9

# Merge: first object as `x`, all remaining objects as `y`
mouse <- merge(x = mouse.list[[1]], y = mouse.list[-1])
# Expose the harmonised hashtag ID under the column name kept when saving
mouse$hash.ID <- mouse$hash.mcl.ID

# Normalize

In [None]:
# SCTransform normalisation of the merged object using the "qpoisson" method;
# progress output is suppressed.
mouse <- SCTransform(mouse, verbose = FALSE, method = "qpoisson")

# Run dimensionality reduction and clustering

In [None]:
# PCA
# Use every feature present in the SCT scale.data matrix as variable features,
# then compute 100 principal components.
VariableFeatures(mouse[["SCT"]]) <- rownames(mouse[["SCT"]]@scale.data)
mouse <- RunPCA(mouse, verbose = FALSE, npcs = 100)

# Select number of PCs
# (visual inspection of all 100 computed PCs; the first 30 are used below)
ElbowPlot(mouse, ndims = 100)

# UMAP
# Embedding on PCs 1-30 with 50 neighbours
mouse <- RunUMAP(mouse, dims = 1:30, n.neighbors = 50)

# Find clusters

In [None]:
# Neighbour graph on the same PCs (1-30) and neighbourhood size (50) as the UMAP
mouse <- FindNeighbors(mouse, dims = 1:30, k.param = 50, verbose = FALSE)
# Cluster at a coarse (0.1) and a fine (1) resolution; the results are the
# SCT_snn_res.0.1 and SCT_snn_res.1 metadata columns kept when saving.
mouse <- FindClusters(mouse, resolution = 0.1, verbose = FALSE)
mouse <- FindClusters(mouse, resolution = 1, verbose = FALSE)

# Save 

In [None]:
# Metadata columns retained in the saved object: count/feature totals for the
# RNA and SCT assays, QC metric, sample annotations, and both clusterings.
meta.keep <- c(
  "nCount_RNA", "nFeature_RNA",
  "nCount_SCT", "nFeature_SCT",
  "percent.mt",
  "pool", "hash.ID", "treatment", "run",
  "SCT_snn_res.0.1", "SCT_snn_res.1"
)
# Discard every other metadata column, keeping the order listed above
mouse@meta.data <- mouse@meta.data[, meta.keep]

# Write the final merged, normalised and clustered object to disk
saveRDS(mouse, file = "/projects/mludwig/DVC/output/Seurat_objs/mouse/mouse_Seurat_obj.rds")