In [None]:
# Imports
library(Seurat)

"package 'Seurat' was built under R version 4.4.2"
Loading required package: SeuratObject

"package 'SeuratObject' was built under R version 4.4.2"
Loading required package: sp

"package 'sp' was built under R version 4.4.2"

Attaching package: 'SeuratObject'


The following objects are masked from 'package:base':

    intersect, t


Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats

"package 'matrixStats' was built under R version 4.4.2"

Attaching package: 'MatrixGenerics'


The following objects are masked from 'package:matrixStats':

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars

In [8]:
# Number of columns in `samples` (the merged object built in the data-loading
# cell of this notebook; that cell must have been run first)
ncol(samples)

In [None]:
# Set wd (all relative paths below, including the exported .rds file, are
# resolved against this directory)
setwd("../../../Data/Bashore et al. (2024)")
list.files()
getwd()

# Change setting for visualizing plots within VSCODE
options(repr.plot.width = 15, repr.plot.height = 12)

# Read in data: one 10x directory per sample
P4 <- Read10X(data.dir = "P4_S")
P6 <- Read10X(data.dir = "P6_A")
P9 <- Read10X(data.dir = "P9_S")
P13 <- Read10X(data.dir = "P13_S")
P14 <- Read10X(data.dir = "P14_S")
P15 <- Read10X(data.dir = "P15_A")
P16 <- Read10X(data.dir = "P16_A")
P17 <- Read10X(data.dir = "P17_A")
P18 <- Read10X(data.dir = "P18_S")
P19 <- Read10X(data.dir = "P19_A")
P20 <- Read10X(data.dir = "P20_A")
P21 <- Read10X(data.dir = "P21_S")

# Create Seurat objects. The project label is what distinguishes the samples
# after merging, so every label follows the same "<study>, P<number>" pattern.
P4 <- CreateSeuratObject(P4, project = "Bashore et al. (2024), P4")
P6 <- CreateSeuratObject(P6, project = "Bashore et al. (2024), P6")
P9 <- CreateSeuratObject(P9, project = "Bashore et al. (2024), P9")
# Fixed: this label was missing the "P" prefix used by every other sample
P13 <- CreateSeuratObject(P13, project = "Bashore et al. (2024), P13")
P14 <- CreateSeuratObject(P14, project = "Bashore et al. (2024), P14")
P15 <- CreateSeuratObject(P15, project = "Bashore et al. (2024), P15")
P16 <- CreateSeuratObject(P16, project = "Bashore et al. (2024), P16")
P17 <- CreateSeuratObject(P17, project = "Bashore et al. (2024), P17")
P18 <- CreateSeuratObject(P18, project = "Bashore et al. (2024), P18")
P19 <- CreateSeuratObject(P19, project = "Bashore et al. (2024), P19")
P20 <- CreateSeuratObject(P20, project = "Bashore et al. (2024), P20")
P21 <- CreateSeuratObject(P21, project = "Bashore et al. (2024), P21")

# Merge the Seurat objects.
# NOTE(review): no `add.cell.ids` is passed, so merge() renames duplicated
# cell names itself (see the warning this cell prints). Consider passing
# explicit per-sample prefixes if downstream code needs readable cell names.
samples <- merge(P4, y = list(P6, P9, P13, P14, P15, P16, P17, P18, P19, P20, P21))

# Merge layers
samples[["RNA"]] <- JoinLayers(samples[["RNA"]])

# Tag the species and export the raw merged object for later integration
samples[["Species"]] <- "Human"
saveRDS(samples, file = "Bashore_raw.rds")

"Some cell names are duplicated across objects provided. Renaming to enforce unique cell names."


In [None]:
# Number of columns in the merged `samples` object
ncol(samples)

In [3]:
#' Preprocessing function which performs a basic workflow for scRNA seq analysis
#'
#' Splits the RNA assay by `orig.ident`, filters cells on QC metrics,
#' normalizes, selects variable features, scales, runs PCA + JackStraw,
#' integrates the per-sample layers with CCA, re-joins the layers and then
#' clusters and embeds the cells on the integrated reduction.
#'
#' @param data Seurat object with an "RNA" assay and an `orig.ident` column.
#' @param nFeatures_RNA Minimum `nFeature_RNA` a cell needs to be kept.
#' @param nCounts_RNA Minimum `nCount_RNA` a cell needs to be kept.
#' @param percents.mt Maximum `percent.mt` a cell may have to be kept.
#' @param pattern Gene-name prefix used to compute `percent.mt`; genes are
#'   matched with the regular expression `^<pattern>-`.
#' @param resolutions Resolution value(s) passed to `FindClusters()`.
#' @param k.weights Value passed as `k.weight` to `IntegrateLayers()`.
#' @return The processed Seurat object. It contains the reductions "pca",
#'   "integrated.cca", "umap" (computed on "integrated.cca") and
#'   "umap.unintegrated" (computed on the pre-integration PCA).
pre_process <- function(data, nFeatures_RNA = 200, nCounts_RNA = 3, percents.mt = 5, pattern = "MT", resolutions = seq(0.5, 1.5, by = 0.1), k.weights = 100) {
    # Runs JackStraw on the current "pca" reduction and counts how many of the
    # scored PCs have an overall p-value below 0.05. Returns the updated object
    # together with that count.
    score_significant_pcs <- function(object) {
        object <- JackStraw(object, num.replicate = 100)
        object <- ScoreJackStraw(object, dims = 1:20)
        n_significant <- length(which(object@reductions$pca@jackstraw$overall.p.values < 0.05))
        # `1:0` would evaluate to c(1, 0) and silently select the wrong
        # dimensions downstream, so fail loudly instead.
        if (n_significant < 1) {
            stop("JackStraw found no significant principal components (p < 0.05).", call. = FALSE)
        }
        list(object = object, n_significant = n_significant)
    }

    # Split the assay into one layer per sample
    data[["RNA"]] <- split(data[["RNA"]], f = data$orig.ident)

    # Get percentage mt
    data[["percent.mt"]] <- PercentageFeatureSet(data, pattern = sprintf("^%s-", pattern))

    # Keep cells that pass all three QC thresholds
    data <- subset(x = data, subset = nFeature_RNA >= nFeatures_RNA & nCount_RNA >= nCounts_RNA & percent.mt <= percents.mt)

    # Normalization
    data <- NormalizeData(data, normalization.method = "LogNormalize", scale.factor = 10000)

    # Find Variable features
    data <- FindVariableFeatures(data, selection.method = "vst", nfeatures = 2000)

    # Scale Data
    data <- ScaleData(data, vars.to.regress = c("percent.mt", "nFeature_RNA"))

    # Run PCA
    data <- RunPCA(data, npcs = 100, verbose = FALSE)

    # Get the number of significant PCs through JackStraw
    scored <- score_significant_pcs(data)
    data <- scored$object
    significant_pcs <- scored$n_significant

    # UMAP on the unintegrated PCA. It is stored under its own name so that the
    # post-integration UMAP below does not overwrite it.
    data <- RunUMAP(data, reduction = "pca", dims = seq_len(significant_pcs), reduction.name = "umap.unintegrated", reduction.key = "umapunintegrated_", verbose = FALSE)

    # Integrate the per-sample layers with CCA
    data <- IntegrateLayers(object = data, method = CCAIntegration, orig.reduction = "pca", new.reduction = "integrated.cca", k.weight = k.weights)

    data[["RNA"]] <- JoinLayers(data[["RNA"]])

    # Post integration processing
    # NOTE(review): the number of dimensions used on "integrated.cca" below is
    # derived from JackStraw on this recomputed PCA, not from the integrated
    # reduction itself - confirm this is intended.
    data <- RunPCA(data, npcs = 100, verbose = FALSE)

    # Get the number of significant PCs through JackStraw
    scored <- score_significant_pcs(data)
    data <- scored$object
    significant_pcs <- scored$n_significant

    # Clustering and final embedding on the integrated reduction
    integrated_dims <- seq_len(significant_pcs)
    data <- FindNeighbors(data, reduction = "integrated.cca", dims = integrated_dims)
    data <- FindClusters(data, resolution = resolutions)
    data <- RunUMAP(data, dims = integrated_dims, reduction = "integrated.cca", verbose = FALSE)

    return(data)
}

# Run the workflow on the merged samples: mitochondrial genes are matched with
# "^MT-" and cells with more than 10 percent mitochondrial reads are dropped
data <- pre_process(samples, pattern= "MT", percents.mt = 10)

Normalizing layer: counts.P1

Normalizing layer: counts.P2

Normalizing layer: counts.P3

Normalizing layer: counts.P4

Normalizing layer: counts.P5

Normalizing layer: counts.P6

Normalizing layer: counts.P7

Normalizing layer: counts.P8

Finding variable features for layer counts.P1

Finding variable features for layer counts.P2

Finding variable features for layer counts.P3

Finding variable features for layer counts.P4

Finding variable features for layer counts.P5

Finding variable features for layer counts.P6

Finding variable features for layer counts.P7

Finding variable features for layer counts.P8

Regressing out percent.mt, nFeature_RNA

Centering and scaling data matrix

"The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session"
Finding all pairwise anchors

Running C

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35112
Number of edges: 1370691

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9124
Number of communities: 20
Elapsed time: 10 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35112
Number of edges: 1370691

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9046
Number of communities: 21
Elapsed time: 10 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35112
Number of edges: 1370691

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8970
Number of communities: 24
Elapsed time: 12 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 35112
Number of edges: 1370691

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8901
Number of communities: 24
Elapsed time: 11 seconds
Modu

In [None]:
# Save the processed object; the relative path is resolved against the current
# working directory
saveRDS(data, file = "Bashore.rds")