-
Notifications
You must be signed in to change notification settings - Fork 0
/
Pancreas_SingleCell_Analysis_Script.Rmd
242 lines (189 loc) · 9.74 KB
/
Pancreas_SingleCell_Analysis_Script.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
---
title: "R Notebook"
output: html_notebook
---
This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*.
```{r}
#Data aquired from Azimuth (https://azimuth.hubmapconsortium.org/references/#Human%20-%20Pancreas) is loaded
name = readRDS(file = "../shr523/enge.rds")
#amount of mitochondrial genes are determined and visualised
name <- PercentageFeatureSet(name, pattern = "^MT-", col.name = "percent.mt")
VlnPlot(name, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0)
```
```{r}
#cells are quality controled using the specified restrictions
name <- subset(name, subset = nFeature_RNA > quantile(name$nFeature_RNA,.01) &
nFeature_RNA < quantile(name$nFeature_RNA,.99) &
nCount_RNA < quantile(name$nCount_RNA,.99))
#percent.mt < quantile(name$percent.mt, .95))
#Potential mitochondrial and ribosomal genes are removed
Mt <- grepl(rownames(name), pattern=("^MT-"))
RP <- grepl(rownames(name), pattern=("^RP[SL]"))
keep_genes <- rownames(name)[!(Mt+RP)]
name@assays$RNA@counts <- name@assays$RNA@counts[keep_genes,]
name@assays$RNA@data <- name@assays$RNA@data[keep_genes,]
name@assays$RNA@meta.features <- name@assays$RNA@meta.features[keep_genes,]
dim(name)
rm(Mt, RP, keep_genes)
dim(name)
```
### 3.2 Check the cell cycle noises
```{r}
#lists of cell cycle genes (both s-phase and g2m-phase) is loaded. Contact authors to acquire the specific list used.
s.genes <- read.table("/home/kgr851/new_analysis_new_cc/cc/s.txt")
g2m.genes <- read.table("/home/kgr851/new_analysis_new_cc/cc/g2m.txt")
#the RDS object is normalized without taking cell cycle into consideration to see if cell cycle regression is needed determined by a PCA.
name = SCTransform(name, verbose = FALSE, variable.features.n=2000,
vars.to.regress = c("nFeature_RNA", "nCount_RNA"))
name <- RunPCA(name, features = c(s.genes$V1, g2m.genes$V1))
DimPlot(name, reduction = "pca")
#Clearly, there are cell cycle effects, so here we will regress them out
```
### 3.2 Regress out cell cycle noises together with seq depth varation**
```{r}
#Calculat the cell cycle scores
name <- CellCycleScoring(name, s.features = s.genes$V1, g2m.features = g2m.genes$V1, set.ident = TRUE)
#Normalize data taking the cell cycle phase into consideration
DefaultAssay(name) <- "RNA"
name = SCTransform(name, verbose = FALSE, variable.features.n=2000,
vars.to.regress = c("nFeature_RNA", "nCount_RNA", "S.Score", "G2M.Score"))
#plot to see if the cell cycle noise have been diminished
name <- RunPCA(name, features = c(s.genes$V1, g2m.genes$V1))
Idents(name) <- name$Phase
DimPlot(name, reduction = "pca")
```
# 4. QC
```{r}
#Do PCA and UMAP reduction for data visualisation
name <- RunPCA(name, features = name@assays$SCT@var.features, npcs=100)
name <- RunUMAP(name, reduction = "pca", dims = 1:40, seed.use=888, repulsion.strength = 0.1, min.dist=0.5, n.neighbors = 30L, n.epochs = NULL)
#Identify parameters for clustering
res.range <- c(seq(0.05,1,0.05))
assay <- "SCT"
dims <- 20
reduction <- "pca"
#Defining a function, which clusters a RDS in a range of resolutions, and calculates both the silhouette score and the negative silhouette proportion to determine which resolution is optimal
clusteringKit <- function(name, assay, dim, res.range, reduction){
DefaultAssay(name) <- assay
#using FindNeighbors (Seurat), a KNN based clustering algorithm, to compute a SNN graph, which then can be used to cluster the cells w. their neighbors
name <- FindNeighbors(name, dims=1:dim, reduction=reduction)
for (i in res.range){
#FindClusters is used to cluster the aforementioned SNN graph by optimising the modularity function. This is carried out for all resolutions in the range of interest.
name <- FindClusters(name, resolution=i)
}
DefaultAssay(name) <- assay
dist.matrix <- dist(x = Embeddings(object = name[[reduction]])[, 1:dim])
clusters <- paste0(assay,"_snn_res.", res.range)
getSil <- function(clr) {
clr <- name@meta.data[[clr]]
sil <- cluster::silhouette(x = as.numeric(x = as.factor(x = clr)), dist = dist.matrix)
sil_value <- sil
return(sil_value)
}
sls <- lapply(clusters, getSil) #mc.cores = 1)
sls_median <- sapply(sls, function(x) median(x[,3])) %>% setNames(., res.range)
sls_neg_prop <- sapply(sls, function(x) sum(x[,3]<0)/length(x[,3])) %>% setNames(., res.range)
p_list <- lapply(res.range, function(res){
Idents(name) = name@meta.data[paste0(assay, "_snn_res.",res)]
DimPlot(name, reduction = "umap", label = TRUE, group.by = paste0(assay, "_snn_res.",res), pt.size =0.3, raster=F)
})
return(list(name=name, sls_median=sls_median, sls_neg_prop=sls_neg_prop, dimplots=p_list))
}
#update the RDS object
clustered <- clusteringKit(name, assay="SCT", dim=30, res.range=seq(0.05, 3, 0.05), reduction="pca")
name <- clustered$name
#plot to find highest silhouette score w. low negative silhouette proportion
plot(clustered$sls_media~seq(0.05, 3, 0.05), type="l", ylab = "Silhouette score") + plot(clustered$sls_neg_prop~seq(0.05, 3, 0.05), type="l", ylab = "Negative silhouette proportion")
#resolution of 0.3-0.4 looks to havee high silhouette and low negative silhouette score. These are inspected by doing DimPlots
DimPlot(name, reduction = "umap", label = T, group.by = "SCT_snn_res.0.25", pt.size =3, raster=F, label.size = 7, label.box = T, repel = T)
DimPlot(name, reduction = "umap", label = T, group.by = "SCT_snn_res.0.3", pt.size =3, raster=F, label.size = 7, label.box = T, repel = T)
DimPlot(name, reduction = "umap", label = T, group.by = "SCT_snn_res.0.35", pt.size =3, raster=F, label.size = 7, label.box = T, repel = T)
DimPlot(name, reduction = "umap", label = T, group.by = "SCT_snn_res.0.4", pt.size =3, raster=F, label.size = 7, label.box = T, repel = T)
#A resolution of 0.3 is deemed optimal
```
##DEG analysis ###
```{r}
#Idents are changed to the optimal resolution determined above.
Idents(name) <- "SCT_snn_res.0.3"
# find markers for every cluster compared to all remaining cells, report only the positive ones
name_markers_0.3 <- FindAllMarkers(name, min.pct = 0.1, logfc.threshold = 0.5, only.pos = T)
#Use this line to brows DEG of different clusters e.g. GCG
name_markers_0.3 %>% filter(gene == "GCG")
```
```{r}
#The expression of markers indicative of different cell types are explored in the data set, and the clustered are annotated accordingly. The ket markers were obtained from Azimuth (https://azimuth.hubmapconsortium.org/references/#Human%20-%20Pancreas)
#Beta cells
FeaturePlot(name,
reduction = "umap",
features = c("IAPP", "INS", "DLK1"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=3) & NoLegend()
#alpha cells
FeaturePlot(name,
reduction = "umap",
features = c("GCG", "TTR", "PPP1R1A"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=3) & NoLegend()
#delta cells
FeaturePlot(name,
reduction = "umap",
features = c("SST", "RBP4", "SERPINA1"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=3) & NoLegend()
#ductal cells
FeaturePlot(name,
reduction = "umap",
features = c("SPP1", "MMP7", "IGFBP7"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=3) & NoLegend()
#ductal cells
FeaturePlot(name,
reduction = "umap",
features = c("SPP1", "MMP7", "IGFBP7"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=3) & NoLegend()
#mesenchymal cells
FeaturePlot(name,
reduction = "umap",
features = c("THY1"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=1) & NoLegend()
#acinar cells
FeaturePlot(name,
reduction = "umap",
features = c("REG1A", "PRSS1", "CTRB2"),
pt.size = 1, keep.scale = "feature",
label=F, ncol=3) & NoLegend()
#The cell type for each cluster is added in the meta data
name@meta.data <- name@meta.data %>%
mutate(cluster = case_when(
SCT_snn_res.0.3 == 0 ~ "alpha cells (1)",
SCT_snn_res.0.3 == 1 ~ "beta cells (1)",
SCT_snn_res.0.3 == 2 ~ "acinar cells (1)",
SCT_snn_res.0.3 == 3 ~ "alpha cells (2)",
SCT_snn_res.0.3 == 4 ~ "ductal cells (1)",
SCT_snn_res.0.3 == 5 ~ "ductal cells (2)",
SCT_snn_res.0.3 == 6 ~ "alpha cells (3)",
SCT_snn_res.0.3 == 7 ~ "alpha cells (4)",
SCT_snn_res.0.3 == 8 ~ "beta cells (2)",
SCT_snn_res.0.3 == 9 ~ "delta cells",
SCT_snn_res.0.3 == 10 ~ "alpha cells (5)",
SCT_snn_res.0.3 == 11 ~ "acinar cells (2)",
SCT_snn_res.0.3 == 12 ~ "mesenchymal cells",
SCT_snn_res.0.3 == 13 ~ "alpha cells (6)",
SCT_snn_res.0.3 == 14 ~ "acinar cells (3)",
SCT_snn_res.0.3 == 15 ~ "alpha cells (7)",
SCT_snn_res.0.3 == 16 ~ "ductal cells (3)"
))
#Data is visualised either with or without labels
DimPlot(name, reduction = "umap", label =T, pt.size = 3, label.size = 5, group.by = "cluster", repel = T)+ theme(legend.position = 'none', axis.title.x=element_blank())
DimPlot(name, reduction = "umap", label =T, pt.size = 3, label.size = 5, repel = T)+ theme(legend.position = 'none', axis.title.x=element_blank())
#Expression of GCGR in the data set is visualsed in a FeaturePlot and ViolinPlot
FeaturePlot(name,
reduction = "umap",
features = c("GCGR"),
pt.size = 3, keep.scale = "feature",max.cutoff = 0.9,
label=F, ncol=1) & NoLegend()
VlnPlot(name, group.by = "cluster", 'GCGR', log=T, sort = "increasing", pt.size = 2) + theme(legend.position = 'none', axis.title.x=element_blank())
```