-
Notifications
You must be signed in to change notification settings - Fork 1
/
converting_to_loom.R
82 lines (73 loc) · 4.25 KB
/
converting_to_loom.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
##########################################################
## ccAF: converting_to_loom.R ##
## ______ ______ __ __ ##
## /\ __ \ /\ ___\ /\ \/\ \ ##
## \ \ __ \ \ \___ \ \ \ \_\ \ ##
## \ \_\ \_\ \/\_____\ \ \_____\ ##
## \/_/\/_/ \/_____/ \/_____/ ##
## @Developed by: Plaisier Lab ##
## (https://plaisierlab.engineering.asu.edu/) ##
## Arizona State University ##
## 242 ISTB1, 550 E Orange St ##
## Tempe, AZ 85281 ##
## @Author: Chris Plaisier, Samantha O'Connor ##
## @License: GNU GPLv3 ##
## ##
## If this program is used in your analysis please ##
## mention who built it. Thanks. :-) ##
##########################################################
# docker run -it -v '<where you have U5_hNSC_Neural_G0>:/files' cplaisier/scrna_seq_velocity
# R 3.6.2 Linux (Docker hub instance: cplaisier/scrna_seq_velocity)
# Seurat 3.1.2
library(Seurat)
library(plyr)
library(dplyr)
library(org.Hs.eg.db)
# Set working directory
#setwd('/files/ccAF')
## WT
WT.data = Read10X(data.dir = "data/U5_hNSC/WT/filtered_gene_bc_matrices/hg19/")
WT = CreateSeuratObject(counts = WT.data, project = "WT", min.cells = 3, min.features = 200)
WT[["percent.mt"]] = PercentageFeatureSet(WT, pattern = "^MT-")
pdf('WT_mt_vs_Count.pdf')
FeatureScatter(WT, feature1 = "nCount_RNA", feature2 = "percent.mt")
dev.off()
WT = subset(WT, subset = nCount_RNA < 18000 & percent.mt < 6)
WT = NormalizeData(WT)
WT = FindVariableFeatures(WT, selection.method = "vst", nfeatures = 3000)
all.genes = rownames(WT)
WT = ScaleData(WT, features = all.genes)
WT[['clusts_named']] = clusts_named[tmp5[paste(colnames(WT),'_1',sep='')],'clusts_named']
WT[['clusts_num']] = clusts_num[tmp5[paste(colnames(WT),'_1',sep='')],'clusts_named']
Idents(WT) = 'clusts_named'
WT.markers = FindAllMarkers(WT, only.pos=F, min.pct=0.25, logfc.threshold = 0.2)
WT_marker_genes_0_3 = (WT.markers %>% group_by(cluster) %>% filter(avg_logFC>0.3))$gene
WT_marker_genes_0_5 = (WT.markers %>% group_by(cluster) %>% filter(avg_logFC>0.5))$gene
WT_marker_gene_top50 = (WT.markers %>% group_by(cluster) %>% top_n(50,avg_logFC))$gene
## sgTAOK1
taok1.data = Read10X(data.dir = "data/U5_hNSC/sgTAOK1/filtered_gene_bc_matrices/hg19/")
taok1 = CreateSeuratObject(counts = taok1.data, project = "sgTAOK1", min.cells = 3, min.features = 200)
taok1[["percent.mt"]] = PercentageFeatureSet(taok1, pattern = "^MT-")
pdf('taok1_mt_vs_Count.pdf')
FeatureScatter(taok1, feature1 = "nCount_RNA", feature2 = "percent.mt")
dev.off()
taok1 = subset(taok1, subset = nCount_RNA < 18000 & percent.mt < 10)
taok1 = NormalizeData(taok1)
taok1 = FindVariableFeatures(taok1, selection.method = "vst", nfeatures = 3000)
all.genes = rownames(taok1)
taok1 = ScaleData(taok1, features = all.genes)
taok1[['clusts_named']] = clusts_named[tmp5[paste(colnames(taok1),'_2',sep='')],'clusts_named']
taok1[['clusts_num']] = clusts_num[tmp5[paste(colnames(taok1),'_2',sep='')],'clusts_named']
Idents(taok1) = 'clusts_named'
taok1.markers = FindAllMarkers(taok1, only.pos=F, min.pct=0.25, logfc.threshold = 0.2)
taok1_marker_genes_0_3 = (taok1.markers %>% group_by(cluster) %>% filter(avg_logFC>0.3))$gene
taok1_marker_genes_0_5 = (taok1.markers %>% group_by(cluster) %>% filter(avg_logFC>0.5))$gene
taok1_marker_gene_top50 = (taok1.markers %>% group_by(cluster) %>% top_n(50,avg_logFC))$gene
## Highly varaible gene list WT int sgTAOK1 (n = 1584), uses vst method
hvg1 = intersect(WT@assays$RNA@var.features, taok1@assays$RNA@var.features)
write.csv(hvg1,'highlyVarGenes_WT_sgTAOK1_1584.csv')
## Integrate the datasets
int_anchors = FindIntegrationAnchors(object.list = list(WT = WT, taok1 = taok1), dims=1:30, anchor.features = 10000)
cellcycle_int_2 = IntegrateData(anchorset = int_anchors, dims = 1:30, features.to.integrate=unique(c(hvg1, mg1, mg2, mg3, hvg2, mg4, mg5, mg6, int_anchors@anchor.features)))
DefaultAssay(cellcycle_int_2) = "integrated"
as.loom(cellcycle_int_2, assay='integrated', filename='cellcycle_int_integrated.loom')