-
Notifications
You must be signed in to change notification settings - Fork 0
/
dexseq.sh
95 lines (53 loc) · 2.73 KB
/
dexseq.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#qsub -pe mpi 10 -l h_vmem=10G -b y -cwd -N dexseq -e dexseq.err -o dexseq.log /export/bio/R/R-3.2.1/bin/R CMD BATCH dexseq_run.R
/export/bio/R/R-3.2.1/bin/R
source("http://bioconductor.org/biocLite.R")
biocLite("DEXSeq")
library(DEXSeq)
biocLite("BiocParallel")
library(BiocParallel)
BPPARAM = MulticoreParam(workers=4)
#place with python scripts /export/bio/R/R-3.0.1/lib64/R/library/DEXSeq/python_scripts/
#1 covert gtf to DEXseq specific gff
#/export/bio/python/python2.7 /export/bio/R/R-3.0.1/lib64/R/library/DEXSeq/python_scripts/dexseq_prepare_annotation.py Oreochromis_niloticus.BROADON2.gtf Oreochromis_niloticus.BROADON2.DEXseq.gff
#/export/bio/python/python2.7 /export/bio/R/R-3.0.1/lib64/R/library/DEXSeq/python_scripts/dexseq_prepare_annotation.py Metriaclima_zebra.BROADMZ2.gtf Metriaclima_zebra.BROADMZ2.DEXseq.gff
#2 get counts for bam
#counts_dexseq.sh ## this didnt work on the server because python failed to import HTseq when qsubbed
#qsub -q all.q@romulus -V -pe mpi 8 -l h_vmem=8G -b y -cwd -N count -e counts_dexseq.err -o counts_dexseq.log /bin/sh counts_dexseq.sh # works only on romulus
#3 dexseq in R
#library(DEXSeq)
setwd("/netapp/home/singh/pooja_phd/data/pharyngeal_jaw_transcriptomes/raw_demultiplexed_bam/paper1/map2Onil/all_bam/alternative_splicing/")
# reads files
countFiles = list.files(".", pattern="DEXseqcount.txt$", full.names=TRUE)
countFiles
#read gff for DEXseq
flattenedFile = list.files("/netapp/home/singh/pooja_phd/data/cichlid_reference_genomes/new_Oreochromis/gtf_brawand/", pattern="seq.gff$", full.names=TRUE)
flattenedFile
# import sample table
sampleTable <- read.table("paper1_speciesinfo.txt", header=T, row.names=1)
#start analysis
dxd = DEXSeqDataSetFromHTSeq(countFiles, sampleData=sampleTable, design= ~ sample + exon + condition:exon, flattenedfile=flattenedFile)
# normlisation
dxd = estimateSizeFactors(dxd, BPPARAM=BPPARAM)
dxd = estimateDispersions(dxd, BPPARAM=BPPARAM)
#estimate dispersion
png("DEXseq_dispersion_estimates.png")
plotDispEsts(dxd)
dev.off()
#test of diff exp of exons
dxd = testForDEU(dxd, BPPARAM=BPPARAM)
#remove totalt gene expression effects
dxd = estimateExonFoldChanges(dxd, fitExpToVar="condition", BPPARAM=BPPARAM)
#results
dxr1 = DEXSeqResults(dxd)
write.table(dxr1, file = "DEXseq_fullresults.txt", sep = "\t", quote=F)
#plot MA plot
png("DEXseq_MAplot.png")
plotMA(dxr1, cex=0.8 )
dev.off()
#DE isoforms at FDR 5%
genes <- table(dxr1$padj < 0.05)
write.table(genes, file = "DEXseq_sig_results_0.05.txt", sep = "\t", quote=F)
#DE genes affected at FDR 5%
genesN <- table ( tapply( dxr1$padj < 0.1, dxr1$groupID, any) )
write.table(genes, file = "DEXseq_sig_results_0.05_genesAffected.txt", sep = "\t", quote=F)
#netapp/home/tmp/RtmpAeSB6R/downloaded_packages