Permalink
Browse files

No commit message

  • Loading branch information...
Sébastien Boisvert
Sébastien Boisvert committed Jan 21, 2009
0 parents commit a1e0fd9a93cb19225f12468768ce52b55b6bac08
@@ -0,0 +1 @@
+Sébastien "sebhtml" Boisvert http://genome.ulaval.ca/users/boisvert/
674 COPYING

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1 @@
+http://denovoassembler.svn.sourceforge.net/viewvc/denovoassembler/
@@ -0,0 +1,6 @@
+You need a 64-bit machine.
+You also need a C++ compiler (standard: C++98)
+and a UNIX box (mkdir, etc.).
+
+If you use GNU/Linux, simply type 'bash scripts/build.sh'. The executables
+will be in build/bin.
@@ -0,0 +1 @@
+SUBDIRS = src
2 NEWS
@@ -0,0 +1,2 @@
+2008-12-14
+ NEWS file created.
13 README
@@ -0,0 +1,13 @@
+http://DeNovoAssembler.sf.net
+
+DNA assembly is now a challenge because of the
+overwhelming amount of data produced by sequencing
+facilities. De Novo Assembler is an assembler
+that performs de novo assembly of reads from new sequencing
+technologies (e.g. Titanium).
+
+
+Also, to ensure that nothing goes wrong, you should limit the memory usage:
+
+
+ulimit -v 230000000 # in kB
@@ -0,0 +1,6 @@
+dnl Autoconf input for the "dna" package: requires a C++ compiler and
+dnl generates the top-level Makefile and src/Makefile.
+AC_INIT([dna],[1.0],[sebastien.boisvert.3@ulaval.ca])
+AC_CONFIG_SRCDIR([src/Read.cpp])
+AM_INIT_AUTOMAKE
+AC_PROG_CXX
+dnl Register every generated file first, then call AC_OUTPUT exactly once as
+dnl the last macro; it must not be invoked once per output file.
+AC_CONFIG_FILES([Makefile src/Makefile])
+AC_OUTPUT
@@ -0,0 +1,3 @@
+sync
+echo 3 > /proc/sys/vm/drop_caches
+echo 0 > /proc/sys/vm/drop_caches
@@ -0,0 +1,12 @@
+aclocal
+autoconf
+automake --add-missing
+autoreconf
+# -pedantic
+#
+
+export CXXFLAGS="-O6 -Wall -std=c++98 -fomit-frame-pointer -static "
+export LDFLAGS=$CXXFLAGS
+mkdir -p build
+./configure CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" --prefix=$(pwd)/build
+make install
@@ -0,0 +1,7 @@
+# 230000000
+# 230000000/1024/1024 -> 219 GB
+# 606 fastq files
+
+ulimit -v 230000000
+
+nohup dna -buckets 1000000000 -assemblyDirectory HumanGenome $(ls ~/Datasets/SRA000271/*fastq|head -n100) > /dev/null &
@@ -0,0 +1,15 @@
+#!/usr/bin/ruby
+
+puts ">"
+10000.times do
+ i=rand(4)
+ if i==0
+ print 'A'
+ elsif i==1
+ print 'T'
+ elsif i==2
+ print 'C'
+ elsif i==3
+ print 'G'
+ end
+end
@@ -0,0 +1,42 @@
+#!/usr/bin/ruby
+
+if ARGV.size!=3
+ puts "usage"
+ puts "keepLargeContigs.rb <contigsFile> <minimumContigSize> <largeContigsFile>"
+ exit
+end
+
+seq=""
+
+contigs=[]
+f=File.open ARGV[0]
+while l=f.gets
+ if l[0..0]=='>'
+ contigs<< seq
+ seq=""
+ else
+ seq<< l.strip
+ end
+end
+
+contigs<< seq
+f.close
+
+threshold=ARGV[1].to_i
+k=1
+out=File.open ARGV[2],"w+"
+contigs.each do |i|
+ if i.length<threshold
+ next
+ end
+ j=0
+ out.puts ">#{k} #{i.length}"
+ columns=60
+ while j<i.length
+ out.puts i[j..(j+columns-1)]
+ j+=columns
+ end
+ k+=1
+end
+
+out.close
@@ -0,0 +1,2 @@
+1
+200x36x36-071113_EAS56_0053-s_1_1.fastq 200x36x36-071113_EAS56_0053-s_1_2.fastq 300
@@ -0,0 +1,10 @@
+# 454 S. pneumoniae
+nohup dna -assemblyDirectory 1020 ~/Datasets/SRA001020/sff/ETJITFZ02.sff > /dev/null &
+# 454 S. cerevisiae
+nohup dna -assemblyDirectory 257 ~/Datasets/SRA000257/sff/*.sff > /dev/null &
+# Solexa S. cerevisiae
+nohup dna -assemblyDirectory 1177 ~/Datasets/SRA001177/SRR003681.*fastq* > /dev/null &
+# 454 L. tarentolae
+nohup dna -assemblyDirectory tar ~/Datasets/tar-454/sff/*.sff > /dev/null &
+# 454 r6 S pneumoniae
+nohup dna -assemblyDirectory r6 ~/Datasets/Marc/r6/sff/E*.sff > /dev/null &
@@ -0,0 +1,97 @@
+#!/usr/bin/ruby
+
+if ARGV.size==0
+ puts "You must provide a file"
+ exit
+end
+
+def revComp a
+ b=""
+ i=a.length-1
+ while i>=0
+ s=a[i..i]
+ if s=='A'
+ b<< 'T'
+ elsif s=='T'
+ b<< 'A'
+ elsif s=='C'
+ b<< 'G'
+ elsif s=='G'
+ b<< 'C'
+ end
+ i-=1
+ end
+ b
+end
+
+chromosomes=[]
+f=File.open ARGV[0]
+seq=""
+while l=f.gets
+ l=l.upcase
+ if l[0..0]=='>'
+ if seq!=""
+ chromosomes<< seq
+ end
+ seq=""
+ else
+ seq<< l.strip
+ end
+end
+
+chromosomes<< seq
+f.close
+
+coverage=25
+readLength=250
+errors=4
+readID=1
+chromosomes.each do |genome|
+ gSize=genome.length
+ position=0
+ while position<gSize
+ coverage.times do |t|
+ read_length=readLength+rand(100)-50
+ start=position+rand(read_length)-read_length/2
+ if start<0
+ start=0
+ end
+ sequence=genome[start..(start+read_length)]
+ if sequence.nil?
+ next
+ end
+ errorsInRead=errors+rand(4)-2
+ errorsInRead.times do
+ break
+ n=rand(4)
+ p=rand(sequence.length)
+ if n==0
+ sequence[p..p]='A'
+ elsif n==1
+ sequence[p..p]='T'
+ elsif n==2
+ sequence[p..p]='C'
+ elsif n==3
+ sequence[p..p]='G'
+ end
+ end
+ if rand(2)==0
+ puts "@#{readID}_#{start}_#{read_length}_F_#{errorsInRead}"
+ puts sequence
+ puts "+#{readID}_#{start}_#{read_length}_F_#{errorsInRead}"
+ else
+ puts "@#{readID}_#{start}_#{read_length}_R_#{errorsInRead}"
+ puts revComp(sequence)
+ puts "+#{readID}_#{start}_#{read_length}_R_#{errorsInRead}"
+ end
+ readID+=1
+ j=0
+ while j<sequence.length
+ print 'F'
+ j+=1
+ end
+ puts ""
+ end
+ position+=readLength
+ end
+end
@@ -0,0 +1,89 @@
+#!/usr/bin/ruby
+
+if ARGV.size==0
+ puts "You must provide a file"
+ exit
+end
+
+def revComp a
+ b=""
+ i=a.length-1
+ while i>=0
+ s=a[i..i]
+ if s=='A'
+ b<< 'T'
+ elsif s=='T'
+ b<< 'A'
+ elsif s=='C'
+ b<< 'G'
+ elsif s=='G'
+ b<< 'C'
+ end
+ i-=1
+ end
+ b
+end
+
+chromosomes=[]
+f=File.open ARGV[0]
+seq=""
+while l=f.gets
+ l=l.upcase
+ if l[0..0]=='>'
+ if seq!=""
+ chromosomes<< seq
+ end
+ seq=""
+ else
+ seq<< l.strip
+ end
+end
+
+chromosomes<< seq
+f.close
+
+coverage=10
+readLength=250
+readID=1
+errors=0
+chromosomes.each do |genome|
+ gSize=genome.length
+ position=0
+ while position<gSize
+ coverage.times do |t|
+ read_length=readLength
+ start=position
+ if start<0
+ start=0
+ end
+ sequence=genome[start..(start+read_length)]
+ if sequence.nil?
+ next
+ end
+ errorsInRead=errors+rand(4)-2
+ errorsInRead.times do
+ break
+ n=rand(4)
+ p=rand(sequence.length)
+ if n==0
+ sequence[p..p]='A'
+ elsif n==1
+ sequence[p..p]='T'
+ elsif n==2
+ sequence[p..p]='C'
+ elsif n==3
+ sequence[p..p]='G'
+ end
+ end
+ if rand(2)==0
+ puts ">#{readID}_#{start}_#{read_length}_F_#{errorsInRead}"
+ puts sequence
+ else
+ puts ">#{readID}_#{start}_#{read_length}_R_#{errorsInRead}"
+ puts revComp(sequence)
+ end
+ readID+=1
+ end
+ position+=readLength
+ end
+end
Oops, something went wrong.

0 comments on commit a1e0fd9

Please sign in to comment.