Skip to content

Commit

Permalink
No commit message
Browse files Browse the repository at this point in the history
  • Loading branch information
Sébastien Boisvert committed Jan 21, 2009
0 parents commit a1e0fd9
Show file tree
Hide file tree
Showing 34 changed files with 4,032 additions and 0 deletions.
1 change: 1 addition & 0 deletions AUTHORS
@@ -0,0 +1 @@
Sébastien "sebhtml" Boisvert http://genome.ulaval.ca/users/boisvert/
674 changes: 674 additions & 0 deletions COPYING

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions ChangeLog
@@ -0,0 +1 @@
http://denovoassembler.svn.sourceforge.net/viewvc/denovoassembler/
6 changes: 6 additions & 0 deletions INSTALL
@@ -0,0 +1,6 @@
You need a 64-bit machine.
You also need a C++ compiler (standard: C++98)
and a UNIX box (mkdir, etc.).

If you use GNU/Linux, simply type 'bash scripts/build.sh'. The executables
will be in build/bin.
1 change: 1 addition & 0 deletions LICENSE
1 change: 1 addition & 0 deletions Makefile.am
@@ -0,0 +1 @@
# Subdirectories that Automake builds recursively.
SUBDIRS = src
2 changes: 2 additions & 0 deletions NEWS
@@ -0,0 +1,2 @@
2008-12-14
NEWS file created.
13 changes: 13 additions & 0 deletions README
@@ -0,0 +1,13 @@
http://DeNovoAssembler.sf.net

DNA assembly is now a challenge because of the
overwhelming amount of data produced by sequencing
facilities. De Novo Assembler is an assembler
that performs de novo assembly of reads from new sequencing
technologies (e.g., Titanium).


Also, to ensure that nothing goes wrong, you should limit the memory usage:


ulimit -v 230000000 # in kB
6 changes: 6 additions & 0 deletions configure.ac
@@ -0,0 +1,6 @@
dnl Autoconf input for the "dna" package, version 1.0.
AC_INIT([dna], [1.0], [sebastien.boisvert.3@ulaval.ca])
dnl Sanity check: this file must exist in the source directory.
AC_CONFIG_SRCDIR([src/Read.cpp])
AM_INIT_AUTOMAKE
AC_PROG_CXX
dnl Register every generated Makefile, then emit them with a single AC_OUTPUT.
dnl AC_OUTPUT is meant to be called once, without arguments, at the very end.
AC_CONFIG_FILES([Makefile src/Makefile])
AC_OUTPUT
3 changes: 3 additions & 0 deletions scripts/LinuxDropCaches.sh
@@ -0,0 +1,3 @@
# Flush pending writes, then write 3 followed by 0 to the kernel's
# drop_caches control file.
sync
for value in 3 0
do
    echo "$value" > /proc/sys/vm/drop_caches
done
12 changes: 12 additions & 0 deletions scripts/build.sh
@@ -0,0 +1,12 @@
# Regenerate the autotools build system.
aclocal
autoconf
automake --add-missing
autoreconf
# -pedantic
#

# Compiler flags; the same flags are reused for the link step.
export CXXFLAGS="-O6 -Wall -std=c++98 -fomit-frame-pointer -static "
# Quoted so that shells which word-split export arguments keep all flags.
export LDFLAGS="$CXXFLAGS"
mkdir -p build
# The prefix is quoted so a checkout path containing spaces stays a single
# argument, and installation only runs when configure succeeded.
./configure CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" --prefix="$(pwd)/build" && make install
7 changes: 7 additions & 0 deletions scripts/doHuman.sh
@@ -0,0 +1,7 @@
# Virtual memory limit in kB: 230000000/1024/1024 -> 219 GB
# The dataset holds 606 fastq files; only the first 100 listed are assembled.
MEMORY_LIMIT_KB=230000000

ulimit -v "$MEMORY_LIMIT_KB"

# Left unquoted below on purpose: the list is split into one argument per file.
FASTQ_FILES=$(ls ~/Datasets/SRA000271/*fastq|head -n100)

nohup dna -buckets 1000000000 -assemblyDirectory HumanGenome $FASTQ_FILES > /dev/null &
15 changes: 15 additions & 0 deletions scripts/generateRandom.rb
@@ -0,0 +1,15 @@
#!/usr/bin/ruby

# Writes one FASTA-like record to standard output: a bare ">" header line
# followed by 10000 randomly drawn nucleotides on a single line, with no
# trailing newline.

# Nucleotide for each value that rand(4) can return.
nucleotides = %w[A T C G]

puts ">"
10000.times { print nucleotides[rand(4)] }
42 changes: 42 additions & 0 deletions scripts/keepLargeContigs.rb
@@ -0,0 +1,42 @@
#!/usr/bin/ruby

# Copies the contigs of a FASTA file that are at least <minimumContigSize>
# bases long into a new FASTA file. Kept contigs are renumbered from 1, their
# header becomes "><number> <length>", and sequences are wrapped at 60 columns.
#
# Usage: keepLargeContigs.rb <contigsFile> <minimumContigSize> <largeContigsFile>

if ARGV.size != 3
  puts "usage"
  puts "keepLargeContigs.rb <contigsFile> <minimumContigSize> <largeContigsFile>"
  exit
end

input_path, minimum_size, output_path = ARGV
threshold = minimum_size.to_i
columns = 60

# Read all records: header text is discarded, sequence lines are concatenated.
contigs = []
current = ""
File.open(input_path) do |input|
  input.each_line do |line|
    if line.start_with?('>')
      # A header closes the previous record. No sequence has been read yet
      # when the first header is seen, so empty records are skipped instead of
      # being stored (they used to be written out as ">1 0" whenever the
      # threshold was zero or negative).
      contigs << current unless current.empty?
      current = ""
    else
      current << line.strip
    end
  end
end
contigs << current unless current.empty?

# The block form closes the output file even if writing raises.
File.open(output_path, "w") do |out|
  number = 0
  contigs.each do |contig|
    next if contig.length < threshold

    number += 1
    out.puts ">#{number} #{contig.length}"
    offset = 0
    while offset < contig.length
      out.puts contig[offset, columns]
      offset += columns
    end
  end
end
2 changes: 2 additions & 0 deletions scripts/pairedInfo.txt
@@ -0,0 +1,2 @@
1
200x36x36-071113_EAS56_0053-s_1_1.fastq 200x36x36-071113_EAS56_0053-s_1_2.fastq 300
10 changes: 10 additions & 0 deletions scripts/runTests.sh
@@ -0,0 +1,10 @@
# Starts one background assembly that survives logout and discards its
# standard output.
# $1: assembly directory; remaining arguments: input read files.
run_assembly() {
    nohup dna -assemblyDirectory "$@" > /dev/null &
}

# 454 S. pneumoniae
run_assembly 1020 ~/Datasets/SRA001020/sff/ETJITFZ02.sff
# 454 S. cerevisiae
run_assembly 257 ~/Datasets/SRA000257/sff/*.sff
# Solexa S. cerevisiae
run_assembly 1177 ~/Datasets/SRA001177/SRR003681.*fastq*
# 454 L. tarentolae
run_assembly tar ~/Datasets/tar-454/sff/*.sff
# 454 r6 S pneumoniae
run_assembly r6 ~/Datasets/Marc/r6/sff/E*.sff
97 changes: 97 additions & 0 deletions scripts/simulate-454.rb
@@ -0,0 +1,97 @@
#!/usr/bin/ruby

# An input file path is required as the first command-line argument.
if ARGV.empty?
  puts "You must provide a file"
  exit
end

# Returns the reverse complement of the DNA string +a+.
#
# The string is reversed and A/T and C/G are swapped (upper and lower case).
# Any other character, such as the ambiguity code N, is kept unchanged, so the
# result always has the same length as the input. Such characters used to be
# dropped, which shortened the read.
def revComp(a)
  a.reverse.tr('ACGTacgt', 'TGCAtgca')
end

# Read every sequence of the FASTA file given as ARGV[0]: lines are
# upper-cased, header lines are discarded and sequence lines are concatenated.
# The block form closes the file even if reading raises.
chromosomes = []
current = ""
File.open(ARGV[0]) do |fasta|
  fasta.each_line do |line|
    line = line.upcase
    if line[0..0] == '>'
      chromosomes << current unless current.empty?
      current = ""
    else
      current << line.strip
    end
  end
end
chromosomes << current unless current.empty?

coverage = 25          # reads drawn per window
mean_read_length = 250 # window step; read lengths are this value -50..+49
errors = 4             # base of the per-read error count (rand(4) - 2 is added)
# Substitution errors are not applied: the original injection loop exited on
# its first iteration, and this switch keeps that behaviour explicit.
# NOTE(review): the error count is still written into each read name although
# the bases are unchanged -- confirm whether injection should be turned on.
inject_errors = false
nucleotides = %w[A T C G]
read_id = 1

# Emit FASTQ records on standard output, window by window along each sequence.
chromosomes.each do |genome|
  0.step(genome.length - 1, mean_read_length) do |position|
    coverage.times do
      read_length = mean_read_length + rand(100) - 50
      # Start the read near the window position, clamped to the genome start.
      start = position + rand(read_length) - read_length / 2
      start = 0 if start < 0
      # (start, length) slicing yields at most read_length bases; the previous
      # inclusive range start..(start + read_length) returned one base too many.
      # Reads that reach the end of the genome are shorter than read_length.
      sequence = genome[start, read_length]
      # Skip reads that would begin at or beyond the end of the genome.
      next if sequence.nil? || sequence.empty?

      errors_in_read = errors + rand(4) - 2
      if inject_errors
        errors_in_read.times do
          replacement = nucleotides[rand(4)]
          offset = rand(sequence.length)
          sequence[offset, 1] = replacement
        end
      end

      forward = rand(2) == 0
      strand = forward ? 'F' : 'R'
      read_name = "#{read_id}_#{start}_#{read_length}_#{strand}_#{errors_in_read}"
      read_bases = forward ? sequence : revComp(sequence)
      puts "@#{read_name}"
      puts read_bases
      puts "+#{read_name}"
      # One constant quality character per emitted base, so the quality line
      # always has the same length as the sequence line.
      puts 'F' * read_bases.length
      read_id += 1
    end
  end
end
89 changes: 89 additions & 0 deletions scripts/simulate-perfect.rb
@@ -0,0 +1,89 @@
#!/usr/bin/ruby

# An input file path is required as the first command-line argument.
if ARGV.empty?
  puts "You must provide a file"
  exit
end

# Returns the reverse complement of the DNA string +a+.
#
# The string is reversed and A/T and C/G are swapped (upper and lower case).
# Any other character, such as the ambiguity code N, is kept unchanged, so the
# result always has the same length as the input. Such characters used to be
# dropped, which shortened the read.
def revComp(a)
  a.reverse.tr('ACGTacgt', 'TGCAtgca')
end

# Read every sequence of the FASTA file given as ARGV[0]: lines are
# upper-cased, header lines are discarded and sequence lines are concatenated.
# The block form closes the file even if reading raises.
chromosomes = []
current = ""
File.open(ARGV[0]) do |fasta|
  fasta.each_line do |line|
    line = line.upcase
    if line[0..0] == '>'
      chromosomes << current unless current.empty?
      current = ""
    else
      current << line.strip
    end
  end
end
chromosomes << current unless current.empty?

coverage = 10     # copies of each window that are emitted
read_length = 250 # window size and step
errors = 0        # base of the error count written into read names
read_id = 1

# Emit FASTA records on standard output: each window of the sequence is
# written `coverage` times, each time on a randomly chosen strand.
chromosomes.each do |genome|
  0.step(genome.length - 1, read_length) do |position|
    # (start, length) slicing yields at most read_length bases; the previous
    # inclusive range position..(position + read_length) returned one base too
    # many. The last window of a sequence may be shorter than read_length.
    sequence = genome[position, read_length]
    coverage.times do
      # No error is ever applied to the bases: the original injection loop
      # exited on its first iteration and has been removed as dead code.
      # NOTE(review): this drawn value (-2..1) is still written into the read
      # name as before -- confirm whether it should simply be 0.
      errors_in_read = errors + rand(4) - 2
      if rand(2) == 0
        puts ">#{read_id}_#{position}_#{read_length}_F_#{errors_in_read}"
        puts sequence
      else
        puts ">#{read_id}_#{position}_#{read_length}_R_#{errors_in_read}"
        puts revComp(sequence)
      end
      read_id += 1
    end
  end
end

0 comments on commit a1e0fd9

Please sign in to comment.