Permalink
Browse files

Genome reader

  • Loading branch information...
1 parent 19404e5 commit d714c2bacd24e4f2254f5cf03e50cf846b20fd1d @pjotrp committed May 20, 2014
Showing with 24 additions and 21 deletions.
  1. +1 −1 VERSION
  2. +15 −19 lib/bigbio/db/fasta/fastagenomereader.rb
  3. +8 −1 spec/fastareader_spec.rb
View
@@ -1 +1 @@
-0.1.6
+0.1.7-pre
@@ -1,8 +1,8 @@
-# Buffered FastaGenomeReader
+# Buffered FastaGenomeReader.
+#
+# The logic is straightforward: keep reading the file, max_bufsize characters
+# at a time, until the first matching chr,pos is found. Looking back is not allowed.
#
-
-class BufferMissed < Exception
-end
class FastaGenomeReader
@@ -19,15 +19,20 @@ def initialize line =nil, func = nil
@offset = @start
@buf = ''
end
+
def value pos
@buf[pos-@offset]
end
- def in_range? pos
+
+ def in_range? chr,pos
+ return false if chr != @chr
@offset <= pos and pos < @offset+@buf.size
end
+
def move_offset
@offset += @buf.size
end
+
def empty_buf
move_offset
@buf = ""
@@ -45,17 +50,10 @@ def initialize fn, parse_descriptor_func, max_bufsize=64_000
# Returns the reference nucleotide. Chr can be any name (i.e., chr or a bin).
def ref chr,pos
- p @rec
- p [chr,pos]
- if @rec.chr == chr and @rec.in_range?(pos)
- # p "In range"
- # if chr is current and pos within range return it
- @rec.value(pos)
- else
- # recursively keep reading until position reached
- read_next
- ref(chr,pos)
+ while not @rec.in_range?(chr,pos)
+ return nil if not read_next
end
+ @rec.value(pos)
end
# Fetch in current chromosome/bin
@@ -68,18 +66,16 @@ def [] pos
# Fill the next buffer until the next descriptor is reached or the buffer
# is full
def read_next
- p ["***","READ NEXT"]
@rec.empty_buf
while (line = @f.gets)
next if line =~ /^#/
- p line
if line =~ /^>/
- @prev_rec = @rec
@rec = Record.new(line,@parse_descriptor_func)
else
@rec.buf << line.strip
- return if @rec.buf.size > @max_bufsize
+ return @rec if @rec.buf.size > @max_bufsize
end
end
+ nil
end
end
@@ -15,8 +15,15 @@
@genome.ref('X',120).should == 'C'
@genome.ref('X',479).should == 'T'
@genome.ref('X',480).should == 'T'
+ # Within the same record you can ref
+ @genome[480].should == 'T'
+ @genome[481].should == 'T'
+ @genome[482].should == 'N'
@genome.ref('X',511).should == 'N'
- @genome.ref('X',560).should == 'T' # <- reads into the 3rd sequenc
+ @genome.ref('X',560).should == 'T' # <- reads into the 3rd sequence
+ @genome.ref('Y',29).should == nil
+ @genome.ref('X',10).should == nil
+ @genome.ref('X',10000).should == nil
end
end

0 comments on commit d714c2b

Please sign in to comment.