Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Genome reader

  • Loading branch information...
commit bc2bc33c63b5ec2055b883c3e29a814dd3a06bcb 1 parent b23b87f
@pjotrp authored
View
34 README.md
@@ -228,6 +228,40 @@ translate = Nucleotide::Translate.new(trn_table)
aa_frames = translate.aa_6_frames("ATCATTAGCAACACCAGCTTCCTCTCTCTCGCTTCAAAGTTCACTACTCGTGGATCTCGT")
```
+## Walk a FASTA (reference) genome
+
+Genomes and BACs often come as large (continuous) FASTA files. When
+variant/position queries arrive in sorted order, the genome can be
+walked through once, reading the whole file serially. This is what
+FastaGenomeReader does.
+
+The following code assumes the FASTA descriptors contain
+
+```ruby
+ >13 dna:chromosome chromosome:GRCh37:13:1:115169878:1
+```
+
+so that 'chr', 'start' and 'stop' can be captured by splitting the
+descriptor on ':'. Using
+[bio-vcf](https://github.com/pjotrp/bioruby-vcf):
+
+```ruby
+genome = FastaGenomeReader.new('Hs_GRCh37_gatk.fasta', ->
+ { |descr| a = skip,skip,skip,chr,start,stop = descr.split(':')
+ chr, start.to_i, stop.to_i } )
+
+STDIN.each_line do | line |
+ next if line =~ /^#/
+ fields = VcfLine.parse(line)
+ rec = VcfRecord.new(fields,header)
+ if rec.var == genome.ref(rec.chr,rec.pos+1)
+ # do something
+ end
+end
+```
+
+FastaGenomeReader is buffered and tiled. The buffer size defaults to
+64_000 and can be overridden through the optional third constructor
+argument (`bufsize`).
+
# Project home page
Information on the source tree, documentation, examples, issues and
View
5 Rakefile
@@ -32,11 +32,6 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
spec.pattern = FileList['spec/**/*_spec.rb']
end
-RSpec::Core::RakeTask.new(:rcov) do |spec|
- spec.pattern = 'spec/**/*_spec.rb'
- spec.rcov = true
-end
-
task :test => :spec
task :default => :spec
View
6 bio-bigbio.gemspec
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Pjotr Prins"]
- s.date = "2014-05-16"
+ s.date = "2014-05-20"
s.description = "Fasta reader, ORF emitter, sequence translation"
s.email = "pjotr.public01@thebird.nl"
s.executables = ["fasta_filter.rb", "fasta_sort.rb", "getorf", "nt2aa.rb"]
@@ -63,11 +63,11 @@ Gem::Specification.new do |s|
s.homepage = "http://github.com/pjotrp/bigbio"
s.licenses = ["MIT"]
s.require_paths = ["lib"]
- s.rubygems_version = "1.8.23"
+ s.rubygems_version = "2.0.3"
s.summary = "Low memory sequence emitters"
if s.respond_to? :specification_version then
- s.specification_version = 3
+ s.specification_version = 4
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
View
1  lib/bigbio.rb
@@ -32,6 +32,7 @@
autoload :FastaReader, 'bigbio/db/fasta'
autoload :FastaWriter, 'bigbio/db/fasta'
+autoload :FastaGenomeReader, 'bigbio/db/fasta'
autoload :FastaPairedReader, 'bigbio/db/fasta'
autoload :FastaPairedWriter, 'bigbio/db/fasta'
autoload :BlastClust, 'bigbio/db/blast'
View
1  lib/bigbio/db/fasta.rb
@@ -11,3 +11,4 @@
require 'bigbio/db/fasta/fastawriter'
require 'bigbio/db/fasta/fastapairedreader'
require 'bigbio/db/fasta/fastapairedwriter'
+require 'bigbio/db/fasta/fastagenomereader'
View
31 lib/bigbio/db/fasta/fastagenomereader.rb
@@ -0,0 +1,31 @@
+# Buffered FastaGenomeReader
+#
+
+class BufferMissed < Exception # Error type named by FastaGenomeReader#ref's comment for a missed buffer. NOTE(review): subclasses Exception, so a bare `rescue` (StandardError) will not catch it
+end
+
+class FastaGenomeReader # Reader over a FASTA file that is opened at construction. NOTE(review): ref and read_next are still stubs in this revision
+
+ # Initialize the reader: open the FASTA file fn, keep parse_descriptor_func and bufsize (default 64_000), and prime @buf via read_next
+ def initialize fn, parse_descriptor_func, bufsize=64_000
+ @f = File.open(fn) # handle stays open; nothing in this class closes it
+ @parse_descriptor = parse_descriptor_func # stored only; not called anywhere in this class yet
+ @bufsize = bufsize # stored only; read_next does not consult it yet
+ @buf = read_next # read_next currently returns nil (the value of a while loop that ran to completion)
+ end
+
+ # Intended to return the reference nucleotide at chr,pos and to raise BufferMissed
+ # when the buffer is missed. NOTE(review): the body is empty, so it currently returns nil.
+ def ref chr,pos
+ end
+
+private
+
+ # Intended to fill the next buffer until the next descriptor is reached or the buffer
+ # is full. NOTE(review): currently reads every remaining line of @f and prints it with p.
+ def read_next
+ while (line = @f.gets)
+ p line # prints the inspected line to stdout; looks like temporary debug output
+ end
+ end
+end
View
14 spec/fastareader_spec.rb
@@ -0,0 +1,14 @@
+
+require 'rspec'
+
+$: << "../lib" # append ../lib (resolved against the current working directory) to the load path before requiring bigbio
+
+require 'bigbio'
+
+describe FastaGenomeReader, "when reading a full genome" do
+
+ it "should load the genome file" do
+ FastaGenomeReader.new("test/data/fasta/nt.fa", -> {}) # smoke test: only constructs the reader with a no-op, zero-argument lambda; no expectation is asserted
+ end
+
+end
Please sign in to comment.
Something went wrong with that request. Please try again.