Skip to content
Browse files

Version 0.1.0. Basic binding with open and process_region functions.

  • Loading branch information...
1 parent 4c50c71 commit 5ae07f5819d36ed2d921f8124509f899b446b5a3 @throwern committed Apr 2, 2012
Showing with 369 additions and 19 deletions.
  1. +41 −0 .rvmrc
  2. +6 −7 Gemfile
  3. +1 −1 LICENSE.txt
  4. +29 −3 README.rdoc
  5. +15 −8 Rakefile
  6. +1 −0 VERSION
  7. 0 bin/bgzip
  8. 0 bin/tabix
  9. +72 −0 bio-tabix.gemspec
  10. +73 −0 ext/tabix/mkrf_conf.rb
  11. +2 −0 lib/bio-tabix.rb
  12. +1 −0 lib/bio/tabix/Version
  13. +49 −0 lib/bio/tabix/binding.rb
  14. +54 −0 lib/bio/tabix/index.rb
  15. +25 −0 lib/bio/tabix/library.rb
View
41 .rvmrc
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# This is an RVM Project .rvmrc file, used to automatically load the ruby
+# development environment upon cd'ing into the directory
+
+# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional.
+environment_id="ruby-1.9.2-p290@tabix"
+
+#
+# First we attempt to load the desired environment directly from the environment
+# file. This is very fast and efficient compared to running through the entire
+# CLI and selector. If you want feedback on which environment was used then
+# insert the word 'use' after --create as this triggers verbose mode.
+#
+if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
+  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
+then
+  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
+
+  # Run the optional after_use hook when one is present and non-empty.
+  if [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]]
+  then
+    . "${rvm_path:-$HOME/.rvm}/hooks/after_use"
+  fi
+else
+  # If the environment file has not yet been created, use the RVM CLI to select.
+  if ! rvm --create "$environment_id"
+  then
+    # Name the environment that failed so the message is actionable.
+    echo "Failed to create RVM environment '${environment_id}'."
+  fi
+fi
+
+#
+# If you use an RVM gemset file to install a list of gems (*.gems), you can have
+# it be automatically loaded. Uncomment the following and adjust the filename if
+# necessary.
+#
+# filename=".gems"
+# if [[ -s "$filename" ]] ; then
+# rvm gemset import "$filename" | grep -v already | grep -v listed | grep -v complete | sed '/^$/d'
+# fi
+
View
13 Gemfile
@@ -1,14 +1,13 @@
source "http://rubygems.org"
# Add dependencies required to use your gem here.
-# Example:
-# gem "activesupport", ">= 2.3.5"
+gem "ffi"
# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
- gem "shoulda", ">= 0"
- gem "rdoc", "~> 3.12"
- gem "bundler", "~> 1.0.0"
- gem "jeweler", "~> 1.8.3"
- gem "rcov", ">= 0"
+ gem "shoulda"
+ gem "rdoc"
+ gem "bundler"
+ gem "jeweler"
+ gem "simplecov"
end
View
2 LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2012 throwern
+Copyright (c) 2012 Nicholas A. Thrower
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
View
32 README.rdoc
@@ -1,9 +1,35 @@
= bio-tabix
-Description goes here.
+Ruby binding for the tabix file indexing routines within the samtools package
+http://samtools.sourceforge.net/
+
+Tabix provides utilities for indexing and subsequently querying regions of interest from large tab delimited files.
+
+Files are indexed on three columns, [Group, pos1, pos2], and must be sorted by position.
+
+== Usage
+
+Open the file; an index will be created if it does not exist. Use :force => true to overwrite an existing index.
+
+ tabix_file = Bio::Tabix::Index.open(my_txt_file, {:s => group_col, :b => pos1_col, :e => pos2_col})
+
+
+Create a proc or lambda. It will be called with the text of each line read from the file.
+ my_proc = lambda do |line|
+ # convert text to array and print column 7
+ puts line.split("\t")[6]
+ end
+
+Process a region
+ tabix_file.process_region(group_name, pos1, pos2, my_proc)
+
+== Installation
+'gem install bio-tabix'
+
+== Dependencies:
+-FFI (http://github.com/ffi/ffi)
== Contributing to bio-tabix
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
* Fork the project.
@@ -14,6 +40,6 @@ Description goes here.
== Copyright
-Copyright (c) 2012 throwern. See LICENSE.txt for
+Copyright (c) 2012 Nicholas A Thrower. See LICENSE.txt for
further details.
View
23 Rakefile
@@ -17,11 +17,13 @@ Jeweler::Tasks.new do |gem|
gem.name = "bio-tabix"
gem.homepage = "http://github.com/throwern/bio-tabix"
gem.license = "MIT"
- gem.summary = %Q{TODO: one-line summary of your gem}
- gem.description = %Q{TODO: longer description of your gem}
+ gem.summary = %Q{Ruby binding for samtools tabix}
+ gem.description = %Q{Tabix file indexing routines from the samtools package http://samtools.sourceforge.net/}
gem.email = "throwern@msu.edu"
gem.authors = ["throwern"]
# dependencies defined in Gemfile
+ gem.extensions = "ext/tabix/mkrf_conf.rb"
+ gem.executables = ["tabix","bgzip"]
end
Jeweler::RubygemsDotOrgTasks.new
@@ -32,12 +34,17 @@ Rake::TestTask.new(:test) do |test|
test.verbose = true
end
-require 'rcov/rcovtask'
-Rcov::RcovTask.new do |test|
- test.libs << 'test'
- test.pattern = 'test/**/test_*.rb'
- test.verbose = true
- test.rcov_opts << '--exclude "gems/*"'
+# require 'rcov/rcovtask'
+# Rcov::RcovTask.new do |test|
+# test.libs << 'test'
+# test.pattern = 'test/**/test_*.rb'
+# test.verbose = true
+# test.rcov_opts << '--exclude "gems/*"'
+# end
+desc "Code coverage detail"
+task :simplecov do
+ ENV['COVERAGE'] = "true"
+ Rake::Task['spec'].execute
end
task :default => :test
View
1 VERSION
@@ -0,0 +1 @@
+0.1.0
View
0 bin/bgzip
No changes.
View
0 bin/tabix
No changes.
View
72 bio-tabix.gemspec
@@ -0,0 +1,72 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
+# -*- encoding: utf-8 -*-
+
+Gem::Specification.new do |s|
+ s.name = %q{bio-tabix}
+ s.version = "0.1.0"
+
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+ s.authors = ["throwern"]
+ s.date = %q{2012-04-13}
+ s.description = %q{Tabix file indexing routines from the samtools package http://samtools.sourceforge.net/}
+ s.email = %q{throwern@msu.edu}
+ s.executables = ["tabix", "bgzip"]
+ s.extensions = ["ext/tabix/mkrf_conf.rb"]
+ s.extra_rdoc_files = [
+ "LICENSE.txt",
+ "README.rdoc"
+ ]
+ s.files = [
+ ".document",
+ ".rvmrc",
+ "Gemfile",
+ "LICENSE.txt",
+ "README.rdoc",
+ "Rakefile",
+ "VERSION",
+ "ext/tabix/Rakefile",
+ "ext/tabix/mkrf_conf.rb",
+ "lib/bio-tabix.rb",
+ "lib/bio/tabix/Version",
+ "lib/bio/tabix/binding.rb",
+ "lib/bio/tabix/index.rb",
+ "lib/bio/tabix/library.rb",
+ "test/helper.rb",
+ "test/test_bio-tabix.rb"
+ ]
+ s.homepage = %q{http://github.com/throwern/bio-tabix}
+ s.licenses = ["MIT"]
+ s.require_paths = ["lib"]
+ s.rubygems_version = %q{1.6.2}
+ s.summary = %q{Ruby binding for samtools tabix}
+
+ if s.respond_to? :specification_version then
+ s.specification_version = 3
+
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
+ s.add_runtime_dependency(%q<ffi>, [">= 0"])
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
+ s.add_development_dependency(%q<rdoc>, [">= 0"])
+ s.add_development_dependency(%q<bundler>, [">= 0"])
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
+ s.add_development_dependency(%q<simplecov>, [">= 0"])
+ else
+ s.add_dependency(%q<ffi>, [">= 0"])
+ s.add_dependency(%q<shoulda>, [">= 0"])
+ s.add_dependency(%q<rdoc>, [">= 0"])
+ s.add_dependency(%q<bundler>, [">= 0"])
+ s.add_dependency(%q<jeweler>, [">= 0"])
+ s.add_dependency(%q<simplecov>, [">= 0"])
+ end
+ else
+ s.add_dependency(%q<ffi>, [">= 0"])
+ s.add_dependency(%q<shoulda>, [">= 0"])
+ s.add_dependency(%q<rdoc>, [">= 0"])
+ s.add_dependency(%q<bundler>, [">= 0"])
+ s.add_dependency(%q<jeweler>, [">= 0"])
+ s.add_dependency(%q<simplecov>, [">= 0"])
+ end
+end
+
View
73 ext/tabix/mkrf_conf.rb
@@ -0,0 +1,73 @@
+#(c) Copyright 2012 Nicholas A Thrower. All Rights Reserved.
+# Derivative of https://github.com/helios/bioruby-samtools/blob/master/ext/mkrf_conf.rb
+# create Rakefile for shared library compilation
+
+path = File.expand_path(File.dirname(__FILE__))
+
+path_external = File.join(path, "../../lib/bio/tabix/")
+
+version = File.open(File.join(path_external,"Version"),'r')
+Version = version.read
+version.close
+
+url = "http://samtools.svn.sourceforge.net/viewvc/samtools/trunk/tabix/?view=tar"
+TabixFile = "tabix-trunk.tar"
+
+File.open(File.join(path,"Rakefile"),"w") do |rakefile|
+rakefile.write <<-RAKE
+require 'rbconfig'
+require 'open-uri'
+require 'fileutils'
+include FileUtils::Verbose
+require 'rake/clean'
+
+URL = "#{url}"
+
+task :download do
+ open(URL) do |uri|
+ File.open("#{TabixFile}",'wb') do |fout|
+ fout.write(uri.read)
+ end #fout
+ end #uri
+end
+
+task :compile do
+ sh "tar xvf #{TabixFile}"
+ cd("tabix") do
+ #sh "patch < ../Makefile-bioruby.patch"
+ sh "make"
+ cp("libtabix.a","#{path_external}")
+ case Config::CONFIG['host_os']
+ when /linux/
+ sh "make libtabix.so.1"
+ cp("libtabix.so.1","#{path_external}")
+ when /darwin/
+ sh "make libtabix.1.dylib"
+ cp("libtabix.1.dylib","#{path_external}")
+ else raise NotImplementedError, "Tabix not supported on your platform"
+ end #case
+ cp("tabix", "#{path}/../../bin/")
+ chmod 0755, "#{path}/../../bin/tabix"
+ cp("bgzip", "#{path}/../../bin/")
+ chmod 0755, "#{path}/../../bin/bgzip"
+ end #cd
+end
+
+task :clean do
+ # cd("tabix-#{Version}") do
+ # sh "make clean"
+ # end
+ # rm("#{TabixFile}")
+ # rm_rf("tabix-#{Version}")
+ cd("tabix") do
+ sh "make clean"
+ end
+ rm("#{TabixFile}")
+ rm_rf("tabix")
+end
+
+task :default => [:download, :compile, :clean]
+
+RAKE
+
+end
View
2 lib/bio-tabix.rb
@@ -0,0 +1,2 @@
+require 'ffi'
+require 'bio/tabix/index'
View
1 lib/bio/tabix/Version
@@ -0,0 +1 @@
+0.2.5
View
49 lib/bio/tabix/binding.rb
@@ -0,0 +1,49 @@
+require 'bio/tabix/library'
+module Bio
+ module Tabix
+ module Binding
+ extend FFI::Library
+ ffi_lib Bio::Tabix::Library.filename
+
+ # CLASSES
+ class KString < FFI::Struct
+ layout(
+ :l,:size_t,
+ :m,:size_t,
+ :s,:string)
+ end
+
+ class TabixT < FFI::Struct
+ layout(
+ :fp,:pointer,
+ :idx,:pointer,
+ :fn,:string,
+ :fnidx,:string
+ )
+ end
+
+ class IterT < FFI::Struct
+ layout(
+ :from_first,:int,
+ :tid,:int,
+ :beg,:int,
+ :end,:int,
+ :n_off,:int,
+ :i,:int,
+ :finished,:int,
+ :curr_off,:uint64,
+ :str,KString,
+ :idx,:pointer,
+ :off,:pointer)
+ end
+
+ # FUNCTIONS
+ attach_function :ti_open, [:string, :string], :pointer # filename, idxname (or 0) : TabixT*
+ attach_function :ti_read, [:pointer, :pointer, :pointer], :string # TabixT*, ti_iter_t, len : string
+ attach_function :ti_query, [:pointer,:string,:int,:int], IterT # TabixT*, name, beg, end : IterT
+ attach_function :ti_close, [:pointer], :void # TabixT*
+ attach_function :ti_iter_destroy, [IterT], :void # ti_iter_t
+
+ end
+ end
+end
View
54 lib/bio/tabix/index.rb
@@ -0,0 +1,54 @@
+require 'bio/tabix/binding'
+
+module Bio
+ module Tabix
+ class Index
+ include Bio::Tabix::Binding
+ attr_accessor :file,:index,:t_file,:t_file_p
+
+ def self.build(f,opts={})
+ end
+
+ def self.open(*args)
+ self.new(*args).open
+ end
+
+ def initialize(f,opts={})
+ @file = f
+ @index = opts[:i]||file+".tbi"
+ return self
+ end
+
+ def open
+ if(@t_file)
+ self.close
+ end
+ raise "FileNotFound #{file}" unless(File.exist?(file)) or file =~ /http:\/\/|ftp:\/\//
+ raise "FileNotFound #{index} -- use -i to supply custom index" unless(File.exist?(index)) or index =~ /http:\/\/|ftp:\/\//
+ @t_file_p = ti_open(file,index)
+ raise "FileAcessError #{file}" if @t_file_p.null?
+ @t_file = TabixT.new(@t_file_p)
+ return self
+ end
+
+ def close
+ if(@t_file_p)
+ begin
+ ti_close(@t_file_p)
+ rescue
+ puts "Error closing file"
+ end
+ end
+ end
+
+ def process_region(group,pos1,pos2,user_proc)
+ iter = IterT.new(ti_query(t_file_p,group,pos1,pos2))
+ len = FFI::MemoryPointer.new(:int)
+ while( (s = ti_read(t_file_p, iter, len)) )
+ user_proc.call(s,len)
+ end
+ ti_iter_destroy(iter)
+ end
+ end
+ end
+end
View
25 lib/bio/tabix/library.rb
@@ -0,0 +1,25 @@
+module Bio
+ module Tabix
+ class Library
+ def self.filename
+ lib_os = case RUBY_PLATFORM
+ when /linux/
+ 'so.1'
+ when /darwin/
+ '1.dylib'
+ else
+ case RUBY_DESCRIPTION
+ when /darwin.*java/
+ '1.dylib'
+ when /linux.*java/
+ 'so.1'
+ else raise NotImplementedError, "Tabix not supported on your platform"
+ end
+ end
+
+ File.join(File.expand_path(File.dirname(__FILE__)),"libtabix.#{lib_os}")
+ end
+ #module_function :filename
+ end
+ end
+end

0 comments on commit 5ae07f5

Please sign in to comment.
Something went wrong with that request. Please try again.