Permalink
Browse files

initial import

  • Loading branch information...
mjy committed Mar 18, 2010
1 parent 4f0ce23 commit 83d9dc5d7b1df473ccd9418f4eaa77835a1faa68
Showing with 2,541 additions and 14 deletions.
  1. +20 −0 MIT-LICENSE
  2. +13 −0 README
  3. +1 −0 init.rb
  4. +1 −0 install.rb
  5. +66 −0 lib/lexer.rb
  6. +282 −0 lib/nexus_file.rb
  7. 0 lib/nexus_parser.rb
  8. +334 −0 lib/parser.rb
  9. +269 −0 lib/tokens.rb
  10. +4 −0 tasks/nexus_parser_tasks.rake
  11. +234 −0 test/MX_test_03.nex
  12. +0 −10 test/helper.rb
  13. +382 −0 test/test.nex
  14. +934 −4 test/test_nexus_parser.rb
  15. +1 −0 uninstall.rb
View
@@ -0,0 +1,20 @@
+Copyright (c) 2008 [name of plugin creator]
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
View
13 README
@@ -0,0 +1,13 @@
+NexusParser
+===========
+
+Introduction goes here.
+
+
+Example
+=======
+
+Example goes here.
+
+
+Copyright (c) 2008 Matt Yoder, released under the MIT license
View
@@ -0,0 +1 @@
+# Include hook code here
View
@@ -0,0 +1 @@
+# Install hook code here
View
@@ -0,0 +1,66 @@
+
+
# Tokenizer for Nexus text.
#
# The lexer keeps the not-yet-consumed remainder of the input in @input and a
# single look-ahead token in @next_token.  Token classes are expected to
# respond to +regexp+ (a Regexp whose first capture group is the token value)
# and to accept that captured value in +new+.
class NexusFile::Lexer

  # input:: the Nexus text to tokenize.
  #
  # Carriage returns are removed so only "\n" line endings remain.  The
  # argument itself is left untouched: a stripped copy is stored, so frozen
  # strings are accepted and the caller's string is never modified.
  def initialize(input)
    @input = input.gsub(/\x0D/, "") # get rid of possible dos carriage returns
    @next_token = nil
  end

  # Returns true when the next token is an instance of +token_class+.
  # The token is read (and cached) but not consumed.
  def peek(token_class)
    read_next_token(token_class).class == token_class
  end

  # Consumes and returns the next token.
  #
  # Raises NexusFile::ParseError when the next token is not exactly of
  # +token_class+ (including when the input is exhausted, in which case the
  # received class is reported as NilClass).  The cached token is discarded
  # either way.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil
    if token.class != token_class
      raise(NexusFile::ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
    end
    token
  end

  private

  # Reads (and caches) the next token unless one is already cached.
  #
  # The requested class is tried first; if it does not match, every class in
  # NexusFile::Tokens.nexus_file_token_list is tried in order.  Returns nil at
  # end of input and raises NexusFile::ParseError when input remains but no
  # token class matches it.
  def read_next_token(token_class)
    return @next_token if @next_token
    return @next_token if match(token_class)

    NexusFile::Tokens.nexus_file_token_list.each do |candidate|
      return @next_token if match(candidate)
    end

    # no match, either end of string or lex-error
    if @input != ''
      raise(NexusFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
    end
    nil
  end

  # Tries +token_class+ against the remaining input.  On success the token is
  # cached in @next_token, the matched text is dropped from @input and true is
  # returned; otherwise nothing changes and false is returned.
  def match(token_class)
    m = token_class.regexp.match(@input)
    return false if !m

    @next_token = token_class.new(m[1])
    @input = @input[m.end(0)..-1]
    true
  end
end
+
+
+
View
@@ -0,0 +1,282 @@
+# NexusParser
+
+# version 0.3 by Matt Yoder
+# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
+# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
+
+# outstanding issues:
+## need to resolve Tokens Labels, ValuePair, IDs
+
+module NexusFile
+
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
+
# In-memory representation of a parsed Nexus file.
#
# taxa::       Array of Taxon
# characters:: Array of Character
# sets::       Array (initialised empty here)
# codings::    Array of rows; each row is an Array of Coding (one row per taxon)
# notes::      Array of Note that belong to the file as a whole
# vars::       Hash of variables collected while parsing
class NexusFile

  attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes

  def initialize
    @taxa = []
    @characters = []
    @sets = []
    @codings = []
    @notes = []
    @vars = {}
  end

  # A character (matrix column) together with its known states.
  class Character
    # Only a writer is generated for name; the reader below substitutes a
    # placeholder for blank names.
    attr_writer :name
    attr_accessor :states, :notes

    def initialize
      @name = nil
      @states = {} # state label => ChrState
      @notes = []
    end

    # Registers a state under options[:label] (required), optionally named
    # by options[:name] (defaults to '').  Returns false when no label is
    # given, otherwise the updated states Hash.  An existing state with the
    # same label is replaced.
    def add_state(options = {})
      opt = { :name => '' }.merge(options)
      return false if !opt[:label]

      @states.update(opt[:label] => ChrState.new(opt[:name]))
    end

    # Sorted list of the state labels registered so far.
    def state_labels
      @states.keys.sort
    end

    # The character name, or "Undefined" when it is nil or empty.
    def name
      ((@name == "") || (@name == nil)) ? "Undefined" : @name
    end
  end

  # A taxon (matrix row).
  class Taxon
    attr_accessor :name, :mesq_id, :notes

    def initialize
      @name = ""
      @mesq_id = ""
      @notes = []
    end
  end

  # A character state.  The state label itself is stored as the key in
  # Character#states; this object only carries the name (and notes, which
  # stay nil until assigned).
  class ChrState
    attr_accessor :name, :notes

    def initialize(name)
      @name = name
    end
  end

  # One matrix cell.  It exists as an object (rather than a bare value) so
  # that notes can be attached to it.
  class Coding
    # Only a writer is generated for states; the reader below always returns
    # an Array.
    attr_writer :states
    attr_accessor :notes

    def initialize(options = {})
      @states = options[:states]
      @notes = []
    end

    # The coded states, always as an Array (a single value is wrapped).
    def states
      @states.class == Array ? @states : [@states]
    end
  end

  # A footnote/annotation.  All options handed to the constructor are kept
  # verbatim in vars.
  class Note
    attr_accessor :vars

    def initialize(options = {})
      @vars = options
    end

    # The text of the note: vars[:tf] if set, else vars[:text], else a
    # fallback message.  Surrounding whitespace is removed.  The stored
    # strings are never modified; all work happens on copies.
    def note
      text = (@vars[:tf] || @vars[:text] || 'No text recovered, possible parsing error.').strip

      # THIS IS A HACK for handling the TF = (CM <note>) format, I assume there
      # will be other params in the future beyond CM, at that point move
      # processing to the parser.
      # The pattern is applied to the whole (stripped) text so that optional
      # whitespace after the opening parenthesis is really tolerated.
      if text =~ /\A\(\s*CM/i
        text = text[1..-2]                                # drop the enclosing parentheses
        text = text.strip
        text = text[2..-1] if text[0..1].downcase == "cm" # strip CM
        text = text.strip
        text = text[1..-2] if text[0..0] == "'"           # get rid of quote marks
        text = text[1..-2] if text[0..0] == '"'
      end
      text.strip
    end
  end

end
+
+
+# constructs the NexusFile
# Constructs the NexusFile.  The parser drives a Builder through the methods
# below and retrieves the result with #nexus_file.
class Builder

  def initialize
    @nf = NexusFile.new
  end

  # Appends an empty Taxon and returns the resulting number of taxa.
  def stub_taxon
    @nf.taxa.push(NexusFile::Taxon.new)
    @nf.taxa.size
  end

  # Appends an empty Character and returns the resulting number of characters.
  def stub_chr
    @nf.characters.push(NexusFile::Character.new)
    @nf.characters.size
  end

  # Stores one matrix row.
  #
  # taxon_index:: row position (converted with to_i)
  # rowvector::   one entry per character; an entry is either a single state
  #               or an Array of states
  #
  # Every state seen in the row that the character does not know yet is added
  # to that character, because not all states are necessarily listed in the
  # character descriptions.  "?" is never added as a state.
  def code_row(taxon_index, rowvector)
    row = taxon_index.to_i

    @nf.characters.each_with_index do |chr, i|
      @nf.codings[row] ||= []
      @nf.codings[row][i] = NexusFile::Coding.new(:states => rowvector[i])

      seen = rowvector[i].class == Array ? rowvector[i].collect { |s| s.to_s } : [rowvector[i].to_s]
      seen.delete("?") # we don't add this to the db

      (seen - chr.state_labels).each do |label|
        chr.add_state(:label => label)
      end
    end
  end

  # Merges +hash+ into the file's vars.  Raises (RuntimeError) if any of the
  # keys already holds a value; in that case nothing is merged.
  def add_var(hash)
    hash.keys.each do |k|
      # look the key itself up (not the literal symbol :k)
      raise "var #{k} has already been set" if @nf.vars[k]
    end
    @nf.vars.update(hash)
  end

  # Sets the name of the taxon at options[:index] (required; false is
  # returned without it).  options[:name] defaults to ''.
  def update_taxon(options = {})
    opt = { :name => '' }.merge(options)
    return false if !opt[:index]

    (@nf.taxa[opt[:index]].name = opt[:name]) if opt[:name]
  end

  # Updates the character at options[:index] (required; false is returned
  # without it).
  #
  # Legal hash keys are :index, :name, and integers that point to state
  # labels; the value stored under such a key is the state's name.  State
  # labels are stored as Strings, so keys are converted with to_s before
  # they are looked up: a state that already exists is renamed in place, a
  # missing one is created.
  #
  # Raises ParseError when no character exists at the given index.
  def update_chr(options = {})
    opt = { :name => '' }.merge(options)
    return false if !opt[:index]

    index = opt[:index].to_i
    chr = @nf.characters[index]

    # ParseError is referenced unqualified on purpose: next to a class that
    # is itself called NexusFile, "NexusFile::ParseError" would be looked up
    # inside that class instead of the enclosing namespace.
    raise(ParseError, "Can't update character of index #{index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{opt[:name]}\".") if !chr

    chr.name = opt[:name] if opt[:name]

    opt.delete(:index)
    opt.delete(:name)

    # the rest have states
    opt.keys.each do |k|
      label = k.to_s
      if chr.states[label] # state exists
        ## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
        update_state(index, :index => label, :name => opt[k])
      else # doesn't, create it
        chr.add_state(:label => label, :name => opt[k])
      end
    end
  end

  # Renames the state options[:index] of the character at +chr_index+ to
  # options[:name].  Only the name is handled for now.  The state is looked
  # up under the given key first and under its String form second.
  def update_state(chr_index, options = {})
    states = @nf.characters[chr_index].states
    key = options[:index]
    (states[key] || states[key.to_s]).name = options[:name]
  end

  # Attaches a Note built from +options+ (:text defaults to '').
  #
  # options[:type] 'TEXT' (a footnote) is addressed with :file, :taxon and
  # :character; 'AN' (an annotation) with :t and :c.  Taxon/character numbers
  # are 1-based.  With both a taxon and a character the note goes to the
  # matrix cell, with only one of them to that taxon or character, and a
  # 'TEXT' note with :file goes to the file itself.  Any other type, or a
  # note without a target, is ignored.
  def add_note(options = {})
    opt = { :text => '' }.merge(options)

    case opt[:type]
    # Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
    when 'TEXT' # a footnote
      if opt[:file]
        @nf.notes << NexusFile::Note.new(opt)
      else
        attach_note(opt[:taxon], opt[:character], opt)
      end
    when 'AN' # an annotation, rather than a footnote, same dif
      attach_note(opt[:t], opt[:c], opt)
    end
  end

  # The NexusFile built so far.
  def nexus_file
    @nf
  end

  private

  # Adds a Note built from +opt+ to the cell, taxon or character addressed
  # by the 1-based +taxon+ / +character+ numbers; does nothing when both are
  # missing.
  def attach_note(taxon, character, opt)
    target =
      if taxon && character # its a cell
        @nf.codings[taxon.to_i - 1][character.to_i - 1]
      elsif taxon
        @nf.taxa[taxon.to_i - 1]
      elsif character
        @nf.characters[character.to_i - 1]
      else
        return
      end

    target.notes ||= []
    target.notes << NexusFile::Note.new(opt)
  end

end # end file
+
+ # NexusFile::ParseError
+ class ParseError < StandardError
+ end
+
+
+end # end module
+
+
# Parses Nexus-formatted text and returns the object produced by
# NexusFile::Builder#nexus_file.
#
# input:: the Nexus text.  It is not modified (frozen strings are fine); all
#         work is done on a copy held in a local variable, so no instance
#         variable of the calling object is touched either.
#
# Raises NexusFile::ParseError when the text does not contain "#Nexus",
# "Begin", "Matrix" and "end;" (case-insensitive), i.e. when it does not look
# like a Nexus file at all.
def parse_nexus_file(input)
  # strip out all comments BEFORE we parse the file
  text = input.gsub(/\[[^\]]*\]/, '')

  # quickly peek at the input, does this look like a Nexus file?
  required = [/\#Nexus/i, /Begin/i, /Matrix/i, /end\;/i]
  unless required.all? { |pattern| text =~ pattern }
    raise(NexusFile::ParseError, "File is missing at least some required headers, check formatting.", caller)
  end

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(text)
  NexusFile::Parser.new(lexer, builder).parse_file

  builder.nexus_file
end
+
View
No changes.
Oops, something went wrong.

0 comments on commit 83d9dc5

Please sign in to comment.