
Initial import

1 parent 4f0ce23 commit 83d9dc5d7b1df473ccd9418f4eaa77835a1faa68 @mjy committed
Showing with 2,541 additions and 14 deletions.
  1. +20 −0 MIT-LICENSE
  2. +13 −0 README
  3. +1 −0 init.rb
  4. +1 −0 install.rb
  5. +66 −0 lib/lexer.rb
  6. +282 −0 lib/nexus_file.rb
  7. 0 lib/nexus_parser.rb
  8. +334 −0 lib/parser.rb
  9. +269 −0 lib/tokens.rb
  10. +4 −0 tasks/nexus_parser_tasks.rake
  11. +234 −0 test/MX_test_03.nex
  12. +0 −10 test/helper.rb
  13. +382 −0 test/test.nex
  14. +934 −4 test/test_nexus_parser.rb
  15. +1 −0 uninstall.rb
20 MIT-LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2008 [name of plugin creator]
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13 README
@@ -0,0 +1,13 @@
+NexusParser
+===========
+
+Introduction goes here.
+
+
+Example
+=======
+
+Example goes here.
+
+
+Copyright (c) 2008 Matt Yoder, released under the MIT license
1 init.rb
@@ -0,0 +1 @@
+# Include hook code here
1 install.rb
@@ -0,0 +1 @@
+# Install hook code here
66 lib/lexer.rb
@@ -0,0 +1,66 @@
+
+
+class NexusFile::Lexer
+
+ def initialize(input)
+ @input = input
+ # linefeed check the input here -
+    @input.gsub!(/\x0D/,"") # get rid of possible DOS carriage returns
+ @next_token = nil
+ end
+
+ # checks whether the next token is of the specified class.
+ def peek(token_class)
+ token = read_next_token(token_class)
+ return token.class == token_class
+ end
+
+ # return (and delete) the next token from the input stream, or raise an exception
+ # if the next token is not of the given class.
+ def pop(token_class)
+ token = read_next_token(token_class)
+ @next_token = nil
+ if token.class != token_class
+ raise(NexusFile::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
+ else
+ return token
+ end
+ end
+
+ private
+ # read (and store) the next token from the input, if it has not already been read.
+ def read_next_token(token_class)
+ if @next_token
+ return @next_token
+ else
+ # check for a match on the specified class first
+ if match(token_class)
+ return @next_token
+ else
+ # now check all the tokens for a match
+ NexusFile::Tokens.nexus_file_token_list.each {|t|
+ return @next_token if match(t)
+ }
+ end
+ # no match, either end of string or lex-error
+ if @input != ''
+ raise( NexusFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
+ else
+ return nil
+ end
+ end
+ end
+
+ def match(token_class)
+ if (m = token_class.regexp.match(@input))
+ @next_token = token_class.new(m[1])
+ @input = @input[m.end(0)..-1]
+ return true
+ else
+ return false
+ end
+ end
+end
+
+
+
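The lexer's peek/pop pair above drives the whole parser: peek matches the next token against a class and caches it without discarding it, while pop returns the cached or freshly matched token and raises NexusFile::ParseError on a class mismatch. A minimal usage sketch, not part of this commit, with an invented input string:

    lexer = NexusFile::Lexer.new("BEGIN TAXA; Homo_sapiens ;")
    lexer.peek(NexusFile::Tokens::BeginBlk)     # => true; the token is cached for the next pop
    lexer.pop(NexusFile::Tokens::BeginBlk)      # consumes "BEGIN"
    lexer.pop(NexusFile::Tokens::TaxaBlk)       # consumes "TAXA;"
    lexer.pop(NexusFile::Tokens::Label).value   # => "Homo_sapiens"
    lexer.pop(NexusFile::Tokens::SemiColon)     # consumes the trailing ";"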
282 lib/nexus_file.rb
@@ -0,0 +1,282 @@
+# NexusParser
+
+# version 0.3 by Matt Yoder
+# uses the PhyloTree parser/lexer engine by Krishna Dole, which in turn was based on
+# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
+
+# outstanding issues:
+## need to resolve the overlap among the Label, ValuePair, and ID tokens
+
+module NexusFile
+
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
+
+class NexusFile
+
+ attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
+
+ def initialize
+ @taxa = []
+ @characters = []
+ @sets = []
+ @codings = []
+ @notes = []
+ @vars = {}
+ end
+
+ class Character
+ attr_accessor :name, :states, :notes
+ def initialize
+ @name = nil
+ @states = {}
+ @notes = []
+ end
+
+ # requires :label
+ def add_state(options = {})
+ @opt = {
+ :name => ''
+ }.merge!(options)
+ return false if !@opt[:label]
+
+ @states.update(@opt[:label] => ChrState.new(@opt[:name]))
+ end
+
+ # test this
+ def state_labels
+ @states.keys.sort
+ end
+
+ def name
+ ((@name == "") || (@name == nil)) ? "Undefined" : @name
+ end
+ end
+
+ class Taxon
+ attr_accessor :name, :mesq_id, :notes
+ def initialize
+ @name = ""
+ @mesq_id = ""
+ @notes = []
+ end
+ end
+
+ class ChrState
+ # state is stored as a key in Characters.states
+ attr_accessor :name, :notes
+ def initialize(name)
+ @name = name
+ end
+ end
+
+ class Coding
+ # unfortunately we need this for notes
+ attr_accessor :states, :notes
+ def initialize(options = {})
+ @states = options[:states]
+ @notes = []
+ end
+
+ def states
+ @states.class == Array ? @states : [@states]
+ end
+ end
+
+ class Note
+ attr_accessor :vars
+ def initialize(options = {})
+ @vars = options
+ end
+
+ def note
+ n = ''
+ if @vars[:tf]
+ n = @vars[:tf]
+ elsif @vars[:text]
+ n = @vars[:text]
+ else
+ n = 'No text recovered, possible parsing error.'
+ end
+
+      # THIS IS A HACK for handling the TF = (CM <note>) format. I assume there will be other params beyond CM in the future; at that point move processing to the parser.
+ if n[0..2] =~ /\A\s*\(\s*CM\s*/i
+ n.strip!
+        n = n[1..-2] if n[0..0] == "(" # strip the surrounding parentheses
+ n.strip!
+ n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
+ n.strip!
+ n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
+ n = n[1..-2] if n[0..0] == '"'
+ end
+ n.strip
+ end
+ end
+
+end
+
+
+# constructs the NexusFile
+class Builder
+
+ def initialize
+ @nf = NexusFile.new
+ end
+
+ def stub_taxon
+ @nf.taxa.push(NexusFile::Taxon.new)
+ return @nf.taxa.size
+ end
+
+ def stub_chr
+ @nf.characters.push(NexusFile::Character.new)
+ return @nf.characters.size
+ end
+
+ def code_row(taxon_index, rowvector)
+
+ @nf.characters.each_with_index do |c, i|
+ @nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
+ @nf.codings[taxon_index.to_i][i] = NexusFile::Coding.new(:states => rowvector[i])
+
+      # !! we must update states for a given character if the state isn't found (not all states are referenced in the description) !!
+
+ existing_states = @nf.characters[i].state_labels
+
+ new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
+ new_states.delete("?") # we don't add this to the db
+ new_states = new_states - existing_states
+
+ new_states.each do |s|
+ @nf.characters[i].add_state(:label => s)
+ end
+
+ end
+ end
+
+ def add_var(hash)
+ hash.keys.each do |k|
+      raise "var #{k} has already been set" if @nf.vars[k]
+ end
+ @nf.vars.update(hash)
+ end
+
+ def update_taxon(options = {})
+
+ @opt = {
+ :name => ''
+ }.merge!(options)
+ return false if !@opt[:index]
+
+ (@nf.taxa[@opt[:index]].name = @opt[:name]) if @opt[:name]
+ end
+
+ # legal hash keys are :index, :name, and integers that point to state labels
+ def update_chr(options = {} )
+ @opt = {
+ :name => ''
+ }.merge!(options)
+ return false if !@opt[:index]
+
+ @index = @opt[:index].to_i
+
+ # need to create the characters
+
+ raise(NexusFile::ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
+
+ (@nf.characters[@index].name = @opt[:name]) if @opt[:name]
+
+ @opt.delete(:index)
+ @opt.delete(:name)
+
+ # the rest have states
+ @opt.keys.each do |k|
+
+ if (@nf.characters[@index].states != {}) && @nf.characters[@index].states[k] # state exists
+
+ ## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
+ update_state(@index, :index => k, :name => @opt[k])
+
+ else # doesn't, create it
+ @nf.characters[@index].add_state(:label => k.to_s, :name => @opt[k])
+ end
+ end
+
+ end
+
+ def update_state(chr_index, options = {})
+ # only handling name now
+ #options.keys.each do |k|
+ @nf.characters[chr_index].states[options[:index]].name = options[:name]
+ # add notes here
+ # end
+ end
+
+ def add_note(options = {})
+ @opt = {
+ :text => ''
+ }.merge!(options)
+
+ case @opt[:type]
+
+    # Why does Mesquite differentiate between footnotes and annotations?! Apparently the same data structure.
+ when 'TEXT' # a footnote
+ if @opt[:file]
+ @nf.notes << NexusFile::Note.new(@opt)
+
+      elsif @opt[:taxon] && @opt[:character] # it's a cell, parse this case
+ @nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
+ @nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusFile::Note.new(@opt)
+
+ elsif @opt[:taxon] && !@opt[:character]
+ @nf.taxa[@opt[:taxon].to_i - 1].notes << NexusFile::Note.new(@opt)
+
+ elsif @opt[:character] && !@opt[:taxon]
+
+ @nf.characters[@opt[:character].to_i - 1].notes << NexusFile::Note.new(@opt)
+ end
+
+    when 'AN' # an annotation rather than a footnote; effectively the same thing
+ if @opt[:t] && @opt[:c]
+ @nf.codings[@opt[:t].to_i - 1][@opt[:c].to_i - 1].notes = [] if !@nf.codings[@opt[:t].to_i - 1][@opt[:c].to_i - 1].notes
+ @nf.codings[@opt[:t].to_i - 1][@opt[:c].to_i - 1].notes << NexusFile::Note.new(@opt)
+ elsif @opt[:t]
+ @nf.taxa[@opt[:t].to_i - 1].notes << NexusFile::Note.new(@opt)
+ elsif @opt[:c]
+ @nf.characters[@opt[:c].to_i - 1].notes << NexusFile::Note.new(@opt)
+ end
+ end
+
+ end
+
+ def nexus_file
+ @nf
+ end
+
+end # end Builder
+
+ # NexusFile::ParseError
+ class ParseError < StandardError
+ end
+
+
+end # end module
+
+
+def parse_nexus_file(input)
+ @input = input
+ @input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
+
+ # quickly peek at the input, does this look like a Nexus file?
+ if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
+ raise(NexusFile::ParseError, "File is missing at least some required headers, check formatting.", caller)
+ end
+
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(@input)
+ NexusFile::Parser.new(lexer, builder).parse_file
+
+ return builder.nexus_file
+end
+
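parse_nexus_file above is the entry point: it strips bracketed comments, sanity-checks the headers, then wires a Lexer, Parser, and Builder together and returns the populated NexusFile. A rough usage sketch, not part of this commit, using a made-up two-taxon, two-character matrix (expected values follow from the code as committed, assuming lib/nexus_file has been loaded):

    doc = <<-NEX
    #NEXUS
    BEGIN TAXA;
      DIMENSIONS NTAX=2;
      TAXLABELS Alpha Beta;
    END;
    BEGIN CHARACTERS;
      DIMENSIONS NCHAR=2;
      CHARSTATELABELS 1 head_color / red blue, 2 leg_count;
      MATRIX
      Alpha 01
      Beta  10
      ;
    END;
    NEX

    nf = parse_nexus_file(doc)
    nf.taxa.collect{|t| t.name}          # => ["Alpha", "Beta"]
    nf.characters.collect{|c| c.name}    # => ["head_color", "leg_count"]
    nf.characters[0].states["1"].name    # => "blue"
    nf.codings[0][1].states              # => ["1"] (taxon Alpha, character 2)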
0 lib/nexus_parser.rb
No changes.
334 lib/parser.rb
@@ -0,0 +1,334 @@
+
+class NexusFile::Parser
+
+ def initialize(lexer, builder)
+ @lexer = lexer
+ @builder = builder
+ end
+
+ def parse_file
+ # nf = @builder.new_nexus_file # create new local NexusFile instance, nf
+ blks = []
+ @lexer.pop(NexusFile::Tokens::NexusStart)
+
+ while @lexer.peek(NexusFile::Tokens::BeginBlk)
+
+ @lexer.pop(NexusFile::Tokens::BeginBlk) # pop it
+
+ if @lexer.peek(NexusFile::Tokens::AuthorsBlk)
+ parse_authors_blk
+
+ # we parse these below
+ elsif @lexer.peek(NexusFile::Tokens::TaxaBlk)
+
+ @lexer.pop(NexusFile::Tokens::TaxaBlk )
+ parse_taxa_blk
+
+ elsif @lexer.peek(NexusFile::Tokens::ChrsBlk)
+ @lexer.pop(NexusFile::Tokens::ChrsBlk)
+ parse_characters_blk
+
+ elsif @lexer.peek(NexusFile::Tokens::NotesBlk)
+ @lexer.pop(NexusFile::Tokens::NotesBlk)
+ parse_notes_blk
+
+ # we should parse this
+ elsif @lexer.peek(NexusFile::Tokens::SetsBlk)
+ @lexer.pop(NexusFile::Tokens::SetsBlk)
+
+ # we don't parse these
+ elsif @lexer.peek(NexusFile::Tokens::TreesBlk)
+ @foo = @lexer.pop(NexusFile::Tokens::TreesBlk).value
+
+ elsif @lexer.peek(NexusFile::Tokens::LabelsBlk)
+ @lexer.pop(NexusFile::Tokens::LabelsBlk)
+
+ elsif @lexer.peek(NexusFile::Tokens::MqCharModelsBlk)
+ @lexer.pop(NexusFile::Tokens::MqCharModelsBlk)
+
+ elsif @lexer.peek(NexusFile::Tokens::AssumptionsBlk)
+ @lexer.pop(NexusFile::Tokens::AssumptionsBlk)
+
+ elsif @lexer.peek(NexusFile::Tokens::CodonsBlk)
+ @lexer.pop(NexusFile::Tokens::CodonsBlk)
+ end
+
+ end
+ end
+
+ # just removes it for the time being
+ def parse_authors_blk
+    # this block has non-single-word key/value pairs, like "AUTHOR NAME", sigh
+ # for now just slurp it all up.
+ @lexer.pop(NexusFile::Tokens::AuthorsBlk )
+
+ #while true
+ # if @lexer.peek(NexusFile::Tokens::EndBlk)
+ # @lexer.pop(NexusFile::Tokens::EndBlk)
+ # break
+ # else
+
+ # while @lexer.peek(NexusFile::Tokens::ValuePair)
+ # # IMPORTANT, these are going to a general hash, there may ultimately be overlap of keys used in different blocks, this is ignored at present
+ # @builder.add_var(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ # end
+
+ #@lexer.pop(NexusFile::Tokens::ID) if @lexer.peek(NexusFile::Tokens::ID)
+ # end
+ #end
+ end
+
+ def parse_taxa_blk
+ @lexer.pop(NexusFile::Tokens::Title) if @lexer.peek(NexusFile::Tokens::Title)
+
+    # need to stop ignoring this so we can test against it
+ parse_dimensions if @lexer.peek(NexusFile::Tokens::Dimensions)
+
+ while true
+ if @lexer.peek(NexusFile::Tokens::EndBlk)
+ @lexer.pop(NexusFile::Tokens::EndBlk)
+ break
+ else
+
+ if @lexer.peek(NexusFile::Tokens::Taxlabels)
+ @lexer.pop(NexusFile::Tokens::Taxlabels) if @lexer.peek(NexusFile::Tokens::Taxlabels)
+ i = 0
+ while @lexer.peek(NexusFile::Tokens::Label)
+ @builder.update_taxon(:index => i, :name => @lexer.pop(NexusFile::Tokens::Label).value)
+ i += 1
+ end
+ @lexer.pop(NexusFile::Tokens::SemiColon) if @lexer.peek(NexusFile::Tokens::SemiColon) # close of tax labels, placement of this seems dubious... but tests are working
+
+ elsif @lexer.peek(NexusFile::Tokens::MesquiteIDs)
+
+ @lexer.pop(NexusFile::Tokens::MesquiteIDs) # trashing these for now
+ elsif @lexer.peek(NexusFile::Tokens::MesquiteBlockID)
+ @lexer.pop(NexusFile::Tokens::MesquiteBlockID)
+ end
+
+ end
+ end
+
+
+ end
+
+ def parse_characters_blk
+ while true
+      if @lexer.peek(NexusFile::Tokens::EndBlk) # we're at the end of the block, exit after getting rid of the semi-colon
+ break
+ else
+ @lexer.pop(NexusFile::Tokens::Title) if @lexer.peek(NexusFile::Tokens::Title) # not used at present
+
+ parse_dimensions if @lexer.peek(NexusFile::Tokens::Dimensions)
+ parse_format if @lexer.peek(NexusFile::Tokens::Format)
+
+ parse_chr_state_labels if @lexer.peek(NexusFile::Tokens::CharStateLabels)
+
+ parse_matrix if @lexer.peek(NexusFile::Tokens::Matrix)
+
+ # handle "\s*OPTIONS MSTAXA = UNCERTAIN;\s\n" within a characters block (sticks in an infinite loop right now)
+
+ @lexer.pop(NexusFile::Tokens::MesquiteIDs) if @lexer.peek(NexusFile::Tokens::MesquiteIDs) # trashing these for now
+ @lexer.pop(NexusFile::Tokens::MesquiteBlockID) if @lexer.peek(NexusFile::Tokens::MesquiteBlockID) # trashing these for now
+
+ false
+ end
+ end
+ @lexer.pop(NexusFile::Tokens::EndBlk)
+ end
+
+  # probably pop the header here and then merge this with parse_dimensions
+ def parse_format
+ @lexer.pop(NexusFile::Tokens::Format)
+ while @lexer.peek(NexusFile::Tokens::ValuePair)
+ @builder.add_var(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ end
+
+ check_initialization_of_ntax_nchar
+ end
+
+ def parse_dimensions
+ @lexer.pop(NexusFile::Tokens::Dimensions)
+ while @lexer.peek(NexusFile::Tokens::ValuePair)
+ @builder.add_var(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ end
+ # the last value pair with a ; is automagically handled, don't try popping it again
+
+ check_initialization_of_ntax_nchar
+ end
+
+ def check_initialization_of_ntax_nchar
+ # check for character dimensions, if otherwise not set generate them
+ if @builder.nexus_file.vars[:nchar] && @builder.nexus_file.characters == []
+ (0..(@builder.nexus_file.vars[:nchar].to_i - 1)).each {|i| @builder.stub_chr }
+ end
+
+ # check for taxa dimensions, if otherwise not set generate them
+ if @builder.nexus_file.vars[:ntax] && @builder.nexus_file.taxa == []
+ (0..(@builder.nexus_file.vars[:ntax].to_i - 1)).each {|i| @builder.stub_taxon }
+ end
+ end
+
+ def parse_chr_state_labels
+ @lexer.pop(NexusFile::Tokens::CharStateLabels)
+
+ while true
+ if @lexer.peek(NexusFile::Tokens::SemiColon)
+ break
+ else
+ opts = {}
+
+ name = ""
+ index = @lexer.pop(NexusFile::Tokens::Number).value.to_i
+ (name = @lexer.pop(NexusFile::Tokens::Label).value) if @lexer.peek(NexusFile::Tokens::Label) # not always given a letter
+
+ @lexer.pop(NexusFile::Tokens::BckSlash) if @lexer.peek(NexusFile::Tokens::BckSlash)
+
+ if !@lexer.peek(NexusFile::Tokens::Comma) || !@lexer.peek(NexusFile::Tokens::SemiColon)
+ i = 0
+
+ # three kludge lines, need to figure out the label/number priority, could be issue in list order w/in tokens
+ while @lexer.peek(NexusFile::Tokens::Label) || @lexer.peek(NexusFile::Tokens::Number)
+ opts.update({i.to_s => @lexer.pop(NexusFile::Tokens::Label).value}) if @lexer.peek(NexusFile::Tokens::Label)
+ opts.update({i.to_s => @lexer.pop(NexusFile::Tokens::Number).value.to_s}) if @lexer.peek(NexusFile::Tokens::Number)
+
+ i += 1
+ end
+ end
+
+ @lexer.pop(NexusFile::Tokens::Comma) if @lexer.peek(NexusFile::Tokens::Comma) # we may also have hit semicolon
+
+ opts.update({:index => (index - 1), :name => name})
+
+        raise(NexusFile::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
+ @builder.update_chr(opts)
+ end
+
+ end
+ @lexer.pop(NexusFile::Tokens::SemiColon)
+ end
+
+ def parse_matrix
+ @lexer.pop(NexusFile::Tokens::Matrix)
+ i = 0
+ while true
+ if @lexer.peek(NexusFile::Tokens::SemiColon)
+ break
+ else
+ t = @lexer.pop(NexusFile::Tokens::Label).value
+
+        @builder.update_taxon(:index => i, :name => t) # if it exists it's not re-added
+
+ @builder.code_row(i, @lexer.pop(NexusFile::Tokens::RowVec).value)
+
+ i += 1
+ end
+ end
+ @lexer.pop(NexusFile::Tokens::SemiColon) # pop the semicolon
+ end
+
+  # this is ugly; it needs rework when a better API for Mesquite comes out
+ def parse_notes_blk
+ # IMPORTANT - we don't parse the (CM <note>), we just strip the "(CM" ... ")" bit for now in NexusFile::Note
+
+ @vars = {}
+ inf = 0
+ while true
+ inf += 1
+      raise "Either you have a gazillion notes or, more likely, the parser is caught in an infinite loop inside parse_notes_blk" if inf > 100000
+ if @lexer.peek(NexusFile::Tokens::EndBlk)
+ @lexer.pop(NexusFile::Tokens::EndBlk)
+ @builder.add_note(@vars) # one still left to add
+ break
+ else
+
+ if @lexer.peek(NexusFile::Tokens::ValuePair)
+ @vars.update(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+
+ elsif @lexer.peek(NexusFile::Tokens::Label)
+          if @vars[:type] # we have the data for this row; write it and start a new one
+
+ @builder.add_note(@vars)
+ @vars = {}
+ else
+ @vars.update(:type => @lexer.pop(NexusFile::Tokens::Label).value)
+ end
+ elsif @lexer.peek(NexusFile::Tokens::FileLbl)
+ @lexer.pop(NexusFile::Tokens::FileLbl)
+          @vars.update(:file => 'file') # we check whether the :file key is present and handle it conditionally
+ end
+ end
+ end
+ end
+
+ #@vars = {}
+ #while true
+
+ # break if @lexer.peek(NexusFile::Tokens::EndBlk)
+
+ # @vars.update(:type => @lexer.pop(NexusFile::Tokens::Label).value)
+
+ # kludge to get around the funny construct that references file
+ # if @lexer.peek(NexusFile::Tokens::FileLbl)
+ # @lexer.pop(NexusFile::Tokens::FileLbl)
+ # vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
+ # end
+
+ # while true
+
+ # meh = @lexer.pop(NexusFile::Tokens::ValuePair)
+ # @vars.update(meh.value)
+ # break if !@lexer.peek(NexusFile::Tokens::ValuePair)
+ # end
+ #
+ # @builder.add_note(@vars)
+ # @vars = {}
+ #end
+ # @lexer.pop(NexusFile::Tokens::EndBlk)
+
+
+ def parse_trees_blk
+ true
+ end
+
+ def parse_labels_blk
+
+ end
+
+ def parse_sets_blk
+ end
+
+ def parse_assumptions_blk
+ end
+
+  def parse_codons_blk
+ # not likely
+ end
+
+ def parse_mesquitecharmodels_blk
+ # nor this
+ end
+
+
+ def parse_mesquite_blk
+
+ end
+
+
+
+ # def parse_children(parent)
+ # parse a comma-separated list of nodes
+ # while true
+ # parse_node(parent)
+ # if @lexer.peek(NexusFile::Tokens::Comma)
+ # @lexer.pop(NexusFile::Tokens::Comma)
+ # else
+ # break
+ # end
+ # end
+ # end
+
+end
+
+
+
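For orientation: parse_file above pops the #NEXUS header, then loops on Begin tokens and dispatches each block to its parse_*_blk method (TREES, LABELS, ASSUMPTIONS and friends are currently swallowed whole by their block tokens). A sketch, not part of this commit, of driving the parser directly instead of going through parse_nexus_file; the input is invented:

    input   = "#NEXUS\nBEGIN TAXA;\nDIMENSIONS NTAX=2;\nTAXLABELS Alpha Beta;\nEND;\n"
    lexer   = NexusFile::Lexer.new(input)
    builder = NexusFile::Builder.new
    NexusFile::Parser.new(lexer, builder).parse_file
    builder.nexus_file.taxa.collect{|t| t.name}   # => ["Alpha", "Beta"]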
269 lib/tokens.rb
@@ -0,0 +1,269 @@
+module NexusFile::Tokens
+
+ class Token
+    # this allows access to the class attribute regexp without using a class variable
+ class << self; attr_reader :regexp; end
+ attr_reader :value
+ def initialize(str)
+ @value = str
+ end
+ end
+
+  # in Ruby, \A is needed to match only at the beginning of the string; we need it everywhere,
+  # as we're moving along popping tokens off the front of the input
+
+ class NexusStart < Token
+ @regexp = Regexp.new(/\A.*(\#nexus)\s*/i)
+ end
+
+  # at present we strip comments before parser initialization; because they can be placed anywhere it gets tricky to parse them otherwise, and besides, they are non-standard
+ # class NexusComment < Token
+ # @regexp = Regexp.new(/\A\s*(\[[^\]]*\])\s*/i)
+ # def initialize(str)
+ # str = str[1..-2] # strip the []
+ # str.strip!
+ # @value = str
+ # end
+ # end
+
+ class BeginBlk < Token
+ @regexp = Regexp.new(/\A\s*(\s*Begin\s*)/i)
+ end
+
+ class EndBlk < Token
+ @regexp = Regexp.new(/\A\s*([\s\n]*End[\s\n]*;[\s\n]*)/i)
+ end
+
+ # label
+ class AuthorsBlk < Token
+ @regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
+ end
+
+ # label
+ class TaxaBlk < Token
+ @regexp = Regexp.new(/\A\s*(\s*Taxa\s*;)\s*/i)
+ end
+
+ # label
+ class NotesBlk < Token
+ @regexp = Regexp.new(/\A\s*(\s*Notes\s*;)\s*/i)
+ end
+
+ class FileLbl < Token
+ @regexp = Regexp.new(/\A\s*(\s*File\s*)\s*/i)
+ end
+
+ # label and content
+ class Title < Token
+ @regexp = Regexp.new(/\A\s*(title[^\;]*;)\s*/i)
+ end
+
+ class Dimensions < Token
+ @regexp = Regexp.new(/\A\s*(DIMENSIONS)\s*/i)
+ end
+
+ class Format < Token
+ @regexp = Regexp.new(/\A\s*(format)\s*/i)
+ end
+
+ # label
+ class Taxlabels < Token
+ @regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
+ end
+
+ # same as ID
+ class Label < Token
+ @regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*') # matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" " # choking on 'Foo_stuff_things'
+ def initialize(str)
+ str.strip!
+ str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
+ str = str[1..-2] if str[0..0] == '"'
+ str.strip!
+ @value = str
+ end
+ end
+
+ class ChrsBlk < Token
+ @regexp = Regexp.new(/\A\s*(characters\s*;)\s*/i)
+ end
+
+ # note we grab EOL and ; here
+ class ValuePair < Token
+ @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
+ def initialize(str)
+ str.strip!
+ str = str.split(/=/)
+ str[1].strip!
+ str[1] = str[1][1..-2] if str[1][0..0] == "'"
+ str[1] = str[1][1..-2] if str[1][0..0] == "\""
+ @value = {str[0].strip.downcase.to_sym => str[1].strip}
+ end
+ end
+
+ class Matrix < Token
+ @regexp = Regexp.new(/\A\s*(matrix)\s*/i)
+ end
+
+ class RowVec < Token
+ @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
+ def initialize(str)
+ # meh! Ruby is simpler to read than Perl?
+ # handles both () and {} style multistates
+ s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
+ @value = s
+ end
+ end
+
+ class CharStateLabels < Token
+ @regexp = Regexp.new(/\A\s*(CHARSTATELABELS)\s*/i)
+ end
+
+ class MesquiteIDs < Token
+ @regexp = Regexp.new(/\A\s*(IDS[^;]*;)\s*/i)
+ end
+
+ class MesquiteBlockID < Token
+ @regexp = Regexp.new(/\A\s*(BLOCKID[^;]*;)\s*/i)
+ end
+
+ # unparsed blocks
+
+ class TreesBlk < Token
+ @regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
+ end
+
+ class SetsBlk < Token
+ @regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
+ end
+
+ class MqCharModelsBlk < Token
+ @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
+ end
+
+ class LabelsBlk < Token
+ @regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
+ end
+
+ class AssumptionsBlk < Token
+ @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
+ end
+
+ class CodonsBlk < Token
+ @regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
+ end
+
+ class MesquiteBlk < Token
+ @regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
+ end
+
+ class BlkEnd < Token
+ @regexp = Regexp.new(/\A[\s\n]*(END;)\s*/i)
+ end
+
+ class LBracket < Token
+ @regexp = Regexp.new('\A\s*(\[)\s*')
+ end
+
+ class RBracket < Token
+ @regexp = Regexp.new('\A\s*(\])\s*')
+ end
+
+ class LParen < Token
+ @regexp = Regexp.new('\A\s*(\()\s*')
+ end
+
+ class RParen < Token
+ @regexp = Regexp.new('\A\s*(\))\s*')
+ end
+
+ class Equals < Token
+ @regexp = Regexp.new('\A\s*(=)\s*')
+ end
+
+ class BckSlash < Token
+ @regexp = Regexp.new('\A\s*(\/)\s*')
+ end
+
+ # labels
+ class ID < Token
+ @regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
+ def initialize(str)
+ str.strip!
+ str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
+ @value = str
+ end
+ end
+
+ class Colon < Token
+ @regexp = Regexp.new('\A\s*(:)\s*')
+ end
+
+ class SemiColon < Token
+ @regexp = Regexp.new('\A\s*(;)\s*')
+ end
+
+ class Comma < Token
+ @regexp = Regexp.new('\A\s*(\,)\s*')
+ end
+
+ class Number < Token
+ @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
+ def initialize(str)
+      # a little oddness here, in some cases we don't want to include the .0
+ # see issues with numbers as labels
+ if str =~ /\./
+ @value = str.to_f
+ else
+ @value = str.to_i
+ end
+
+ end
+ end
+
+ # NexusFile::Tokens::NexusComment
+
+ # this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
+ def self.nexus_file_token_list
+ [ NexusFile::Tokens::NexusStart,
+ NexusFile::Tokens::BeginBlk,
+ NexusFile::Tokens::EndBlk,
+ NexusFile::Tokens::AuthorsBlk,
+ NexusFile::Tokens::SetsBlk,
+ NexusFile::Tokens::MqCharModelsBlk,
+ NexusFile::Tokens::AssumptionsBlk,
+ NexusFile::Tokens::CodonsBlk,
+ NexusFile::Tokens::MesquiteBlk,
+ NexusFile::Tokens::TreesBlk,
+ NexusFile::Tokens::LabelsBlk,
+ NexusFile::Tokens::TaxaBlk,
+ NexusFile::Tokens::NotesBlk,
+ NexusFile::Tokens::Title,
+ NexusFile::Tokens::Taxlabels,
+ NexusFile::Tokens::Dimensions,
+ NexusFile::Tokens::FileLbl,
+ NexusFile::Tokens::Format,
+ NexusFile::Tokens::Equals,
+      NexusFile::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, it's a lesser Label)
+ NexusFile::Tokens::CharStateLabels,
+ NexusFile::Tokens::ChrsBlk,
+ NexusFile::Tokens::Number,
+ NexusFile::Tokens::Matrix,
+ NexusFile::Tokens::SemiColon,
+ NexusFile::Tokens::MesquiteIDs,
+ NexusFile::Tokens::MesquiteBlockID,
+ NexusFile::Tokens::BlkEnd,
+ NexusFile::Tokens::Colon,
+ NexusFile::Tokens::BckSlash,
+ NexusFile::Tokens::Comma,
+ NexusFile::Tokens::LParen,
+ NexusFile::Tokens::RParen,
+ NexusFile::Tokens::LBracket,
+ NexusFile::Tokens::RBracket,
+ NexusFile::Tokens::Label, # must be before RowVec
+ NexusFile::Tokens::RowVec,
+ NexusFile::Tokens::ID # need to trash this
+ ]
+ end
+
+end
+
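Every token class above anchors its regexp with \A and captures its payload in group 1, so the lexer can slice matches off the front of the remaining input; subclasses such as ValuePair and Label then post-process the capture in initialize. A small sketch, not part of this commit, of what ValuePair yields for an invented FORMAT-style fragment:

    lexer = NexusFile::Lexer.new("DATATYPE = STANDARD GAP = - ;")
    lexer.pop(NexusFile::Tokens::ValuePair).value   # => {:datatype => "STANDARD"}
    lexer.pop(NexusFile::Tokens::ValuePair).value   # => {:gap => "-"}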
4 tasks/nexus_parser_tasks.rake
@@ -0,0 +1,4 @@
+# desc "Explaining what the task does"
+# task :nexus_parser do
+# # Task goes here
+# end
234 test/MX_test_03.nex
@@ -0,0 +1,234 @@
+#NEXUS
+[written Sun Apr 20 17:45:47 EDT 2008 by Mesquite version 2.0 (build i69) at procto/192.168.0.100]
+BEGIN AUTHORS;
+AUTHOR NAME = Jonathan_Coddington CODE = JC;
+END;
+
+
+BEGIN TAXA;
+ TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NTAX=10;
+ TAXLABELS
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
+ ;
+ IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;
+ BLOCKID JC1191fcddc0c4;
+
+END;
+
+
+BEGIN CHARACTERS;
+ TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NCHAR=10;
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = " 0 1 2 3 4 5 6 7 8 9 A";
+ CHARSTATELABELS
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
+ MATRIX
+ Dictyna 0?00201001
+ Uloborus 0?11000000
+ Deinopis 0?01002???
+ Nephila&Herennia 0?21010011
+ 'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11
+ Meta 0?01A10011
+ Leucauge_venusta ???--?-??-
+ Pachygnatha 0?210(0,1)0011
+ 'Theridiosoma_01' ??????????
+ Tetragnatha 0?01011011
+
+;
+ IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;
+ BLOCKID JC1191fcddc0c0;
+
+
+END;
+BEGIN TREES;
+ Title Imported_trees;
+ LINK Taxa = 'Scharff&Coddington_1997_Araneidae';
+ TRANSLATE
+ 1 Dictyna,
+ 2 Uloborus,
+ 3 Deinopis,
+ 4 Nephila&Herennia,
+ 5 'Nephilengys_cruentata',
+ 6 Meta,
+ 7 Leucauge_venusta,
+ 8 Pachygnatha,
+ 9 'Theridiosoma_01',
+ 10 Tetragnatha;
+ TREE 'Imported tree 1+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 2+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 3+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+ TREE 'Imported tree 4+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 5+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+ TREE 'Imported tree 6+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 7+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+ TREE 'Imported tree 8+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+
+END;
+
+
+BEGIN LABELS;
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
+
+
+END;
+
+BEGIN SETS;
+CHARPARTITION * UNTITLED = Somatic : 1 - 2 4, MM_Genitalia : 5 - 8 10;
+
+END;
+
+BEGIN ASSUMPTIONS;
+ TYPESET * UNTITLED = unord: 1 - 10;
+
+ EXSET * UNTITLED = ;
+
+ WTSET * UNTITLED = 1: 1 - 10 ;
+
+END;
+
+BEGIN CODONS;
+CODESET * UNTITLED = universal: 1 - 10;
+
+
+END;
+
+BEGIN MESQUITECHARMODELS;
+ ProbModelSet * UNTITLED = 'Mk1 (est.)': 1 - 10;
+END;
+
+BEGIN NOTES;
+
+ TEXT TAXA = 'Scharff&Coddington_1997_Araneidae' TAXON = 2 TEXT = 'This is a footnote to taxon 2, Uloborus';
+
+ TEXT TAXON = 4 CHARACTER = 8 TEXT = This_is_a_footnote_to_a_cell.;
+
+ TEXT CHARACTER = 10 TEXT = This_is_footnote_to_char_10;
+
+ TEXT FILE TEXT = 'Scharff, N. and J. A. Coddington. 1997. A phylogenetic analysis of the orb-weaving spider family Araneidae (Arachnida, Araneae). Zool. J. Linn. Soc. 120(4): 355?434';
+
+ AN T = 4 A = JC DC = 2008.4.13.20.31.19 DM = 2008.4.13.20.31.38 ID = 01194a57d0161 I = _ TF = (CM 'This is an "annotation" to taxon 4') ;
+
+ AN C = 4 A = JC DC = 2008.4.13.20.31.50 DM = 2008.4.13.20.32.10 ID = 01194a584b9f2 I = _ TF = (CM 'This is an annotation to charcter 4, that has no name.') ;
+
+ AN T = 9 C = 3 A = 0 DC = 2008.4.20.17.24.36 DM = 2008.4.20.17.25.4 ID = 01196db963874 I = _ TF = (CM 'This is an annotation to chr 3, taxa 9, coded ?') ;
+
+ AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ;
+
+ AN T = 7 C = 10 A = 0 DC = 2008.4.20.17.25.11 DM = 2008.4.20.17.26.1 ID = 01196db9ebd25 I = _ TF = (CM 'this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -') ;
+
+END;
+
+Begin MESQUITE;
+ MESQUITESCRIPTVERSION 2;
+ TITLE AUTO;
+ tell ProjectCoordinator;
+ getEmployee #mesquite.minimal.ManageTaxa.ManageTaxa;
+ tell It;
+ setID 0 6498976404544962919 JC1191fcddc0c4;
+ tell It;
+ setDefaultOrder 0 1 2 3 4 5 6 7 9 8;
+ attachments ;
+ endTell;
+ endTell;
+ getEmployee #mesquite.charMatrices.ManageCharacters.ManageCharacters;
+ tell It;
+ setID 0 2280451364422511229 JC1191fcddc0c0;
+ checksum 0 3839411132 JC1191fcddc0c0;
+ endTell;
+ getEmployee #mesquite.charMatrices.BasicDataWindowCoord.BasicDataWindowCoord;
+ tell It;
+ showDataWindow #2280451364422511229 #mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindowMaker;
+ tell It;
+ getWindow;
+ tell It;
+ getTable;
+ tell It;
+ columnWidth 6 55;
+ columnWidth 7 23;
+ endTell;
+ setExplanationSize 30;
+ setAnnotationSize 20;
+ setFontIncAnnot 0;
+ setFontIncExp 0;
+ setSize 925 472;
+ setLocation 357 294;
+ setFont SanSerif;
+ setFontSize 10;
+ getToolPalette;
+ tell It;
+ setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.ibeam;
+ endTell;
+ setActive;
+ setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.ibeam;
+ colorCells #mesquite.charMatrices.AnnotPanel.AnnotPanel;
+ tell It;
+ togglePanel on;
+ endTell;
+ setBackground White;
+ toggleShowNames off;
+ toggleTight off;
+ toggleShowChanges on;
+ toggleSeparateLines off;
+ toggleShowStates on;
+ toggleAutoWCharNames off;
+ toggleShowDefaultCharNames off;
+ toggleConstrainCW on;
+ setColumnWidth 16;
+ toggleBirdsEye off;
+ toggleColorsPanel off;
+ birdsEyeWidth 2;
+ toggleLinkedScrolling on;
+ toggleScrollLinkedTables off;
+ endTell;
+ showWindow;
+ getEmployee #mesquite.categ.StateNamesEditor.StateNamesEditor;
+ tell It;
+ makeWindow;
+ tell It;
+ setExplanationSize 30;
+ setAnnotationSize 20;
+ setFontIncAnnot 0;
+ setFontIncExp 0;
+ setSize 925 472;
+ setLocation 357 294;
+ setFont SanSerif;
+ setFontSize 10;
+ getToolPalette;
+ tell It;
+ setTool mesquite.categ.StateNamesEditor.StateNamesWindow.ibeam;
+ endTell;
+ rowsAreCharacters on;
+ toggleConstrainChar on;
+ toggleConstrainCharNum 3;
+ togglePanel off;
+ toggleSummaryPanel off;
+ endTell;
+ showWindow;
+ endTell;
+ getEmployee #mesquite.categ.StateNamesStrip.StateNamesStrip;
+ tell It;
+ showStrip off;
+ endTell;
+ getEmployee #mesquite.charMatrices.CharReferenceStrip.CharReferenceStrip;
+ tell It;
+ showStrip off;
+ endTell;
+ getEmployee #mesquite.charMatrices.ColorCells.ColorCells;
+ tell It;
+ setColor Red;
+ removeColor off;
+ endTell;
+ getEmployee #mesquite.charMatrices.QuickKeySelector.QuickKeySelector;
+ tell It;
+ autotabOff;
+ endTell;
+ endTell;
+ endTell;
+ endTell;
+end;
+
+
10 test/helper.rb
@@ -1,10 +0,0 @@
-require 'rubygems'
-require 'test/unit'
-require 'shoulda'
-
-$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
-$LOAD_PATH.unshift(File.dirname(__FILE__))
-require 'nexus_parser'
-
-class Test::Unit::TestCase
-end
382 test/test.nex
@@ -0,0 +1,382 @@
+#NEXUS
+[written Sun Apr 13 13:59:23 EDT 2008 by Mesquite version 2.01+ (build j52) at NMNHN-CODDINGT/172.17.227.145 (Jonathan Coddington)]
+
+BEGIN TAXA;
+ TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NTAX=70;
+ TAXLABELS
+ Dictyna Uloborus Deinopis Nephila Nephilengys Meta Leucauge Pachygnatha Tetragnatha Theridiosoma Linyphia Pimoa Theridion Chorizopes Gasteracantha Aetrocantha Togacantha Gastroxya Augusta Isoxya Austracantha Macracantha Aspidolasius Caerostris Hypognatha Arkys Archemorus Encyosaccus Xylethrus Chaetacis Micrathena Mastophora Cyrtarachne Pasilobus Arachnura Witica Mecynogea Cyrtophora Neogea Argiope Gea Scoloderus Acanthepeira Anepsion Dolophone Hypsosinga Zygiella Kaira Metepeira Singa Larinia Neoscona Mangora Cercidia Pronous Aculepeira Araneus Bertrana Alpaida Enacrosoma Wixia Acacesia Metazygia Eustala Cyclosa Nuctenea Colphepeira Araniella Eriophora Verrucosa
+ ;
+ IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b136 JC1191fcddc2b137 JC1191fcddc2b138 JC1191fcddc2b139 JC1191fcddc2b140 JC1191fcddc2b141 JC1191fcddc2b142 JC1191fcddc2b143 JC1191fcddc2b144 JC1191fcddc2b145 JC1191fcddc2b146 JC1191fcddc2b147 JC1191fcddc2b148 JC1191fcddc2b149 JC1191fcddc2b150 JC1191fcddc2b151 JC1191fcddc2b152 JC1191fcddc2b153 JC1191fcddc2b154 JC1191fcddc2b155 JC1191fcddc2b156 JC1191fcddc2b157 JC1191fcddc2b158 JC1191fcddc2b159 JC1191fcddc2b160 JC1191fcddc2b161 JC1191fcddc2b162 JC1191fcddc2b163 JC1191fcddc2b164 JC1191fcddc2b165 JC1191fcddc2b166 JC1191fcddc2b167 JC1191fcddc2b168 JC1191fcddc2b169 JC1191fcddc2b170 JC1191fcddc2b171 JC1191fcddc2b172 JC1191fcddc2b173 JC1191fcddc2b174 JC1191fcddc2b175 JC1191fcddc2b176 JC1191fcddc2b177 JC1191fcddc2b178 JC1191fcddc2b179 JC1191fcddc2b180 JC1191fcddc2b181 JC1191fcddc2b182 JC1191fcddc2b183 JC1191fcddc2b184 JC1191fcddc2b185 JC1191fcddc2b186 JC1191fcddc2b187 JC1191fcddc2b188 JC1191fcddc2b189 JC1191fcddc2b190 JC1191fcddc2b191 JC1191fcddc2b192 JC1191fcddc2b193 JC1191fcddc2b194 JC1191fcddc2b195 JC1191fcddc2b196 JC1191fcddc2b197 ;
+ BLOCKID JC1191fcddc0c4;
+
+END;
+
+
+BEGIN CHARACTERS;
+ TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NCHAR=82;
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = " 0 1 2 3 4 5 6";
+ CHARSTATELABELS
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 4 'Palp patellar m-setae' / abs pres, 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 Conductor_shape / entire w_lobe, 9 Conductor_wraps_embolus / abs pres, 10 Median_apophysis / pres abs, 11 MA_bifid_prong / abs pres, 12 MA_threadlike_spur / abs pres, 13 MA_w_transparent_flap / abs pres, 14 MA_large_w_fork / abs pres, 15 MA_ant_margin / smooth toothed, 16 Radix / abs pres, 17 Distal_hematodocha / abs pres, 18 Paramedian_apophysis / abs pres, 19 Stipes / abs pres, 20 Subterminal_apophysis / abs pres, 21 Subterm_apophysis / norm bubble, 22 Terminal_apophysis / abs pres, 23 Embolus_tip / simple w_cap, 24 Embolus / clockwise anticlockwise, 25 F_genitalia / entelogyne haplogyne, 26 Epigynum / pres abs, 27 Genital_tubercle / abs pres, 28 Scape / abs pres, 29 Scape / normal elongate, 30 Scape_w_pocket_tip / abs pres, 31 Scape / smooth wrinkled, 32 'M Coxa + TrIV setae' / normal stout, 33 Coxa_I_hook / abs pres, 34 Femur_II_w_groove / abs pres, 35 Femur_IV_trichobothria / abs pres, 36 'Pat-tibial autospasy' / abs pres, 37 MtIV_trichobothria / 0_or_1 many, 38 'Tibia-Metatarsus' / norm flattened, 39 Serrate_accessory_claw_setae / a p, 40 Sustentaculum / abs pres, 41 Carapace / hirsute glabrous, 42 F_carapace_shape / 'L>W' 'W(L', 43 M_carapace_shape / oval Gast wide, 44 Cheliceral_margin / smooth strid, 45 Endite_tooth / abs pres, 46 Clypeal_tooth / abs pres, 47 F_Cephalon / norm enlarged, 48 Cephalon_height / normal high, 49 'Anterior:Posterior MOA (m72)' / '<1' '>1', 50 'Lat.-median eye separation' / '<1' '>1', 51 PME_canoe_tapetum / abs pres, 52 PME_narrow_tapetum / abs pres, 53 PLE_canoe_tapetum / norm red abs, 54 PE_row / str_to_recurved procurved, 55 Feathery_hairs / abs pres, 56 Abdominal_sigillae / abs pres, 57 Abd._sigillae / scatt 1_row 2_rows, 58 F_abd_w_ventral_condyles / abs pres, 59 M_abd._shape / norm Gast U 3 4 5 6, 60 Abdominal_shape / 'L>W' 'W>L', 61 'F-M size' / '<2x' '>2x', 62 Gast_abdominal_spines / abs pres, 63 Booklung_cover / smooth strid, 64 Booklung_cover / norm grooved, 65 Sclerotized_ring / abs scl solid, 66 Spiracle / normal sclerotized, 67 'Cp + abd setal bases' / normal gast, 68 Cribellum / pres abs, 69 ALS_PY_bases / normal reduced, 70 Narrowed_PY_fields / abs pres, 71 PMS_paracrib / strob deinopoid abs, 72 PMS_mAP_nubbins / abs pres, 73 PMS_AC_brush / pres 'reduced-absent', 74 Orb / abs pres lost, 75 Orb / w_SS no_SS, 76 Radius_behaviour / Ar Ul Cy Ne, 77 'Non-sticky spiral' / temporary perm, 78 SS_localization / oL1 iL1 L4, 79 'Low-shear SS lines' / abs pres, 80 Very_sticky_SS / abs pres, 81 Extreme_L4_combing / abs pres, 82 Rest_posture / L12_flexed L12_extend ;
+ MATRIX
+ Dictyna 0000200001-----0-000-0000000---0000000000000000000100000-00000000000000000-------0
+ Uloborus 0011000000000000-000-0000000---00010001000000000000-2010-0000000000000100101000001
+ Deinopis 0000000???-----0-000-0000000---00000001000000000000-2010-0000001000000100100000001
+ Nephila 0001010011-----0-000-0000000---0000000101000000000100000-0001001000100211103120000
+ Nephilengys 0001010011-----0-000-0000000---0000000100000000100100000-0001001000100211103120000
+ Meta 0001010011-----0-000-0000000---0000000101000000000100000-0000001000100210100010001
+ Leucauge 0001010011-----0-000-00000010000001000101000000000100000-0000000000100211100010001
+ Pachygnatha 0001011011-----0-000-0001100---00010001010000000000-0000-000000000010020110?-?000?
+ Tetragnatha 0001011011-----0-000-0001100---00010001010000000000-2000-0000000000100211100010001
+ Theridiosoma 0001010000000000-000-0010000---0000000101000000100100000-0000000000110211100010000
+ Linyphia 0001010--1-----0-000-00000010100000100101001000000100000-00000000001102012----0000
+ Pimoa 0001010000000000-000-0000000---0000100101001000000100000-00000000001102012----0000
+ Theridion 0000000000000000-000-0000000----000000101000000000100000-00000000001102012----0000
+ Chorizopes 0001110000000001111101000000---0000000101000000001110000-00000000011002102-------?
+ Gasteracantha 00001100000000010100-0000011000000001010011000110111000111211101211100210100000000
+ Aetrocantha 00001100000000010100-0000010---0000010100110001101110001112111011111002?01???000??
+ Togacantha 00001100000000010100-0000000---0000010110110001101110001111111011111002?01????00??
+ Gastroxya 00201100000010010100-0000000---0000000100010001101110001211111010111002?01????00??
+ Augusta 00201100000010010100-0000000---0000000100100001101110001203111010111002?01??0?00?0
+ Isoxya 00001100000000010100-0000000---0110010110120001101110001114101010111002?010?0?00?0
+ Austracantha 01101100000000010100-0000000---0110010100020101101110001111101010111002101????????
+ Macracantha 00001100000000010100-0000000---0000010110110001101110001101111012111002?0100??00?0
+ Aspidolasius 0001110000000000-000-0000000---00000001101200011011100011051100100?10021010?0?000?
+ Caerostris 0001100000000000-000-0000000---00000111101000111011100010001100100110021010?000000
+ Hypognatha 00011100000000010000-0000000---01100001010200011111100011051000101?100211100000000
+ Arkys 0001110000000000-000-000000101000000001010200011011?0101206000010011002112????????
+ Archemorus 0001110000000000-000-0000000---00000011010000011011?0101206100010011002012????????
+ Encyosaccus 00011101000000010000-00000010000000000101000001111110001100110010011002?010?0?00??
+ Xylethrus 00011101000000010000-00000010000000000101000000001110001100110011111002?010?0?00?0
+ Chaetacis 00011101000000010000-00000010000000000101000000001110001000000112111002?010??00000
+ Micrathena 00011101000000010000-0000001000011000011100000000111000100000011211100210100000000
+ Mastophora 00001100000000010000-0000000---0000010111000000001110000?00110010001002?02-----11-
+ Cyrtarachne 00011100000000010000-0000000---0000000101000000000110001000110010001002?01000?111?
+ Pasilobus 0001110000000001?0?0-0000000---0000000101000000001110001100110010001002?010??-111?
+ Arachnura 00001100000000010000-0000000---0000000110000000011110000-00010010001002?0100000000
+ Witica 0000110001-----10000-0010000---0000010110000000001110000-00010010001002?0100000000
+ Mecynogea 0001110001-----10000-1010000---0000000110000000000110000-000000100010121011?1----0
+ Cyrtophora 000?1100000000010000-0010000---0000000110000000010110000-00010010001012101121----0
+ Neogea 0001110000010000-000-0000000---0000000110000000000111100-000100100010021010???00??
+ Argiope 00011100000100010010-0100000---0000000110000000000111100-0001001000100210100000000
+ Gea 00011100000100010010-0000000---0000000110000000000111100-0000001000100210100000000
+ Scoloderus 01111100000000010000-00000010000110000111000000001110000-0000001000100210100000000
+ Acanthepeira 00111100000000010100-00000010000000000110000100101110000-0000001000100210100000000
+ Anepsion 0011110000000001?100-10000010000110000111000100000110000-000000100?100210?????000?
+ Dolophone 0011110001000--11100-100000100100000111100001100001100011001000100?10021010?0?????
+ Hypsosinga 00111100000000011010-10?00010000000000110000100100110000-000000100010021010???00??
+ Zygiella 00011100000000011010-1010000---0000000110000000010110000-000000100010021010???00??
+ Kaira 00001100001000111000-10000010000000000110000000010110000-00010010001002101-------?
+ Metepeira 00011100001000111000-11000010010000000110000100010110000-0000001000100210100000000
+ Singa 00111100000000011011010000010100000000110000000010110000-000000100010021010???00?0
+ Larinia 00111100000000011011011000010110000000110000100010110000-000000100010021010?000001
+ Neoscona 01111100000000011011010000010100110000111000100010110000-0000001000100210100000000
+ Mangora 01111100000000011001010000010110110000110000100010110000-0000001000100210100000000
+ Cercidia 01011100000000011001010000010110110000110000100010110000-000000100010021110???00??
+ Pronous 0101110000000001000???000000---01100001?0000100000??0000-000000100010021110??0000?
+ Aculepeira 11111100001000111011011000010010110000110000100010110000-000000100010021010???00?0
+ Araneus 11111100001000011011011000010110110000110000100010110000-0000001000100210100000000
+ Bertrana 01111101000000010001010000010100110000100000100010110000-0010001000100210100000000
+ Alpaida 00111101000000010001010000010000110010110000100011110000-000000100010021010000000?
+ Enacrosoma 00111101000000010000-10000010100110000110000100110110000-001000100010021010??0000?
+ Wixia 01111101000001011001010000010001110000110000100000110000-000000100010021010000000?
+ Acacesia 01111100000001011001010000010001110000110000100000110000-0000001000100210100000000
+ Metazygia 01111100000000011001110000010000110000110000100010110000-0000001000100210100000000
+ Eustala 01111101000000011011110000010010110000110000100000110000-0000001000100210100000000
+ Cyclosa 01111101000000011000-10000010010110010110000100010110000-0000001000100210100000000
+ Nuctenea 00011100000000011000-10000010010110000110000100010110000-0000001000100210100000000
+ Colphepeira 00111100000000011000-100000100101100001?0000100000110000-00000010001??2??10???00??
+ Araniella 10111101000000010000-10000010110110000110000100011110000-000000100010021010?0?00?0
+ Eriophora 11111100000000011110-10000011010110010110000100011110000-0000001000100210100000000
+ Verrucosa 11111101000000011000-10000011010110000110000100111110000-001000100010021010?000000
+
+;
+ IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 JC1191fcddc3b435 JC1191fcddc3b436 JC1191fcddc3b437 JC1191fcddc3b438 JC1191fcddc3b439 JC1191fcddc3b440 JC1191fcddc3b441 JC1191fcddc3b442 JC1191fcddc3b443 JC1191fcddc3b444 JC1191fcddc3b445 JC1191fcddc3b446 JC1191fcddc3b447 JC1191fcddc3b448 JC1191fcddc3b449 JC1191fcddc3b450 JC1191fcddc3b451 JC1191fcddc3b452 JC1191fcddc3b453 JC1191fcddc3b454 JC1191fcddc3b455 JC1191fcddc3b456 JC1191fcddc3b457 JC1191fcddc3b458 JC1191fcddc3b459 JC1191fcddc3b460 JC1191fcddc3b461 JC1191fcddc3b462 JC1191fcddc3b463 JC1191fcddc3b464 JC1191fcddc3b465 JC1191fcddc3b466 JC1191fcddc3b467 JC1191fcddc3b468 JC1191fcddc3b469 JC1191fcddc3b470 JC1191fcddc3b471 JC1191fcddc3b472 JC1191fcddc3b473 JC1191fcddc3b474 JC1191fcddc3b475 JC1191fcddc3b476 JC1191fcddc3b477 JC1191fcddc3b478 JC1191fcddc3b479 JC1191fcddc3b480 JC1191fcddc3b481 JC1191fcddc3b482 JC1191fcddc3b483 JC1191fcddc3b484 JC1191fcddc3b485 JC1191fcddc3b486 JC1191fcddc3b487 JC1191fcddc3b488 JC1191fcddc3b489 JC1191fcddc3b490 JC1191fcddc3b491 JC1191fcddc3b492 JC1191fcddc3b493 JC1191fcddc3b494 JC1191fcddc3b495 JC1191fcddc3b496 JC1191fcddc3b497 JC1191fcddc3b498 JC1191fcddc3b499 JC1191fcddc3b500 JC1191fcddc3b501 JC1191fcddc3b502 JC1191fcddc3b503 JC1191fcddc3b504 JC1191fcddc3b505 JC1191fcddc3b506 ;
+ BLOCKID JC1191fcddc0c0;
+
+
+END;
+BEGIN TREES;
+ Title Imported_trees;
+ LINK Taxa = 'Scharff&Coddington_1997_Araneidae';
+ TRANSLATE
+ 1 Dictyna,
+ 2 Uloborus,
+ 3 Deinopis,
+ 4 Nephila,
+ 5 Nephilengys,
+ 6 Meta,
+ 7 Leucauge,
+ 8 Pachygnatha,
+ 9 Tetragnatha,
+ 10 Theridiosoma,
+ 11 Linyphia,
+ 12 Pimoa,
+ 13 Theridion,
+ 14 Chorizopes,
+ 15 Gasteracantha,
+ 16 Aetrocantha,
+ 17 Togacantha,
+ 18 Gastroxya,
+ 19 Augusta,
+ 20 Isoxya,
+ 21 Austracantha,
+ 22 Macracantha,
+ 23 Aspidolasius,
+ 24 Caerostris,
+ 25 Hypognatha,
+ 26 Arkys,
+ 27 Archemorus,
+ 28 Encyosaccus,
+ 29 Xylethrus,
+ 30 Chaetacis,
+ 31 Micrathena,
+ 32 Mastophora,
+ 33 Cyrtarachne,
+ 34 Pasilobus,
+ 35 Arachnura,
+ 36 Witica,
+ 37 Mecynogea,
+ 38 Cyrtophora,
+ 39 Neogea,
+ 40 Argiope,
+ 41 Gea,
+ 42 Scoloderus,
+ 43 Acanthepeira,
+ 44 Anepsion,
+ 45 Dolophone,
+ 46 Hypsosinga,
+ 47 Zygiella,
+ 48 Kaira,
+ 49 Metepeira,
+ 50 Singa,
+ 51 Larinia,
+ 52 Neoscona,
+ 53 Mangora,
+ 54 Cercidia,
+ 55 Pronous,
+ 56 Aculepeira,
+ 57 Araneus,
+ 58 Bertrana,
+ 59 Alpaida,
+ 60 Enacrosoma,
+ 61 Wixia,
+ 62 Acacesia,
+ 63 Metazygia,
+ 64 Eustala,
+ 65 Cyclosa,
+ 66 Nuctenea,
+ 67 Colphepeira,
+ 68 Araniella,
+ 69 Eriophora,
+ 70 Verrucosa;
+ TREE Imported_tree_1 = (1,((2,3),((((4,5),(6,(7,(8,9)))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((46,((47,(48,49)),(50,51))),(45,((66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(56,57)),(53,(54,55)))))),(44,67)))))))))));
+ TREE Imported_tree_2 = (1,((2,3),((((4,5),(6,(7,(8,9)))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((46,(50,(47,(48,49)))),(45,((66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(51,(56,57))),(53,(54,55)))))),(44,67)))))))))));
+ TREE Imported_tree_3 = (1,((2,3),((((6,(4,5)),(7,(8,9))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((46,((47,(48,49)),(50,51))),(45,((66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(56,57)),(53,(54,55)))))),(44,67)))))))))));
+ TREE Imported_tree_4 = (1,((2,3),((((4,5),(6,(7,(8,9)))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((44,(67,(66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(56,57)),(53,(54,55)))))))),(45,(46,((47,(48,49)),(50,51))))))))))));
+ TREE Imported_tree_5 = (1,((2,3),((((6,(4,5)),(7,(8,9))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((46,(50,(47,(48,49)))),(45,((66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(51,(56,57))),(53,(54,55)))))),(44,67)))))))))));
+ TREE Imported_tree_6 = (1,((2,3),((((4,5),(6,(7,(8,9)))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((44,(67,(66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(51,(56,57))),(53,(54,55)))))))),(45,(46,(50,(47,(48,49)))))))))))));
+ TREE Imported_tree_7 = (1,((2,3),((((6,(4,5)),(7,(8,9))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((44,(67,(66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(56,57)),(53,(54,55)))))))),(45,(46,((47,(48,49)),(50,51))))))))))));
+ TREE Imported_tree_8 = (1,((2,3),((((6,(4,5)),(7,(8,9))),(10,(13,(11,12)))),(14,(((((((18,19),(22,((17,(15,16)),(20,21)))),(23,24)),((25,(26,27)),(28,(29,(30,31))))),(32,(33,34))),((35,36),((37,38),(39,(40,41))))),(42,(43,((44,(67,(66,((65,(68,(69,70))),((63,((59,(58,60)),(64,(61,62)))),((52,(51,(56,57))),(53,(54,55)))))))),(45,(46,(50,(47,(48,49)))))))))))));
+
+END;
+
+
+BEGIN LABELS;
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
+
+
+END;
+
+BEGIN SETS;
+ CHARPARTITION * UNTITLED = Somatic : 1 - 4 32 - 67, MM_Genitalia : 5 - 31, Spinnerets : 68 - 73, Behavior : 74 - 82;
+
+END;
+
+BEGIN ASSUMPTIONS;
+ TYPESET * UNTITLED = unord: 1 - 82;
+
+ EXSET * UNTITLED = ;
+
+ WTSET * UNTITLED = 1: 1 - 82 ;
+
+END;
+
+BEGIN CODONS;
+ CODESET * UNTITLED = universal: 1 - 82;
+
+
+END;
+
+BEGIN MESQUITECHARMODELS;
+ ProbModelSet * UNTITLED = 'Mk1 (est.)': 1 - 82;
+END;
+
+BEGIN NOTES;
+
+ TEXT FILE TEXT = 'Scharff, N. and J. A. Coddington. 1997. A phylogenetic analysis of the orb-weaving spider family Araneidae (Arachnida, Araneae). Zool. J. Linn. Soc. 120(4): 355?434';
+
+END;
+
+Begin MESQUITE;
+ MESQUITESCRIPTVERSION 2;
+ TITLE AUTO;
+ tell ProjectCoordinator;
+ getEmployee #mesquite.minimal.ManageTaxa.ManageTaxa;
+ tell It;
+ setID 0 6498976404544962919 JC1191fcddc0c4;
+ endTell;
+ getEmployee #mesquite.charMatrices.ManageCharacters.ManageCharacters;
+ tell It;
+ setID 0 2280451364422511229 JC1191fcddc0c0;
+ checksum 0 114188958 JC1191fcddc0c0;
+ endTell;
+ getEmployee #mesquite.charMatrices.BasicDataWindowCoord.BasicDataWindowCoord;
+ tell It;
+ showDataWindow #2280451364422511229 #mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindowMaker;
+ tell It;
+ getWindow;
+ tell It;
+ setExplanationSize 30;
+ setAnnotationSize 20;
+ setFontIncAnnot 0;
+ setFontIncExp 0;
+ setSize 707 998;
+ setLocation 734 14;
+ setFont SanSerif;
+ setFontSize 10;
+ getToolPalette;
+ tell It;
+ endTell;
+ setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.arrow;
+ colorCells #mesquite.charMatrices.NoColor.NoColor;
+ setBackground White;
+ toggleShowNames off;
+ toggleShowTaxonNames on;
+ toggleTight off;
+ toggleShowChanges on;
+ toggleSeparateLines off;
+ toggleShowStates on;
+ toggleAutoWCharNames off;
+ toggleShowDefaultCharNames off;
+ toggleConstrainCW on;
+ toggleBirdsEye off;
+ toggleAllowAutosize on;
+ toggleColorsPanel off;
+ toggleDiagonal off;
+ toggleLinkedScrolling on;
+ toggleScrollLinkedTables off;
+ endTell;
+ showWindow;
+ getWindow;
+ tell It;
+ forceAutosize;
+ endTell;
+ getEmployee #mesquite.categ.StateNamesStrip.StateNamesStrip;
+ tell It;
+ showStrip off;
+ endTell;
+ getEmployee #mesquite.charMatrices.AnnotPanel.AnnotPanel;
+ tell It;
+ togglePanel off;
+ endTell;
+ getEmployee #mesquite.charMatrices.CharReferenceStrip.CharReferenceStrip;
+ tell It;
+ showStrip off;
+ endTell;
+ getEmployee #mesquite.charMatrices.ColorCells.ColorCells;
+ tell It;
+ setColor Red;
+ removeColor off;
+ endTell;
+ getEmployee #mesquite.charMatrices.QuickKeySelector.QuickKeySelector;
+ tell It;
+ autotabOff;
+ endTell;
+ getEmployee #mesquite.collab.ChgHistoryInfo.ChgHistoryInfo;
+ tell It;
+ togglePanel off;
+ endTell;
+ getEmployee #mesquite.silk.AnnotSummary.AnnotSummary;
+ tell It;
+ togglePanel off;
+ endTell;
+ getEmployee #mesquite.silk.CellImages.CellImages;
+ tell It;
+ showColors off;
+ togglePanel off;
+ toggleAnnotations off;
+ toggleSums off;
+ toggleHistory off;
+ toggleLocations off;
+ endTell;
+ getEmployee #mesquite.silk.StateImages.StateImages;
+ tell It;
+ showColors off;
+ togglePanel off;
+ toggleStateAnnotations off;
+ toggleCharAnnotations off;
+ toggleLocations off;
+ endTell;
+ getEmployee #mesquite.silk.aaManageImageIndices.aaManageImageIndices;
+ tell It;
+ setLocal;
+ setPrevIndexBase 'http://salticus.zoology.ubc.ca/ATOLSpiders/ATOLImages';
+ reset;
+ endTell;
+ endTell;
+ endTell;
+ getEmployee #mesquite.charMatrices.ManageCharacters.ManageCharacters;
+ tell It;
+ showCharacters #2280451364422511229 #mesquite.lists.CharacterList.CharacterList;
+ tell It;
+ setData 0;
+ getWindow;
+ tell It;
+ newAssistant #mesquite.lists.DefaultCharOrder.DefaultCharOrder;
+ newAssistant #mesquite.lists.CharListInclusion.CharListInclusion;
+ newAssistant #mesquite.lists.CharListPartition.CharListPartition;
+ newAssistant #mesquite.stochchar.CharListProbModels.CharListProbModels;
+ setExplanationSize 30;
+ setAnnotationSize 20;
+ setFontIncAnnot 0;
+ setFontIncExp 0;
+ setSize 707 998;
+ setLocation 734 14;
+ setFont SanSerif;
+ setFontSize 10;
+ getToolPalette;
+ tell It;
+ endTell;
+ endTell;
+ showWindow;
+ getEmployee #mesquite.lists.CharListAnnotPanel.CharListAnnotPanel;
+ tell It;
+ togglePanel off;
+ endTell;
+ endTell;
+ endTell;
+ getEmployee #mesquite.basic.ManageFileComments.ManageFileComments;
+ tell It;
+ editFileComment 0;
+ tell It;
+ setExplanationSize 0;
+ setAnnotationSize 0;
+ setFontIncAnnot 0;
+ setFontIncExp 0;
+ setSize 707 1048;
+ setLocation 734 14;
+ setFont SanSerif;
+ setFontSize 10;
+ setActive;
+ showWindow;
+ endTell;
+ endTell;
+ endTell;
+end;
+
+
View
938 test/test_nexus_parser.rb
@@ -1,7 +1,937 @@
-require 'helper'
+require 'test/unit'
+require 'rubygems'
+require 'ruby-debug'
-class TestNexusParser < Test::Unit::TestCase
- should "probably rename this file and start testing for real" do
- flunk "hey buddy, you should probably rename this file and start testing for real"
+require File.expand_path(File.join(File.dirname(__FILE__), '../lib/nexus_file'))
+
+class NexusParserTest < Test::Unit::TestCase
+ def test_truth
+ assert true
+ end
+end
+
+class Test_NexusFile_Builder < Test::Unit::TestCase
+ def test_builder
+ b = NexusFile::Builder.new
+ assert foo = b.nexus_file
+ assert_equal [], foo.taxa
+ assert_equal [], foo.characters
+ assert_equal [], foo.codings
+ assert_equal [], foo.sets
+ end
+end
+
+
+class Test_Regex < Test::Unit::TestCase
+ def test_begin_taxa
+ txt = " aslkfja\n Begin taxa; BLorf end; "
+ @regexp = Regexp.new(/\s*(Begin\s*taxa\s*;)\s*/i)
+ assert txt =~ @regexp
+ end
+
+end
+
+
+class Test_Lexer < Test::Unit::TestCase
+ def test_lexer
+ lexer = NexusFile::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
+ assert lexer.pop(NexusFile::Tokens::LBracket)
+ assert id = lexer.pop(NexusFile::Tokens::ID)
+ assert_equal(id.value, "foo")
+ assert lexer.pop(NexusFile::Tokens::RBracket)
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
+ assert lexer.pop(NexusFile::Tokens::TaxaBlk)
+ assert foo = lexer.pop(NexusFile::Tokens::ID)
+ assert_equal("BLORF", foo.value) # truncating whitespace
+ assert lexer.pop(NexusFile::Tokens::BlkEnd)
+
+ lexer2 = NexusFile::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
+ assert lexer2.pop(NexusFile::Tokens::LBracket)
+ assert id = lexer2.pop(NexusFile::Tokens::ID)
+ assert_equal(id.value, "foo")
+ assert lexer2.pop(NexusFile::Tokens::RBracket)
+ assert lexer2.pop(NexusFile::Tokens::BeginBlk)
+ assert lexer2.pop(NexusFile::Tokens::AuthorsBlk)
+ assert lexer2.pop(NexusFile::Tokens::LBracket)
+ assert lexer2.pop(NexusFile::Tokens::RBracket)
+ assert lexer2.pop(NexusFile::Tokens::LParen)
+ assert lexer2.pop(NexusFile::Tokens::RParen)
+
+
+ lexer3 = NexusFile::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
+ assert lexer3.pop(NexusFile::Tokens::LBracket)
+ assert id = lexer3.pop(NexusFile::Tokens::ID)
+ assert_equal(id.value, "foo")
+ assert lexer3.pop(NexusFile::Tokens::RBracket)
+ assert lexer3.pop(NexusFile::Tokens::BeginBlk)
+ assert lexer3.pop(NexusFile::Tokens::ChrsBlk)
+ assert foo = lexer3.pop(NexusFile::Tokens::ID)
+ assert_equal("BLORF", foo.value)
+ assert lexer3.pop(NexusFile::Tokens::BlkEnd)
+
+ lexer4 = NexusFile::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
+ assert lexer4.pop(NexusFile::Tokens::BeginBlk)
+ assert lexer4.pop(NexusFile::Tokens::ChrsBlk)
+ assert foo = lexer4.pop(NexusFile::Tokens::Number)
+ assert_equal(123123123, foo.value)
+ assert lexer4.pop(NexusFile::Tokens::BlkEnd)
+
+ lexer5 = NexusFile::Lexer.new("(0,1)")
+ assert lexer5.pop(NexusFile::Tokens::LParen)
+ assert foo = lexer5.pop(NexusFile::Tokens::Number)
+ assert_equal(0, foo.value)
+ assert lexer5.pop(NexusFile::Tokens::Comma)
+ assert foo = lexer5.pop(NexusFile::Tokens::Number)
+ assert_equal(1, foo.value)
+ assert lexer5.pop(NexusFile::Tokens::RParen)
+
+ lexer6 = NexusFile::Lexer.new(" 210(0,1)10A1\n")
+ assert foo = lexer6.pop(NexusFile::Tokens::RowVec)
+ assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
+
+ lexer6a = NexusFile::Lexer.new(" 21a(0 1)0b{3 4 5}(0)(1 a)\n")
+ assert foo = lexer6a.pop(NexusFile::Tokens::RowVec)
+ assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
+
+ lexer6b = NexusFile::Lexer.new(" 201{0 1}{0 1}0100)\x0A") # *nix line ending
+ assert foo = lexer6b.pop(NexusFile::Tokens::RowVec)
+ assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
+
+ lexer6c = NexusFile::Lexer.new(" 201{0 1}{0 1}0100)\x0D\x0A") # dos line ending
+ assert foo = lexer6c.pop(NexusFile::Tokens::RowVec)
+ assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
+
+
+ lexer7 = NexusFile::Lexer.new("read nothing till Nexus, not that nexus 13243 Block [] ();, this one: #nexus FOO")
+ assert foo = lexer7.pop(NexusFile::Tokens::NexusStart)
+ assert_equal('#nexus', foo.value)
+
+
+ ## we strip comments before parsing now
+ # lexer8 = NexusFile::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
+ # assert foo = lexer8.pop(NexusFile::Tokens::NexusComment)
+ # assert_equal "foo", foo.value
+
+ # assert lexer.pop(NexusFile::Tokens::Colon)
+ # assert num = lexer.pop(NexusFile::Tokens::Number)
+ # assert_equal(num.value, 0.0)
+ # assert lexer.pop(NexusFile::Tokens::Comma)
+ # assert lexer.pop(NexusFile::Tokens::SemiColon)
+ end
+
+ def test_row_vec
+ lexer = NexusFile::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
+ assert foo = lexer.pop(NexusFile::Tokens::RowVec)
+ assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
+ end
+
+ def test_punctuation
+ lexer = NexusFile::Lexer.new(',/=](\'NOT23\'[);,')
+ assert lexer.peek(NexusFile::Tokens::Comma)
+ assert lexer.pop(NexusFile::Tokens::Comma)
+ assert lexer.pop(NexusFile::Tokens::BckSlash)
+ assert lexer.pop(NexusFile::Tokens::Equals)
+ assert lexer.pop(NexusFile::Tokens::RBracket)
+ assert lexer.pop(NexusFile::Tokens::LParen)
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "NOT23", foo.value
+ assert lexer.pop(NexusFile::Tokens::LBracket)
+ assert lexer.pop(NexusFile::Tokens::RParen)
+ assert lexer.pop(NexusFile::Tokens::SemiColon)
+ assert lexer.pop(NexusFile::Tokens::Comma)
+
+ end
+
+ def test_tax_labels
+ lexer = NexusFile::Lexer.new("Taxlabels 'foo' bar blorf \"stuff things\" stuff 'and foo';")
+ assert foo = lexer.pop(NexusFile::Tokens::Taxlabels)
+ assert_equal("Taxlabels ", foo.value)
+ end
+
+ def test_EndBlk
+ lexer = NexusFile::Lexer.new(" \n\n End ;")
+ assert foo = lexer.pop(NexusFile::Tokens::EndBlk)
+ lexer = NexusFile::Lexer.new("\n\nEnd;")
+ assert foo = lexer.pop(NexusFile::Tokens::EndBlk)
+
+ lexer = NexusFile::Lexer.new("123123 \n\nEnd;")
+ assert !lexer.peek(NexusFile::Tokens::EndBlk)
+ lexer = NexusFile::Lexer.new("this is not an \"end\"\n\nEnd;")
+ assert !lexer.peek(NexusFile::Tokens::EndBlk)
+ end
+
+ def test_semicolon
+ lexer = NexusFile::Lexer.new("; Matrix foo")
+ assert lexer.peek(NexusFile::Tokens::SemiColon)
+ assert foo = lexer.pop(NexusFile::Tokens::SemiColon)
+ end
+
+ def test_label
+ lexer = NexusFile::Lexer.new(' \'foo\' bar, blorf; "stuff things" stuff \'and foo\' 23434 ""asdf"" \'Foo_And_Stuff\' ')
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "foo", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "bar", foo.value
+ assert lexer.pop(NexusFile::Tokens::Comma)
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "blorf", foo.value
+ assert lexer.pop(NexusFile::Tokens::SemiColon)
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "stuff things", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "stuff", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "and foo", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "23434", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal '"asdf"', foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal 'Foo_And_Stuff', foo.value
+ end
+
+ def test_odd_labels
+ lexer = NexusFile::Lexer.new("blorf 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' \"\"\" foo \"\"\" '''rupununi''' '''tanzania''' '''cup-shaped''' bar blorf\n;")
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "blorf", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal '"" foo ""', foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "''rupununi''", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "''tanzania''", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "''cup-shaped''", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "bar", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "blorf", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::SemiColon)
+ end
+
+ def test_title
+ lexer = NexusFile::Lexer.new( "TITLE 'Scharff&Coddington_1997_Araneidae';")
+ assert foo = lexer.pop(NexusFile::Tokens::Title)
+ assert_equal "TITLE 'Scharff&Coddington_1997_Araneidae';", foo.value
+ end
+
+
+ def test_dimensions
+ input = " DIMENSIONS NCHAR= 10"
+ lexer = NexusFile::Lexer.new(input)
+ assert foo = lexer.pop(NexusFile::Tokens::Dimensions)
+ assert_equal "DIMENSIONS", foo.value
+ end
+
+ def test_format
+ input = " format NCHAR= 10"
+ lexer = NexusFile::Lexer.new(input)
+ assert foo = lexer.pop(NexusFile::Tokens::Format)
+ assert_equal "format", foo.value
+ end
+
+ def test_odd_value_pair
+ lexer = NexusFile::Lexer.new(" TEXT CHARACTER = 3 TEXT = A62.003;
+
+ TEXT CHARACTER = 4 TEXT = A62.004; \n end; ")
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
+ blorf = {:character => "3"}
+ assert_equal blorf , foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
+ blorf = {:text => "A62.003"}
+ assert_equal blorf , foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
+ assert_equal "TEXT", foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
+ blorf = {:character => "4"}
+ assert_equal blorf , foo.value
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
+ blorf = {:text => "A62.004"}
+ assert_equal blorf , foo.value
+
+ end
+
+
+ def test_value_pair
+
+ lexer0 = NexusFile::Lexer.new(' DATATYPE=STANDARD ')
+ assert foo = lexer0.pop(NexusFile::Tokens::ValuePair)
+ blorf = {:datatype => "STANDARD"}
+ assert_equal blorf , foo.value
+
+ lexer = NexusFile::Lexer.new(' DATATYPE = STANDARD ')
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
+ blorf = {:datatype => "STANDARD"}
+ assert_equal blorf , foo.value
+
+ lexer2 = NexusFile::Lexer.new(' DATATYPE ="STANDARD" ')
+ assert foo = lexer2.pop(NexusFile::Tokens::ValuePair)
+ assert_equal blorf, foo.value
+
+ lexer3 = NexusFile::Lexer.new('DATATYPE= "STANDARD" ')
+ assert foo = lexer3.pop(NexusFile::Tokens::ValuePair)
+ assert_equal blorf, foo.value
+
+ input= " NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" ; "
+ lexer4 = NexusFile::Lexer.new(input)
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:nchar => '10'}
+ assert_equal smorf, foo.value
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:ntaxa => '10'}
+ assert_equal smorf, foo.value
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:nfoo => '999'}
+ assert_equal smorf, foo.value
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:nbar => 'a b c'}
+ assert_equal smorf, foo.value
+
+ lexer5 = NexusFile::Lexer.new(' symbols= " a c b d 1 " ')
+ assert foo = lexer5.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:symbols => 'a c b d 1'}
+ assert_equal smorf, foo.value
+
+ lexer6 = NexusFile::Lexer.new(' missing = - ')
+ assert foo = lexer6.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:missing => '-'}
+ assert_equal smorf, foo.value
+
+ lexer6a = NexusFile::Lexer.new("ntaxa=1;\n")
+ assert foo = lexer6a.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:ntaxa => '1'}
+ assert_equal smorf, foo.value
+
+ lexer7 = NexusFile::Lexer.new("ntaxa =1;\n")
+ assert foo = lexer7.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:ntaxa => '1'}
+ assert_equal smorf, foo.value
+
+ lexer8 = NexusFile::Lexer.new(" ntaxa = 1 ;\n")
+ assert foo = lexer8.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:ntaxa => '1'}
+ assert_equal smorf, foo.value
+
+ lexer9 = NexusFile::Lexer.new(" TF = (CM 'This is an annotation that has a hard return in it^n^n^n^nSo there!') ")
+ assert foo = lexer9.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:tf => "(CM 'This is an annotation that has a hard return in it^n^n^n^nSo there!')" }
+ assert_equal smorf, foo.value
+
+ lexer10 = NexusFile::Lexer.new(" TF = (CM 'This is a value pair that has (parens) within the value, twice! ()') ; some stuff left here ")
+ assert foo = lexer10.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:tf => "(CM 'This is a value pair that has (parens) within the value, twice! ()')" }
+ assert_equal smorf, foo.value
+
+ lexer11 = NexusFile::Lexer.new("CHARACTER = 1 TEXT = A62.001;")
+ assert_equal true, !lexer11.peek(NexusFile::Tokens::SemiColon)
+ assert_equal true, lexer11.peek(NexusFile::Tokens::ValuePair)
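+ # peek only looks ahead without consuming, so the same ValuePair token is still returned by the pop below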
+ assert foo = lexer11.pop(NexusFile::Tokens::ValuePair)
+ smorf = {:character => "1" }
+ assert_equal smorf, foo.value
+ assert foo = lexer11.pop(NexusFile::Tokens::ValuePair)
+ end
+
+ def test_MesquiteIDs
+ lexer = NexusFile::Lexer.new('IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;
+ BLOCKID JC1191fcddc0c0;')
+ assert lexer.pop(NexusFile::Tokens::MesquiteIDs)
+ assert lexer.pop(NexusFile::Tokens::MesquiteBlockID)
+ end
+
+ def test_TreesBlk
+ lexer = NexusFile::Lexer.new("BEGIN TREES;
+ Title Imported_trees;
+ LINK Taxa = 'Scharff&Coddington_1997_Araneidae';
+ TRANSLATE
+ 1 Dictyna,
+ 2 Uloborus,
+ 3 Deinopis,
+ 4 Nephila&Herennia,
+ 5 'Nephilengys_cruentata',
+ 6 Meta,
+ 7 Leucauge_venusta,
+ 8 Pachygnatha,
+ 9 'Theridiosoma_01',
+ 10 Tetragnatha;
+ TREE 'Imported tree 1+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 2+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 3+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+ TREE 'Imported tree 4+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 5+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+ TREE 'Imported tree 6+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
+ TREE 'Imported tree 7+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+ TREE 'Imported tree 8+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
+
+ END;
+
+
+ BEGIN LABELS;
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
+
+
+ END;")
+
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
+ assert foo = lexer.pop(NexusFile::Tokens::TreesBlk)
+ assert_equal 'TREES', foo.value.slice(0,5)
+ assert_equal 'END;', foo.value.slice(-4,4)
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
+ assert lexer.pop(NexusFile::Tokens::LabelsBlk)
+
+ end
+
+ def test_NotesBlk
+ input = "BEGIN NOTES ;"
+ lexer = NexusFile::Lexer.new(input)
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
+ assert foo = lexer.pop(NexusFile::Tokens::NotesBlk)
+ assert "NOTES", foo.value
+ end
+
+ def test_LabelsBlk
+ lexer = NexusFile::Lexer.new("
+ LABELS;
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
+
+
+ END;
+
+ BEGIN some other block;")
+
+ assert foo = lexer.pop(NexusFile::Tokens::LabelsBlk)
+ assert_equal 'LABELS', foo.value.slice(0,6)
+ assert_equal 'END;', foo.value.slice(-4,4)
+ end
+
+ def test_SetsBlk
+ lexer = NexusFile::Lexer.new("
+ SETS;
+ CHARPARTITION * UNTITLED = Somatic : 1 - 2 4, MM_Genitalia : 5 - 8 10;
+
+ END;
+ BEGIN some other block;")
+
+ assert foo = lexer.pop(NexusFile::Tokens::SetsBlk)
+ assert_equal 'SETS', foo.value.slice(0,4)
+ assert_equal 'END;', foo.value.slice(-4,4)
+ end
+
+
+
+ def test_lexer_errors
+ lexer = NexusFile::Lexer.new("*&")
+ assert_raise(NexusFile::ParseError) {lexer.peek(NexusFile::Tokens::ID)}
+ end
+end
+
+
+class Test_Parser < Test::Unit::TestCase
+ def setup
+ # a Mesquite 2.n or higher file
+ @nf = File.read(File.expand_path(File.join(File.dirname(__FILE__), 'MX_test_03.nex'))) # MX_test_01.nex
+ end
+
+ def teardown
+ @nf = nil
+ end
+
+ def test_that_file_might_be_nexus
+ begin
+ assert !parse_nexus_file("#Nexblux Begin Natrix end;")
+ rescue NexusFile::ParseError
+ assert true
+ end
+ end
+
+ def test_parse_initializes
+ foo = parse_nexus_file(@nf)
+ end
+
+ def test_parse_file
+ # this is the major loop; all parts should exist
+ foo = parse_nexus_file(@nf)
+
+ assert_equal 10, foo.taxa.size
+ assert_equal 10, foo.characters.size
+ assert_equal 10, foo.codings.size
+ assert_equal 1, foo.taxa[1].notes.size # asserts that notes are being parsed
+ assert_equal "norm", foo.characters[0].states["0"].name
+ assert_equal "modified", foo.characters[0].states["1"].name
+ end
+
+ def test_parse_authors_blk
+ end
+
+ def test_taxa_block
+ # we've popped off the header already
+ input =
+ "TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NTAX=10;
+ TAXLABELS
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
+ ;
+ IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;
+ BLOCKID JC1191fcddc0c4;
+ END;"
+
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(input)
+ NexusFile::Parser.new(lexer,builder).parse_taxa_blk
+ foo = builder.nexus_file
+
+ assert_equal 10, foo.taxa.size
+ assert_equal "Dictyna", foo.taxa[0].name
+ assert_equal "Nephilengys_cruentata", foo.taxa[4].name
+ assert_equal "Theridiosoma_01", foo.taxa[8].name
+ assert_equal "Tetragnatha", foo.taxa[9].name
end
+
+ def test_taxa_block_without_IDS
+ # we've popped off the header already
+ input =
+ "TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NTAX=10;
+ TAXLABELS
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
+ ;
+ END;"
+
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(input)
+ NexusFile::Parser.new(lexer,builder).parse_taxa_blk
+ foo = builder.nexus_file
+
+ assert_equal 10, foo.taxa.size
+ assert_equal "Dictyna", foo.taxa[0].name
+ assert_equal "Nephilengys_cruentata", foo.taxa[4].name
+ assert_equal "Theridiosoma_01", foo.taxa[8].name
+ assert_equal "Tetragnatha", foo.taxa[9].name
+ end
+
+
+
+ def test_parse_characters_blk
+ input= "
+ TITLE 'Scharff&Coddington_1997_Araneidae';
+ DIMENSIONS NCHAR=10;
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
+ CHARSTATELABELS
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
+ MATRIX
+ Dictyna 0?00201001
+ Uloborus 0?11000000
+ Deinopis 0?01002???
+ Nephila&Herennia 0?21010011
+ 'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11
+ Meta 0?01A10011
+ Leucauge_venusta ???--?-??-
+ Pachygnatha 0?210(0 1)0011
+ 'Theridiosoma_01' ??????????
+ Tetragnatha 0?01011011
+
+ ;
+ IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;
+ BLOCKID JC1191fcddc0c0;
+
+ END;"
+
+ builder = NexusFile::Builder.new
+ @lexer = NexusFile::Lexer.new(input)
+
+ # add the taxa; we assume they are present for comparison purposes, though ultimately we shouldn't need them
+ # foo.taxa = ["Dictyna", "Uloborus", "Deinopis", "Nephila&Herennia", "Nephilenygys_cruentata", "Meta", "Leucauge_venusta", "Pachygnatha", "Theridiosoma_01", "Tetragnatha"]
+
+ # stub the taxa, they would otherwise get added in dimensions or taxa block
+ (0..9).each{|i| builder.stub_taxon}
+
+ NexusFile::Parser.new(@lexer,builder).parse_characters_blk
+ foo = builder.nexus_file
+
+ assert_equal 10, foo.characters.size
+ assert_equal "Tibia_II", foo.characters[0].name
+ assert_equal "TII_macrosetae", foo.characters[1].name
+
+ assert_equal "norm", foo.characters[0].states["0"].name
+ assert_equal "modified", foo.characters[0].states["1"].name
+
+
+ # ?!!?
+ # foo.characters[0].states["1"].name
+ assert_equal ["", "abs", "pres"], foo.characters[9].states.keys.collect{|s| foo.characters[9].states[s].name}.sort
+
+
+ assert_equal ["0","1"], foo.codings[7][5].states
+ assert_equal ["?"], foo.codings[9][1].states
+ assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
+ end
+
+ def test_characters_block_without_IDs_or_title
+ input= "
+ DIMENSIONS NCHAR=10;
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
+ CHARSTATELABELS
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
+ MATRIX
+ Dictyna 0?00201001
+ Uloborus 0?11000000
+ Deinopis 0?01002???
+ Nephila&Herennia 0?21010011
+ 'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11
+ Meta 0?01A10011
+ Leucauge_venusta ???--?-??-
+ Pachygnatha 0?210(0 1)0011
+ 'Theridiosoma_01' ??????????
+ Tetragnatha 0?01011011
+
+ ;
+ END;"
+
+ builder = NexusFile::Builder.new
+ @lexer = NexusFile::Lexer.new(input)
+
+ # add the taxa; we assume they are present for comparison purposes, though ultimately we shouldn't need them
+ # foo.taxa = ["Dictyna", "Uloborus", "Deinopis", "Nephila&Herennia", "Nephilenygys_cruentata", "Meta", "Leucauge_venusta", "Pachygnatha", "Theridiosoma_01", "Tetragnatha"]
+
+ # stub the taxa, they would otherwise get added in dimensions or taxa block
+ (0..9).each{|i| builder.stub_taxon}
+
+ NexusFile::Parser.new(@lexer,builder).parse_characters_blk
+ foo = builder.nexus_file
+
+ assert_equal 10, foo.characters.size
+ assert_equal "Tibia_II", foo.characters[0].name
+ assert_equal "TII_macrosetae", foo.characters[1].name
+ assert_equal "norm", foo.characters[0].states["0"].name
+ assert_equal "modified", foo.characters[0].states["1"].name
+ assert_equal ["", "abs", "pres"], foo.characters[9].states.keys.collect{|s| foo.characters[9].states[s].name}.sort
+ assert_equal ["0","1"], foo.codings[7][5].states
+ assert_equal ["?"], foo.codings[9][1].states
+ assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
+ end
+
+ def test_characters_block_from_file
+ foo = parse_nexus_file(@nf)
+ assert 10, foo.characters.size
+ end
+
+ def test_codings
+ foo = parse_nexus_file(@nf)
+ assert 100, foo.codings.size # two multistates count in single cells
+ end
+
+ def test_parse_dimensions
+ input= " DIMENSIONS NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" blorf=2; "
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(input)
+
+ NexusFile::Parser.new(lexer,builder).parse_dimensions
+ foo = builder.nexus_file
+
+ assert_equal "10", foo.vars[:nchar]
+ assert_equal "10", foo.vars[:ntaxa]
+ assert_equal "999", foo.vars[:nfoo]
+ assert_equal 'a b c', foo.vars[:nbar]
+ assert_equal '2', foo.vars[:blorf]
+ # add test that nothing is left in lexer
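+ # one possible sketch, assuming the whole statement (including the trailing ';') has been consumed:
+ # assert !lexer.peek(NexusFile::Tokens::ID)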
+ end
+
+ def test_parse_format
+ input = "FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(input)
+
+ NexusFile::Parser.new(lexer,builder).parse_format
+ foo = builder.nexus_file
+
+ assert_equal "STANDARD", foo.vars[:datatype]
+ assert_equal "-", foo.vars[:gap]
+ assert_equal "?", foo.vars[:missing]
+ assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
+ # add test that nothing is left in lexer
+ end
+
+ def test_parse_chr_state_labels
+ input =" CHARSTATELABELS
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
+ MATRIX
+ fooo 01 more stuff here that should not be hit"
+
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(input)
+
+ (0..9).each{builder.stub_chr()}
+
+ NexusFile::Parser.new(lexer,builder).parse_chr_state_labels
+
+ foo = builder.nexus_file
+ assert_equal 10, foo.characters.size
+ assert_equal "Tibia_II", foo.characters[0].name
+ assert_equal "norm", foo.characters[0].states["0"].name
+ assert_equal "modified", foo.characters[0].states["1"].name
+
+ assert_equal "TII_macrosetae", foo.characters[1].name
+ assert_equal "= TI", foo.characters[1].states["0"].name
+ assert_equal "stronger", foo.characters[1].states["1"].name
+
+ assert_equal "Femoral_tuber", foo.characters[2].name
+ assert_equal "abs", foo.characters[2].states["0"].name
+ assert_equal "pres", foo.characters[2].states["1"].name
+ assert_equal "m-setae", foo.characters[2].states["2"].name
+
+ assert_equal "Undefined", foo.characters[3].name
+ assert_equal 0, foo.characters[3].states.keys.size
+
+ assert_equal "Cymbium", foo.characters[4].name
+ assert_equal "dorsal", foo.characters[4].states["0"].name
+ assert_equal "mesal", foo.characters[4].states["1"].name
+ assert_equal "lateral", foo.characters[4].states["2"].name
+
+ assert_equal "Paracymbium", foo.characters[5].name
+ assert_equal "abs", foo.characters[5].states["0"].name
+ assert_equal "pres", foo.characters[5].states["1"].name
+
+ assert_equal "Globular_tegulum", foo.characters[6].name
+ assert_equal "abs", foo.characters[6].states["0"].name
+ assert_equal "pres", foo.characters[6].states["1"].name
+
+ assert_equal "Undefined", foo.characters[7].name
+ assert_equal "entire", foo.characters[7].states["0"].name
+ assert_equal "w_lobe", foo.characters[7].states["1"].name
+
+ # ...
+
+ assert_equal "Median_apophysis", foo.characters[9].name
+ assert_equal "pres", foo.characters[9].states["0"].name
+ assert_equal "abs", foo.characters[9].states["1"].name
+ end
+
+ def test_strange_chr_state_labels
+ input =" CHARSTATELABELS
+ 29 'Metatarsal trichobothria (CodAra.29)' / 37623 '>2', 30 'Spinneret cuticle (CodAra.30)' / annulate ridged squamate;
+ Matrix
+ fooo 01 more stuff here that should not be hit"
+
+ builder = NexusFile::Builder.new
+ lexer = NexusFile::Lexer.new(input)
+
+ (0..29).each{builder.stub_chr()}
+
+ NexusFile::Parser.new(lexer,builder).parse_chr_state_labels
+
+ foo = builder.nexus_file
+
+ assert_equal "Metatarsal trichobothria (CodAra.29)", foo.characters[28].name
+ assert_equal "37623", foo.characters[28].states["0"].name
+ assert_equal ">2", foo.characters[28].states["1"].name
+
+ assert_equal "Spinneret cuticle (CodAra.30)", foo.characters[29].name
+ assert_equal "annulate", foo.characters[29].states["0"].name
+ assert_equal "ridged", foo.characters[29].states["1"].name
+ assert_equal "squamate", foo.characters[29].states["2"].name
+
+ end
+
+ def DONT_test_parse_really_long_string_of_chr_state_labels
+ input =" CHARSTATELABELS