Permalink
Browse files

Parser

  • Loading branch information...
1 parent 65fe953 commit 9db933db6e36c3b9e4c7e3a82f33a2e4b93c4d8b @nickg committed Jun 18, 2008
Showing with 875 additions and 3 deletions.
  1. +8 −3 bibtex/lexer.rb
  2. +99 −0 bibtex/parser.rb
  3. +15 −0 bibtex/test_parser.rb
  4. +753 −0 example.bib
View
@@ -76,15 +76,20 @@ def src_pos
end
def next_token!
- skip_whitespace
- if @scanner.check /\n/ then
+ if @scanner.check /^\s*\n/ then
@lineno += 1
@cols_prev = @scanner.pos + 1
end
+ skip_whitespace
@rules.each do |regexp, result|
return result if @lval = @scanner.scan(regexp)
end
- raise LexerError.new("Unexpected input #{@scanner.rest}", src_pos)
+ unexpect = if @scanner.rest.length < 10 then
+ @scanner.rest
+ else
+ "#{@scanner.rest.first 10}..."
+ end
+ raise LexerError.new("Unexpected input #{unexpect}", src_pos)
end
def peek_token
View
@@ -0,0 +1,99 @@
+require 'bibtex/bibliography'
+require 'bibtex/entry'
+require 'bibtex/field'
+require 'bibtex/lexer'
+
+module BibTeX
+
+ class Parser
+ def self.parse(filename)
+ @lexer.feed File.read(filename)
+
+ b = Bibliography.new
+ while @lexer.more_tokens?
+ b << parse_entry
+ end
+ return b
+ end
+
+ private
+
+ def self.parse_entry
+ expect :at, '@'
+ type = expect :id
+ expect :lbrace, '{'
+ key = expect :id
+
+ e = Entry.new(type, key)
+ while @lexer.peek_token != :rbrace
+ expect :comma, ','
+ e.add_field parse_field
+ end
+
+ expect :rbrace, '}'
+ return e
+ end
+
+ def self.parse_field
+ key = expect :id
+ expect :equals, '='
+ value = parse_value
+ Field.new(key.intern, value)
+ end
+
+ def self.parse_value
+ close = :rbrace
+ if @lexer.peek_token == :dquote then
+ expect :dquote
+ close = :dquote
+ else
+ expect :lbrace, '{'
+ end
+
+ brace_count = 1
+ str = ''
+ @lexer.ignore_whitespace = false
+ loop do
+ unless @lexer.more_tokens?
+ raise 'Unexpected end of input'
+ end
+
+ case @lexer.next_token!
+ when :rbrace, close
+ brace_count -= 1
+ if brace_count == 0 then
+ @lexer.ignore_whitespace = true
+ return str
+ end
+ when :lbrace
+ str += '{'
+ brace_count += 1
+ else
+ str += @lexer.lval
+ end
+ end
+ end
+
+ def self.expect(token, pretty = nil)
+ pretty ||= token.to_s
+ got = @lexer.next_token!
+ unless got == token then
+ raise "#{@lexer.src_pos}: Expected '#{pretty}' but found '#{got}'"
+ else
+ @lexer.lval
+ end
+ end
+
+ @lexer = Lexer.new(true) do |rules|
+ rules.match /@/, :at
+ rules.match /\{/, :lbrace
+ rules.match /\}/, :rbrace
+ rules.match /\"/, :dquote
+ rules.match /\=/, :equals
+ rules.match /\,/, :comma
+ rules.match /[\w\-_:]+/, :id
+ rules.match /.+?/, :cdata
+ end
+ end
+
+end
View
@@ -0,0 +1,15 @@
+require 'bibtex/parser'
+require 'test/unit'
+
+class TestParser < Test::Unit::TestCase
+ include BibTeX
+
+ def test_basic
+ b = Parser.parse 'example.bib'
+
+ ryan98 = b['ryan98']
+ assert_kind_of Entry, ryan98
+ assert_equal EntryType::Article, ryan98.type
+ assert_equal 1998, ryan98[:year].to_i
+ end
+end
Oops, something went wrong.

0 comments on commit 9db933d

Please sign in to comment.