214 changes: 108 additions & 106 deletions lib/rdoc/markup/parser.rb
@@ -272,44 +272,11 @@ def build_verbatim margin
end

case type
when :HEADER then
line << '=' * data
_, _, peek_column, = peek_token
peek_column ||= column + data
indent = peek_column - column - data
line << ' ' * indent
when :RULE then
width = 2 + data
line << '-' * width
_, _, peek_column, = peek_token
peek_column ||= column + width
indent = peek_column - column - width
line << ' ' * indent
when :BREAK, :TEXT then
line << data
when :BLOCKQUOTE then
line << '>>>'
peek_type, _, peek_column = peek_token
if peek_type != :NEWLINE and peek_column
line << ' ' * (peek_column - column - 3)
end
else # *LIST_TOKENS
list_marker = case type
when :BULLET then data
when :LABEL then "[#{data}]"
when :NOTE then "#{data}::"
else # :LALPHA, :NUMBER, :UALPHA
"#{data}."
end
line << list_marker
peek_type, _, peek_column = peek_token
unless peek_type == :NEWLINE then
peek_column ||= column + list_marker.length
indent = peek_column - column - list_marker.length
line << ' ' * indent
end
else
raise TypeError, "unexpected token under verbatim: #{type}"
end

end

verbatim << line << "\n" unless line.empty?
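The upshot of this hunk: markup-like lines inside a verbatim block now reach build_verbatim as plain :TEXT tokens, so the method only appends them and can raise TypeError on anything else. A minimal sketch of the intended behaviour, mirroring the updated test expectations further down; the input string is an assumption reconstructed from those expectations, and RDoc::Markup::Parser.tokenize is assumed to be the class-level helper behind @RMP.tokenize in the tests:

  require 'rdoc'

  str = "Example heading:\n\n   === heading three\n"
  RDoc::Markup::Parser.tokenize(str)
  # the indented line is now expected to come back as
  #   [:TEXT, '=== heading three', 3, 2]
  # rather than [:HEADER, 3, 3, 2] followed by [:TEXT, 'heading three', 7, 2]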
@@ -481,11 +448,37 @@ def skip token_type, error = true
##
# Turns text +input+ into a stream of tokens

def tokenize input
def tokenize(input)
setup_scanner input
margin = @s.pos[0]
tokenize_indented(margin)
tokenize_input(margin)
end

def newline!(pos = nil)
if pos or (@s.scan(/ *(?=\r?\n)/) and pos = @s.pos and @s.scan(/\r?\n/))
@tokens << [:NEWLINE, @s.matched, *pos]
@s.newline!
end
end

until @s.eos? do
def tokenize_indented(column)
indent = / {#{column+1},}(?=\S)| *(?=\r?\n)/
while @s.scan(indent)
pos = @s.pos
if @s.scan(/(.+)(?=\r?\n)?/)
@tokens << [:TEXT, @s.matched, *pos]
end
newline! or break
end
end

def tokenize_input(margin)
column = 0

until @s.eos?
pos = @s.pos
break if pos[0] < (margin ||= pos[0])

# leading spaces will be reflected by the column of the next token
# the only thing we lose are trailing spaces at the end of the file
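For orientation, a stand-alone illustration (not part of the patch) of how the indent pattern built in tokenize_indented behaves: lines indented deeper than the given column stay in the indented scan, blank lines are kept so newline! can emit their :NEWLINE, and anything at or left of the column falls back to tokenize_input. The column value here is an assumption for the example.

  column = 2
  indent = / {#{column + 1},}(?=\S)| *(?=\r?\n)/

  "    deeper than the margin" =~ indent  #=> 0   (scanned as indented :TEXT)
  "  back at the margin"       =~ indent  #=> nil (left for tokenize_input)
  "\n"                         =~ indent  #=> 0   (blank line, newline! takes over)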
@@ -494,75 +487,84 @@ def tokenize input
# note: after BULLET, LABEL, etc.,
# indent will be the column of the next non-newline token

@tokens << case
# [CR]LF => :NEWLINE
when @s.scan(/\r?\n/) then
token = [:NEWLINE, @s.matched, *pos]
@s.newline!
token
# === text => :HEADER then :TEXT
when @s.scan(/(=+)(\s*)/) then
level = @s[1].length
header = [:HEADER, level, *pos]

if @s[2] =~ /^\r?\n/ then
@s.unscan(@s[2])
header
else
pos = @s.pos
@s.scan(/.*/)
@tokens << header
[:TEXT, @s.matched.sub(/\r$/, ''), *pos]
end
# --- (at least 3) and nothing else on the line => :RULE
when @s.scan(/(-{3,}) *\r?$/) then
[:RULE, @s[1].length - 2, *pos]
# * or - followed by white space and text => :BULLET
when @s.scan(/([*-]) +(\S)/) then
@s.unscan(@s[2])
[:BULLET, @s[1], *pos]
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
# FIXME if tab(s), the column will be wrong
# either support tabs everywhere by first expanding them to
# spaces, or assume that they will have been replaced
# before (and provide a check for that at least in debug
# mode)
list_label = @s[1]
@s.unscan(@s[2])
list_type =
case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
[list_type, list_label, *pos]
# [text] followed by spaces or end of line => :LABEL
when @s.scan(/\[(.*?)\]( +|\r?$)/) then
[:LABEL, @s[1], *pos]
# text:: followed by spaces or end of line => :NOTE
when @s.scan(/(.*?)::( +|\r?$)/) then
[:NOTE, @s[1], *pos]
# >>> followed by end of line => :BLOCKQUOTE
when @s.scan(/>>> *(\w+)?$/) then
if word = @s[1]
@s.unscan(word)
end
[:BLOCKQUOTE, word, *pos]
# anything else: :TEXT
else
@s.scan(/(.*?)( )?\r?$/)
token = [:TEXT, @s[1], *pos]

if @s[2] then
@tokens << token
[:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
else
token
end
end
case
# [CR]LF => :NEWLINE
when @s.scan(/\r?\n/)
newline!(pos)
next

# === text => :HEADER then :TEXT
when @s.scan(/(=+)(\s*)/)
level = @s[1].length
header = [:HEADER, level, *pos]

if @s[2] =~ /^\r?\n/
@s.unscan(@s[2])
@tokens << header
else
pos = @s.pos
@s.scan(/.*/)
@tokens << header
@tokens << [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
end

# --- (at least 3) and nothing else on the line => :RULE
when @s.scan(/(-{3,}) *\r?$/)
@tokens << [:RULE, @s[1].length - 2, *pos]

# * or - followed by white space and text => :BULLET
when @s.scan(/([*-]) +(?=\S)/)
@tokens << [:BULLET, @s[1], *pos]
tokenize_input(nil)

# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
when @s.scan(/([a-z]|\d+)\. +(?=\S)/i)
# FIXME if tab(s), the column will be wrong
# either support tabs everywhere by first expanding them to
# spaces, or assume that they will have been replaced
# before (and provide a check for that at least in debug
# mode)
list_label = @s[1]
list_type =
case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
@tokens << [list_type, list_label, *pos]
tokenize_input(nil)

# [text] followed by spaces or end of line => :LABEL
when @s.scan(/\[(.*?)\]( +|\r?$)/)
@tokens << [:LABEL, @s[1], *pos]
tokenize_input(nil)

# text:: followed by spaces or end of line => :NOTE
when @s.scan(/(.*?)::( +|\r?$)/)
@tokens << [:NOTE, @s[1], *pos]
tokenize_input(nil)

# >>> followed by end of line => :BLOCKQUOTE
when @s.scan(/>>> *(\w+)?\r?$/)
@tokens << [:BLOCKQUOTE, @s[1], *pos]
newline!
tokenize_input(nil)

# anything else: :TEXT
else
column = pos[0]
@s.scan(/(.*?)( )?\r?$/)
@tokens << [:TEXT, @s[1], *pos]

if @s[2]
@tokens << [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
end
if newline!
tokenize_indented(column)
end
end
end

self
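Taken together, the public entry point keeps its signature; only the internal flow differs: tokenize_indented picks up over-indented and blank lines, while bullets, labels, notes and blockquotes re-enter tokenize_input with the margin reset. A hedged usage sketch, assuming require 'rdoc' is enough to load the parser; token columns depend on the scanner, so they are left out rather than guessed:

  require 'rdoc'

  tokens = RDoc::Markup::Parser.tokenize "* item\n  wrapped line\n"
  tokens.each { |token| p token }
  # expected shape: [:BULLET, "*", ...], [:TEXT, "item", ...], [:NEWLINE, ...],
  # then [:TEXT, "wrapped line", ...] at its own column and a final [:NEWLINE, ...]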
21 changes: 10 additions & 11 deletions test/rdoc/test_rdoc_markup_parser.rb
@@ -1591,8 +1591,7 @@ def test_tokenize_verbatim_heading
[:TEXT, 'Example heading:', 0, 0],
[:NEWLINE, "\n", 16, 0],
[:NEWLINE, "\n", 0, 1],
[:HEADER, 3, 3, 2],
[:TEXT, 'heading three', 7, 2],
[:TEXT, '=== heading three', 3, 2],
[:NEWLINE, "\n", 20, 2],
]

@@ -1608,7 +1607,7 @@ def test_tokenize_verbatim_rule
expected = [
[:TEXT, 'Verbatim section here that is double-underlined', 2, 0],
[:NEWLINE, "\n", 49, 0],
[:HEADER, 47, 2, 1],
[:TEXT, '='*47, 2, 1],
[:NEWLINE, "\n", 49, 1],
]

@@ -1624,14 +1623,14 @@ def test_tokenize_verbatim_rule_fancy
STR

expected = [
[:TEXT, 'A', 2, 0],
[:NEWLINE, "\n", 3, 0],
[:TEXT, 'b', 4, 1],
[:NEWLINE, "\n", 5, 1],
[:HEADER, 47, 2, 2],
[:NEWLINE, "\n", 49, 2],
[:TEXT, 'c', 4, 3],
[:NEWLINE, "\n", 5, 3],
[:TEXT, 'A', 2, 0],
[:NEWLINE, "\n", 3, 0],
[:TEXT, 'b', 4, 1],
[:NEWLINE, "\n", 5, 1],
[:TEXT, '='*47, 2, 2],
[:NEWLINE, "\n", 49, 2],
[:TEXT, 'c', 4, 3],
[:NEWLINE, "\n", 5, 3],
]

assert_equal expected, @RMP.tokenize(str)