Permalink
Browse files

more tokenizer updates: self closing tag flag

  • Loading branch information...
1 parent a0a6697 commit 0393f46f0b74762237cc3afc5dd79db883baf0ce ryansking committed Sep 3, 2008
Showing with 28 additions and 4 deletions.
  1. +2 −1 lib/html5/constants.rb
  2. +25 −2 lib/html5/tokenizer.rb
  3. +1 −1 test/test_tokenizer.rb
View
@@ -1041,7 +1041,8 @@ def self._(str); str end
". Expected end of file."),
"unexpected-end-table-in-caption" =>
_("Unexpected end table tag in caption. Generates implied end caption."),
- "end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode.")
+ "end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode."),
+ "expected-self-closing-tag" => _("Expected a > after the /.")
}
end
View
@@ -428,8 +428,7 @@ def tag_name_state
elsif data == ">"
emit_current_token
elsif data == "/"
- process_solidus_in_tag
- @state = :before_attribute_name_state
+ @state = :self_closing_tag_state
else
@current_token[:name] += data
end
@@ -627,6 +626,11 @@ def after_attribute_value_state
if !process_solidus_in_tag
@state = :before_attribute_name_state
end
+ elsif data == :EOF
+ @token_queue << {:type => :ParseError, :data => "unexpected-EOF-after-attribute-value"}
+ emit_current_token
+ @stream.unget(data)
+ @state = :data_state
else
@token_queue.push({:type => :ParseError, :data => "unexpected-character-after-attribute-value"})
@stream.unget(data)
@@ -635,6 +639,25 @@ def after_attribute_value_state
true
end
+ def self_closing_tag_state
+ c = @stream.char
+ case c
+ when ">"
+ emit_current_token
+ @current_token[:self_closing] = true
+ @state = :data_state
+ when :EOF
+ @token_queue << {:type => :ParseError, :data => "eof-in-tag-name"}
+ @stream.unget(c)
+ @state = :data_state
+ else
+ @token_queue << {:type => :ParseError, :data => "expected-self-closing-tag"}
+ @stream.unget(c)
+ @state = :before_attribute_name_state
+ end
+ true
+ end
+
def bogus_comment_state
# Make a new comment token and give it as value all the characters
# until the first > or :EOF (chars_until checks for :EOF automatically)
View
@@ -63,7 +63,7 @@ def tokenizer_test(data)
'' ] * "\n"
assert_nothing_raised message do
- tokenizer = HTML5::HTMLTokenizer.new(data['input'])
+ tokenizer = HTML5::HTMLTokenizer.new(data['input'], :encoding => 'utf-8')
tokenizer.content_model_flag = content_model_flag.to_sym

0 comments on commit 0393f46

Please sign in to comment.