Skip to content
This repository
Browse code

Updated vendor copy of html-scanner lib, for bug fixes and optimizations

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@1416 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
  • Loading branch information...
commit c23b2a4ad3f77222b6bfb219610fca79024ca4e5 1 parent bca13f7
Jamis Buck authored June 14, 2005
2  actionpack/CHANGELOG
... ...
@@ -1,5 +1,7 @@
1 1
 *SVN*
2 2
 
  3
+* Updated vendor copy of html-scanner lib to 0.5.1, for bug fixes and optimizations
  4
+
3 5
 * Changed test requests to come from 0.0.0.0 instead of 127.0.0.1 such that they don't trigger debugging screens on exceptions, but instead call rescue_action_in_public
4 6
 
5 7
 * Modernize scaffolding to match the generator: use the new render method and change style from the warty @params["id"] to the sleek params[:id].  #1367
7  actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
... ...
@@ -1,7 +1,7 @@
1 1
 require 'html/tokenizer'
2 2
 require 'html/node'
3 3
 
4  
-module HTML#:nodoc:
  4
+module HTML #:nodoc:
5 5
   
6 6
   # A top-level HTML document. You give it a body of text, and it will parse that
7 7
   # text into a tree of nodes.
@@ -11,7 +11,7 @@ class Document #:nodoc:
11 11
     attr_reader :root
12 12
 
13 13
     # Create a new Document from the given text.
14  
-    def initialize(text)
  14
+    def initialize(text, strict=false)
15 15
       tokenizer = Tokenizer.new(text)
16 16
       @root = Node.new(nil)
17 17
       node_stack = [ @root ]
@@ -28,7 +28,7 @@ def initialize(text)
28 28
               open_start = 0 if open_start < 0
29 29
               close_start = node.position - 20
30 30
               close_start = 0 if close_start < 0
31  
-              warn <<EOF.strip
  31
+              msg = <<EOF.strip
32 32
 ignoring attempt to close #{node_stack.last.name} with #{node.name}
33 33
   opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
34 34
   closed at byte #{node.position}, line #{node.line}
@@ -36,6 +36,7 @@ def initialize(text)
36 36
   text around open: #{text[open_start,40].inspect}
37 37
   text around close: #{text[close_start,40].inspect}
38 38
 EOF
  39
+              strict ? raise(msg) : warn(msg)
39 40
             end
40 41
           elsif node.closing != :close
41 42
             node_stack.push node
19  actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
... ...
@@ -1,8 +1,8 @@
1 1
 require 'strscan'
2 2
 
3  
-module HTML#:nodoc:
  3
+module HTML #:nodoc:
4 4
   
5  
-  class Conditions < Hash#:nodoc:
  5
+  class Conditions < Hash #:nodoc:
6 6
     def initialize(hash)
7 7
       super()
8 8
       hash = { :content => hash } unless Hash === hash
@@ -54,7 +54,7 @@ def keys_to_symbols(hash)
54 54
   end
55 55
 
56 56
   # The base class of all nodes, textual and otherwise, in an HTML document.
57  
-  class Node#:nodoc:
  57
+  class Node #:nodoc:
58 58
     # The array of children of this node. Not all nodes have children.
59 59
     attr_reader :children
60 60
     
@@ -91,6 +91,8 @@ def match(conditions)
91 91
     # Search the children of this node for the first node for which #find
92 92
     # returns non +nil+. Returns the result of the #find call that succeeded.
93 93
     def find(conditions)
  94
+      conditions = validate_conditions(conditions)
  95
+
94 96
       @children.each do |child|        
95 97
         node = child.find(conditions)
96 98
         return node if node
@@ -101,6 +103,8 @@ def find(conditions)
101 103
     # Search for all nodes that match the given conditions, and return them
102 104
     # as an array.
103 105
     def find_all(conditions)
  106
+      conditions = validate_conditions(conditions)
  107
+
104 108
       matches = []
105 109
       matches << self if match(conditions)
106 110
       @children.each do |child|
@@ -183,7 +187,7 @@ def parse(parent, line, pos, content, strict=true)
183 187
   end
184 188
 
185 189
   # A node that represents text, rather than markup.
186  
-  class Text < Node#:nodoc:
  190
+  class Text < Node #:nodoc:
187 191
     
188 192
     attr_reader :content
189 193
     
@@ -239,7 +243,7 @@ def match(conditions)
239 243
   # A Tag is any node that represents markup. It may be an opening tag, a
240 244
   # closing tag, or a self-closing tag. It has a name, and may have a hash of
241 245
   # attributes.
242  
-  class Tag < Node#:nodoc:
  246
+  class Tag < Node #:nodoc:
243 247
     
244 248
     # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
245 249
     attr_reader :closing
@@ -268,7 +272,9 @@ def [](attr)
268 272
 
269 273
     # Returns non-+nil+ if this tag can contain child nodes.
270 274
     def childless?
271  
-      @name =~ /^(img|br|hr|link|meta|area|base|basefont|col|frame|input|isindex|param)$/o
  275
+      !@closing.nil? ||
  276
+        @name =~ /^(img|br|hr|link|meta|area|base|basefont|
  277
+                    col|frame|input|isindex|param)$/ox
272 278
     end
273 279
 
274 280
     # Returns a textual representation of the node
@@ -284,6 +290,7 @@ def to_s
284 290
         s << " /" if @closing == :self
285 291
         s << ">"
286 292
         @children.each { |child| s << child.to_s }
  293
+        s << "</#{@name}>" if @closing != :self && !@children.empty?
287 294
         s
288 295
       end
289 296
     end
13  actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
... ...
@@ -1,6 +1,6 @@
1 1
 require 'strscan'
2 2
 
3  
-module HTML#:nodoc:
  3
+module HTML #:nodoc:
4 4
   
5 5
   # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
6 6
   # token is a string. Each string represents either "text", or an HTML element.
@@ -13,7 +13,7 @@ module HTML#:nodoc:
13 13
   #   while token = tokenizer.next
14 14
   #     p token
15 15
   #   end
16  
-  class Tokenizer#:nodoc:
  16
+  class Tokenizer #:nodoc:
17 17
     
18 18
     # The current (byte) position in the text
19 19
     attr_reader :position
@@ -51,7 +51,7 @@ def scan_tag
51 51
         tag = @scanner.getch
52 52
         if @scanner.scan(/!--/) # comment
53 53
           tag << @scanner.matched
54  
-          tag << @scanner.scan_until(/--\s*>/)
  54
+          tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
55 55
         elsif @scanner.scan(/!/) # doctype
56 56
           tag << @scanner.matched
57 57
           tag << consume_quoted_regions
@@ -63,14 +63,13 @@ def scan_tag
63 63
 
64 64
       # Scan all text up to the next < character and return it.
65 65
       def scan_text
66  
-        @scanner.getch + (@scanner.scan(/[^<]*/) || "")
  66
+        "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
67 67
       end
68 68
       
69 69
       # Counts the number of newlines in the text and updates the current line
70 70
       # accordingly.
71 71
       def update_current_line(text)
72  
-        @current_line += text.scan(/\r\n|\r|\n/).length
73  
-        text
  72
+        text.scan(/\r?\n/) { @current_line += 1 }
74 73
       end
75 74
       
76 75
       # Skips over quoted strings, so that less-than and greater-than characters
@@ -89,7 +88,7 @@ def consume_quoted_regions
89 88
           text << match
90 89
           break if delim == "<" || delim == ">"
91 90
 
92  
-          # consume the conqued region
  91
+          # consume the quoted region
93 92
           while match = @scanner.scan_until(/[\\#{delim}]/)
94 93
             text << match
95 94
             break if @scanner.matched == delim
6  actionpack/lib/action_controller/vendor/html-scanner/html/version.rb
... ...
@@ -1,9 +1,9 @@
1  
-module HTML#:nodoc:
2  
-  module Version#:nodoc:
  1
+module HTML #:nodoc:
  2
+  module Version #:nodoc:
3 3
 
4 4
     MAJOR = 0
5 5
     MINOR = 5
6  
-    TINY  = 0
  6
+    TINY  = 1
7 7
 
8 8
     STRING = [ MAJOR, MINOR, TINY ].join(".")
9 9
 

0 notes on commit c23b2a4

Please sign in to comment.
Something went wrong with that request. Please try again.