Convert rexical css parser to oedipus_lex

sparklemotion · Jul 20, 2023 · d5fb727 · d5fb727
1 parent c1f733e
commit d5fb727
Show file tree

Hide file tree

Showing 3 changed files with 238 additions and 163 deletions.
diff --git a/lib/nokogiri/css/tokenizer.rb b/lib/nokogiri/css/tokenizer.rb
@@ -1,155 +1,223 @@
 # frozen_string_literal: true
+# encoding: UTF-8
 #--
-# DO NOT MODIFY!!!!
-# This file is automatically generated by rex 1.0.7
-# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
+# This file is automatically generated. Do not modify it.
+# Generated by: oedipus_lex version 2.6.1.
+# Source: lib/nokogiri/css/tokenizer.rex
 #++
 
-module Nokogiri
-module CSS
-# :nodoc: all
-class Tokenizer
-      require 'strscan'
 
-      class ScanError < StandardError ; end
+##
+# The generated lexer Nokogiri::CSS::Tokenizer
 
-      attr_reader   :lineno
-      attr_reader   :filename
-      attr_accessor :state
+class Nokogiri::CSS::Tokenizer
+  require 'strscan'
 
-      def scan_setup(str)
-        @ss = StringScanner.new(str)
-        @lineno =  1
-        @state  = nil
-      end
+  # :stopdoc:
+  # Building blocks for the token patterns used by next_token; each later
+  # macro interpolates the earlier ones.
+  NL       = /\n|\r\n|\r|\f/
+  W        = /[\s]*/
+  NONASCII = /[^\0-\177]/
+  NUM      = /-?([0-9]+|[0-9]*\.[0-9]+)/
+  # A hex escape begins with a backslash, as in the inlined patterns this
+  # lexer replaces. Without it ESCAPE cannot match an input such as "\41 ".
+  UNICODE  = /\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?/
+  ESCAPE   = /#{UNICODE}|\\[^\n\r\f0-9A-Fa-f]/
+  NMCHAR   = /[_A-Za-z0-9-]|#{NONASCII}|#{ESCAPE}/
+  NMSTART  = /[_A-Za-z]|#{NONASCII}|#{ESCAPE}/
+  IDENT    = /-?(#{NMSTART})(#{NMCHAR})*/
+  NAME     = /(#{NMCHAR})+/
+  # The trailing (?<!\\)(?:\\{2})* only lets the closing quote match when it
+  # is not preceded by an odd backslash, i.e. when the quote is not escaped.
+  STRING1  = /"([^\n\r\f"]|#{NL}|#{NONASCII}|#{ESCAPE})*(?<!\\)(?:\\{2})*"/
+  STRING2  = /'([^\n\r\f']|#{NL}|#{NONASCII}|#{ESCAPE})*(?<!\\)(?:\\{2})*'/
+  STRING   = /#{STRING1}|#{STRING2}/
+  # :startdoc:
+  # :stopdoc:
+  class LexerError < StandardError ; end
+  class ScanError < LexerError ; end
+  # :startdoc:
 
-      def action
-        yield
-      end
+  ##
+  # The current line number.
 
-      def scan_str(str)
-        scan_setup(str)
-        do_parse
-      end
-      alias :scan :scan_str
+  attr_accessor :lineno
+  ##
+  # The file name / path
 
-      def load_file( filename )
-        @filename = filename
-        File.open(filename, "r") do |f|
-          scan_setup(f.read)
-        end
-      end
-
-      def scan_file( filename )
-        load_file(filename)
-        do_parse
-      end
+  attr_accessor :filename
 
+  ##
+  # The StringScanner for this lexer.
 
-        def next_token
-          return if @ss.eos?
+  attr_accessor :ss
 
-          # skips empty actions
-          until token = _next_token or @ss.eos?; end
-          token
-        end
+  ##
+  # The current lexical state.
 
-        def _next_token
-          text = @ss.peek(1)
-          @lineno  +=  1  if text == "\n"
-          token = case @state
-            when nil
-          case
-                  when (text = @ss.scan(/has\([\s]*/))
-                     action { [:HAS, text] }
+  attr_accessor :state
 
-                  when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
-                     action { [:FUNCTION, text] }
+  alias :match :ss
 
-                  when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
-                     action { [:IDENT, text] }
+  ##
+  # The match groups for the current scan.
 
-                  when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
-                     action { [:HASH, text] }
+  # Reads capture groups 1 through 9 from the scanner's last match and
+  # returns them up to and including the last group that is set; unset
+  # groups at the tail are left out, unset groups in the middle stay nil.
+  def matches
+    groups = Array.new(9) { |offset| ss[offset + 1] }
+    last_set = groups.rindex { |group| group }
+    last_set ? groups.first(last_set + 1) : []
+  end
 
-                  when (text = @ss.scan(/[\s]*~=[\s]*/))
-                     action { [:INCLUDES, text] }
+  ##
+  # Yields on the current action.
 
-                  when (text = @ss.scan(/[\s]*\|=[\s]*/))
-                     action { [:DASHMATCH, text] }
+  # Runs the given block and returns its value. next_token wraps the token
+  # construction of every rule in a call to this method.
+  def action
+    yield
+  end
 
-                  when (text = @ss.scan(/[\s]*\^=[\s]*/))
-                     action { [:PREFIXMATCH, text] }
+  ##
+  # The previous position. Only available if the :column option is on.
 
-                  when (text = @ss.scan(/[\s]*\$=[\s]*/))
-                     action { [:SUFFIXMATCH, text] }
+  attr_accessor :old_pos
 
-                  when (text = @ss.scan(/[\s]*\*=[\s]*/))
-                     action { [:SUBSTRINGMATCH, text] }
+  ##
+  # The position of the start of the current line. Only available if the
+  # :column option is on.
 
-                  when (text = @ss.scan(/[\s]*!=[\s]*/))
-                     action { [:NOT_EQUAL, text] }
+  attr_accessor :start_of_current_line_pos
 
-                  when (text = @ss.scan(/[\s]*=[\s]*/))
-                     action { [:EQUAL, text] }
+  ##
+  # The current column, starting at 0. Only available if the
+  # :column option is on.
+  # Computed as old_pos minus start_of_current_line_pos.
+  # NOTE(review): in this file old_pos is first assigned inside next_token,
+  # so calling this before any token is lexed presumably fails on nil --
+  # confirm against callers.
+  def column
+    old_pos - start_of_current_line_pos
+  end
 
-                  when (text = @ss.scan(/[\s]*\)/))
-                     action { [:RPAREN, text] }
 
-                  when (text = @ss.scan(/\[[\s]*/))
-                     action { [:LSQUARE, text] }
+  ##
+  # The scanner class that #parse instantiates. Defaults to StringScanner; subclasses may override it.
 
-                  when (text = @ss.scan(/[\s]*\]/))
-                     action { [:RSQUARE, text] }
+  # Returns StringScanner. The trailing +unless+ skips this definition when
+  # the class already has its own scanner_class instance method
+  # (instance_methods(false) lists only methods defined directly on it).
+  def scanner_class
+    StringScanner
+  end unless instance_methods(false).map(&:to_s).include?("scanner_class")
 
-                  when (text = @ss.scan(/[\s]*\+[\s]*/))
-                     action { [:PLUS, text] }
+  ##
+  # Parse the given string.
 
-                  when (text = @ss.scan(/[\s]*>[\s]*/))
-                     action { [:GREATER, text] }
+  def parse str
+    self.ss     = scanner_class.new str
+    self.lineno = 1
+    self.start_of_current_line_pos = 0
+    self.state  ||= nil
 
-                  when (text = @ss.scan(/[\s]*,[\s]*/))
-                     action { [:COMMA, text] }
+    do_parse
+  end
 
-                  when (text = @ss.scan(/[\s]*~[\s]*/))
-                     action { [:TILDE, text] }
+  ##
+  # Read in and parse the file at +path+.
 
-                  when (text = @ss.scan(/\:not\([\s]*/))
-                     action { [:NOT, text] }
+  # Records +path+ as the current filename, then reads the whole file and
+  # passes its contents to #parse. Returns whatever #parse returns.
+  def parse_file path
+    self.filename = path
+    # File.open rather than Kernel#open: the latter treats a path starting
+    # with "|" as a command to run instead of a file to read.
+    File.open path do |f|
+      parse f.read
+    end
+  end
 
-                  when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
-                     action { [:NUMBER, text] }
+  ##
+  # The current location in the parse.
 
-                  when (text = @ss.scan(/[\s]*\/\/[\s]*/))
-                     action { [:DOUBLESLASH, text] }
+  # Builds a "file:line:column" string for messages. A missing filename is
+  # shown as "<input>"; a nil line or column is left out of the result.
+  def location
+    source = filename || "<input>"
+    parts = [source, lineno, column].reject(&:nil?)
+    parts.join(":")
+  end
 
-                  when (text = @ss.scan(/[\s]*\/[\s]*/))
-                     action { [:SLASH, text] }
+  ##
+  # Lex the next token.
 
-                  when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
-                     action {[:UNICODE_RANGE, text] }
+  def next_token
 
-                  when (text = @ss.scan(/[\s]+/))
-                     action { [:S, text] }
+    token = nil
 
-                  when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
-                     action { [:STRING, text] }
+    until ss.eos? or token do
+      if ss.check(/\n/) then
+        self.lineno += 1
+        # line starts 1 position after the newline
+        self.start_of_current_line_pos = ss.pos + 1
+      end
+      self.old_pos = ss.pos
+      token =
+        case state
+        when nil then
+          case
+          when text = ss.scan(/has\(#{W}/) then
+            action { [:HAS, text] }
+          when text = ss.scan(/#{NUM}/) then
+            action { [:NUMBER, text] }
+          when text = ss.scan(/#{IDENT}\(#{W}/) then
+            action { [:FUNCTION, text] }
+          when text = ss.scan(/#{IDENT}/) then
+            action { [:IDENT, text] }
+          when text = ss.scan(/##{NAME}/) then
+            action { [:HASH, text] }
+          when text = ss.scan(/#{W}\~=#{W}/) then
+            action { [:INCLUDES, text] }
+          when text = ss.scan(/#{W}\|=#{W}/) then
+            action { [:DASHMATCH, text] }
+          when text = ss.scan(/#{W}\^=#{W}/) then
+            action { [:PREFIXMATCH, text] }
+          when text = ss.scan(/#{W}\$=#{W}/) then
+            action { [:SUFFIXMATCH, text] }
+          when text = ss.scan(/#{W}\*=#{W}/) then
+            action { [:SUBSTRINGMATCH, text] }
+          when text = ss.scan(/#{W}!=#{W}/) then
+            action { [:NOT_EQUAL, text] }
+          when text = ss.scan(/#{W}=#{W}/) then
+            action { [:EQUAL, text] }
+          when text = ss.scan(/#{W}\)/) then
+            action { [:RPAREN, text] }
+          when text = ss.scan(/\[#{W}/) then
+            action { [:LSQUARE, text] }
+          when text = ss.scan(/#{W}\]/) then
+            action { [:RSQUARE, text] }
+          when text = ss.scan(/#{W}\+#{W}/) then
+            action { [:PLUS, text] }
+          when text = ss.scan(/#{W}>#{W}/) then
+            action { [:GREATER, text] }
+          when text = ss.scan(/#{W},#{W}/) then
+            action { [:COMMA, text] }
+          when text = ss.scan(/#{W}~#{W}/) then
+            action { [:TILDE, text] }
+          when text = ss.scan(/:not\(#{W}/) then
+            action { [:NOT, text] }
+          when text = ss.scan(/#{W}\/\/#{W}/) then
+            action { [:DOUBLESLASH, text] }
+          when text = ss.scan(/#{W}\/#{W}/) then
+            action { [:SLASH, text] }
+          when text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/) then
+            action {[:UNICODE_RANGE, text] }
+          when text = ss.scan(/[\s]+/) then
+            action { [:S, text] }
+          when text = ss.scan(/#{STRING}/) then
+            action { [:STRING, text] }
+          when text = ss.scan(/./) then
+            action { [text, text] }
+          else
+            text = ss.string[ss.pos .. -1]
+            raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
+          end
+        else
+          raise ScanError, "undefined state at #{location}: '#{state}'"
+        end # token = case state
 
-                  when (text = @ss.scan(/./))
-                     action { [text, text] }
+      next unless token # allow functions to trigger redo w/ nil
+    end # while
 
-
-          else
-            text = @ss.string[@ss.pos .. -1]
-            raise  ScanError, "can not match: '" + text + "'"
-          end  # if
+    raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
+      token.nil? || (Array === token && token.size >= 2)
 
-        else
-          raise  ScanError, "undefined state: '" + state.to_s + "'"
-        end  # case state
-          token
-        end  # def _next_token
+    # auto-switch state
+    self.state = token.last if token && token.first == :state
 
+    token
+  end # def next_token
+    # Empty body. #parse ends by calling do_parse, so with this definition
+    # #parse returns nil.
+    # NOTE(review): presumably a placeholder for a parser that drives
+    # next_token -- confirm.
+    def do_parse
+    end
 end # class
-end
-end