Skip to content

Commit

Permalink
Convert rexical css parser to oedipus_lex
Browse files Browse the repository at this point in the history
  • Loading branch information
zenspider committed Jul 20, 2023
1 parent c1f733e commit d5fb727
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 163 deletions.
292 changes: 180 additions & 112 deletions lib/nokogiri/css/tokenizer.rb
Original file line number Diff line number Diff line change
@@ -1,155 +1,223 @@
# frozen_string_literal: true
# encoding: UTF-8
#--
# DO NOT MODIFY!!!!
# This file is automatically generated by rex 1.0.7
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
# This file is automatically generated. Do not modify it.
# Generated by: oedipus_lex version 2.6.1.
# Source: lib/nokogiri/css/tokenizer.rex
#++

module Nokogiri
module CSS
# :nodoc: all
class Tokenizer
require 'strscan'

class ScanError < StandardError ; end
##
# The generated lexer Nokogiri::CSS::Tokenizer

attr_reader :lineno
attr_reader :filename
attr_accessor :state
class Nokogiri::CSS::Tokenizer
require 'strscan'

# Point the tokenizer at +str+: a new StringScanner is created over it, the
# line counter restarts at 1 and the lexical state is cleared. Returns nil.
def scan_setup(str)
  @ss, @lineno = StringScanner.new(str), 1
  @state = nil
end
# :stopdoc:
# Building-block patterns that the lexer rules interpolate into their regexps.
#
# NOTE(review): this file is generated from lib/nokogiri/css/tokenizer.rex, so
# the UNICODE correction below should be mirrored in that source as well.

# A single line break.
NL = /\n|\r\n|\r|\f/
# Optional run of whitespace.
W = /[\s]*/
# Any character outside the 7-bit ASCII range.
NONASCII = /[^\0-\177]/
# An optionally negative integer or decimal number.
NUM = /-?([0-9]+|[0-9]*\.[0-9]+)/
# A unicode escape: a backslash, one to six hex digits, then an optional
# single whitespace character (or CRLF). The leading backslash is required:
# without it ESCAPE -- and through it NMSTART, NMCHAR, IDENT and NAME -- would
# accept bare hex digits (so "12" would lex as an identifier) while a real
# escape such as "\26 " would not match at all.
UNICODE = /\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?/
# Either a unicode escape or a backslash followed by one non-hex,
# non-newline character.
ESCAPE = /#{UNICODE}|\\[^\n\r\f0-9A-Fa-f]/
# A character allowed inside a name.
NMCHAR = /[_A-Za-z0-9-]|#{NONASCII}|#{ESCAPE}/
# A character allowed at the start of an identifier.
NMSTART = /[_A-Za-z]|#{NONASCII}|#{ESCAPE}/
# An identifier: optional leading dash, a start character, then name characters.
IDENT = /-?(#{NMSTART})(#{NMCHAR})*/
# One or more name characters.
NAME = /(#{NMCHAR})+/
# Double-quoted string; the lookbehind/pair-of-backslashes tail makes sure the
# closing quote is not itself escaped.
STRING1 = /"([^\n\r\f"]|#{NL}|#{NONASCII}|#{ESCAPE})*(?<!\\)(?:\\{2})*"/
# Single-quoted string, same shape as STRING1.
STRING2 = /'([^\n\r\f']|#{NL}|#{NONASCII}|#{ESCAPE})*(?<!\\)(?:\\{2})*'/
# Either quoting style.
STRING = /#{STRING1}|#{STRING2}/
# :startdoc:
# :stopdoc:
# Error raised when the lexer produces or encounters something invalid.
class LexerError < StandardError
end

# Error raised when the scanner cannot proceed; a kind of LexerError.
class ScanError < LexerError
end
# :startdoc:

# Run the block attached to a lexer rule and hand back its value.
def action
  outcome = yield
  outcome
end
##
# The current line number.

# Tokenize and parse +str+: primes the scanner through scan_setup, then
# returns whatever do_parse returns.
def scan_str(str)
  scan_setup str
  do_parse
end
alias :scan :scan_str
attr_accessor :lineno
##
# The file name / path

# Remember +filename+ and feed the file's entire contents to scan_setup.
# Returns whatever scan_setup returns.
def load_file(filename)
  @filename = filename
  scan_setup(File.read(filename))
end

# Load +filename+ via load_file, then return the result of do_parse.
def scan_file(filename)
  load_file filename
  do_parse
end
attr_accessor :filename

##
# The StringScanner for this lexer.

# Return the next token produced by _next_token, or nil once the scanner has
# reached the end of its input.
def next_token
return if @ss.eos?
# NOTE(review): this accessor declaration looks like a line of the newer
# oedipus_lex-generated class that the diff view interleaved here -- confirm.
attr_accessor :ss

# skips empty actions
# (keeps calling _next_token until it yields a truthy token or input runs out)
until token = _next_token or @ss.eos?; end
token
end
##
# The current lexical state.

def _next_token
text = @ss.peek(1)
@lineno += 1 if text == "\n"
token = case @state
when nil
case
when (text = @ss.scan(/has\([\s]*/))
action { [:HAS, text] }
attr_accessor :state

when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
action { [:FUNCTION, text] }
alias :match :ss

when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
action { [:IDENT, text] }
##
# The match groups for the current scan.

when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
action { [:HASH, text] }
# Capture groups 1 through 9 of the current scan, as an array with trailing
# unmatched (nil) groups removed. An unmatched group that sits before a
# matched one is kept as nil; with no matched groups the result is empty.
def matches
  groups = (1..9).map { |index| ss[index] }
  last_hit = groups.rindex { |group| group }
  last_hit ? groups[0..last_hit] : []
end

when (text = @ss.scan(/[\s]*~=[\s]*/))
action { [:INCLUDES, text] }
##
# Yields on the current action.

when (text = @ss.scan(/[\s]*\|=[\s]*/))
action { [:DASHMATCH, text] }
# Evaluates the rule's block; the block's value is the method's value.
def action; yield; end

when (text = @ss.scan(/[\s]*\^=[\s]*/))
action { [:PREFIXMATCH, text] }
##
# The previous position. Only available if the :column option is on.

when (text = @ss.scan(/[\s]*\$=[\s]*/))
action { [:SUFFIXMATCH, text] }
attr_accessor :old_pos

when (text = @ss.scan(/[\s]*\*=[\s]*/))
action { [:SUBSTRINGMATCH, text] }
##
# The position of the start of the current line. Only available if the
# :column option is on.

when (text = @ss.scan(/[\s]*!=[\s]*/))
action { [:NOT_EQUAL, text] }
attr_accessor :start_of_current_line_pos

when (text = @ss.scan(/[\s]*=[\s]*/))
action { [:EQUAL, text] }
##
# The current column, starting at 0. Only available if the
# :column option is on.
# Distance from the start of the current line to the position recorded
# before the latest scan (old_pos minus start_of_current_line_pos).
def column
  position = old_pos
  position - start_of_current_line_pos
end

when (text = @ss.scan(/[\s]*\)/))
action { [:RPAREN, text] }

when (text = @ss.scan(/\[[\s]*/))
action { [:LSQUARE, text] }
##
# The current scanner class. Must be overridden in subclasses.

when (text = @ss.scan(/[\s]*\]/))
action { [:RSQUARE, text] }
# Supply the default scanner class (StringScanner), but only when this class
# has not already defined its own scanner_class method.
unless instance_methods(false).any? { |meth| meth.to_s == "scanner_class" }
  def scanner_class
    StringScanner
  end
end

when (text = @ss.scan(/[\s]*\+[\s]*/))
action { [:PLUS, text] }
##
# Parse the given string.

when (text = @ss.scan(/[\s]*>[\s]*/))
action { [:GREATER, text] }
# Builds a fresh scanner over +str+ via scanner_class, resets lineno to 1 and
# start_of_current_line_pos to 0, and returns the result of do_parse.
def parse str
self.ss = scanner_class.new str
self.lineno = 1
self.start_of_current_line_pos = 0
# keeps an already-set state; otherwise assigns nil
self.state ||= nil

# NOTE(review): the next two lines look like a clause of the previous
# rex-generated `_next_token` that the diff view interleaved here -- confirm.
when (text = @ss.scan(/[\s]*,[\s]*/))
action { [:COMMA, text] }
do_parse
end

when (text = @ss.scan(/[\s]*~[\s]*/))
action { [:TILDE, text] }
##
# Read in and parse the file at +path+.

when (text = @ss.scan(/\:not\([\s]*/))
action { [:NOT, text] }
# Records +path+ as the current filename, reads the whole file and returns
# the result of parse on its contents.
#
# The file is opened with File.open rather than Kernel#open so that a path
# beginning with "|" is always treated as a file name and never executed as
# a shell command.
def parse_file path
  self.filename = path
  File.open path do |f|
    parse f.read
  end
end

when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
action { [:NUMBER, text] }
##
# The current location in the parse.

when (text = @ss.scan(/[\s]*\/\/[\s]*/))
action { [:DOUBLESLASH, text] }
# Describe the current position as "file:line:column". A missing filename is
# shown as "<input>"; a nil line or column is left out of the result.
def location
  source = filename || "<input>"
  [source, lineno, column].compact.join(":")
end

when (text = @ss.scan(/[\s]*\/[\s]*/))
action { [:SLASH, text] }
##
# Lex the next token.

when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
action {[:UNICODE_RANGE, text] }
# Scans forward until a rule yields a token or the input is exhausted, then
# returns that token (nil at end of input).
#
# Raises ScanError when nothing matches or the state is unknown, and
# LexerError when a rule returns something other than nil or an Array of at
# least two elements.
#
# NOTE(review): several `when (text = @ss.scan(...))` / `else` / `end # ...`
# lines inside this method use the older `@ss` style and look like leftovers
# of the previous rex-generated `_next_token` interleaved by the diff view --
# confirm before relying on them.
def next_token

when (text = @ss.scan(/[\s]+/))
action { [:S, text] }
token = nil

when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
action { [:STRING, text] }
# keep scanning until a rule produces a token or the input runs out
until ss.eos? or token do
# a newline at the current position bumps the line counter
if ss.check(/\n/) then
self.lineno += 1
# line starts 1 position after the newline
self.start_of_current_line_pos = ss.pos + 1
end
# remember where this scan started; column is computed from it
self.old_pos = ss.pos
token =
case state
when nil then
# rules are tried top to bottom; the first pattern that matches wins
case
when text = ss.scan(/has\(#{W}/) then
action { [:HAS, text] }
when text = ss.scan(/#{NUM}/) then
action { [:NUMBER, text] }
when text = ss.scan(/#{IDENT}\(#{W}/) then
action { [:FUNCTION, text] }
when text = ss.scan(/#{IDENT}/) then
action { [:IDENT, text] }
when text = ss.scan(/##{NAME}/) then
action { [:HASH, text] }
when text = ss.scan(/#{W}\~=#{W}/) then
action { [:INCLUDES, text] }
when text = ss.scan(/#{W}\|=#{W}/) then
action { [:DASHMATCH, text] }
when text = ss.scan(/#{W}\^=#{W}/) then
action { [:PREFIXMATCH, text] }
when text = ss.scan(/#{W}\$=#{W}/) then
action { [:SUFFIXMATCH, text] }
when text = ss.scan(/#{W}\*=#{W}/) then
action { [:SUBSTRINGMATCH, text] }
when text = ss.scan(/#{W}!=#{W}/) then
action { [:NOT_EQUAL, text] }
when text = ss.scan(/#{W}=#{W}/) then
action { [:EQUAL, text] }
when text = ss.scan(/#{W}\)/) then
action { [:RPAREN, text] }
when text = ss.scan(/\[#{W}/) then
action { [:LSQUARE, text] }
when text = ss.scan(/#{W}\]/) then
action { [:RSQUARE, text] }
when text = ss.scan(/#{W}\+#{W}/) then
action { [:PLUS, text] }
when text = ss.scan(/#{W}>#{W}/) then
action { [:GREATER, text] }
when text = ss.scan(/#{W},#{W}/) then
action { [:COMMA, text] }
when text = ss.scan(/#{W}~#{W}/) then
action { [:TILDE, text] }
when text = ss.scan(/:not\(#{W}/) then
action { [:NOT, text] }
when text = ss.scan(/#{W}\/\/#{W}/) then
action { [:DOUBLESLASH, text] }
when text = ss.scan(/#{W}\/#{W}/) then
action { [:SLASH, text] }
when text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/) then
action {[:UNICODE_RANGE, text] }
when text = ss.scan(/[\s]+/) then
action { [:S, text] }
when text = ss.scan(/#{STRING}/) then
action { [:STRING, text] }
# fallback: any other single character becomes its own token type
when text = ss.scan(/./) then
action { [text, text] }
else
# nothing matched: report the unscanned remainder of the input
text = ss.string[ss.pos .. -1]
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
end
else
raise ScanError, "undefined state at #{location}: '#{state}'"
end # token = case state

when (text = @ss.scan(/./))
action { [text, text] }
next unless token # allow functions to trigger redo w/ nil
end # while


else
text = @ss.string[@ss.pos .. -1]
raise ScanError, "can not match: '" + text + "'"
end # if
# a rule must return nil or an Array with at least two elements
raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
token.nil? || (Array === token && token.size >= 2)

else
raise ScanError, "undefined state: '" + state.to_s + "'"
end # case state
token
end # def _next_token
# auto-switch state
# (a token whose first element is :state sets the lexer state to its last element)
self.state = token.last if token && token.first == :state

token
end # def next_token
# Parse hook called after the scanner is set up; this definition has an
# empty body and returns nil.
def do_parse; end
end # class
end
end

0 comments on commit d5fb727

Please sign in to comment.