wip - migrate to oedipus_lex #2934

Closed
wants to merge 2 commits

2 changes: 1 addition & 1 deletion Gemfile
@@ -17,7 +17,7 @@ group :development do
gem "hoe-markdown", "= 1.4.0"

# parser generator
gem "rexical", "= 1.0.7"
gem "oedipus_lex", "= 2.6.1"

# tests
gem "minitest", "5.18.1"
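
Not shown in this diff is the build glue that regenerates lib/nokogiri/css/tokenizer.rb from tokenizer.rex. As a rough sketch only, assuming the OedipusLex#parse_file and #generate calls that the gem's bundled rake rule uses (none of this is part of the PR), the regeneration step could look like:

# Hypothetical rake task -- not part of this PR. The OedipusLex#parse_file and
# #generate calls are assumed from oedipus_lex's bundled rake rule.
file "lib/nokogiri/css/tokenizer.rb" => "lib/nokogiri/css/tokenizer.rex" do |t|
  require "oedipus_lex"

  lex = OedipusLex.new
  lex.parse_file(t.source)           # read the .rex lexical definition
  File.write(t.name, lex.generate)   # emit the generated Ruby lexer
end
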
2 changes: 1 addition & 1 deletion lib/nokogiri/css/parser_extras.rb
@@ -64,7 +64,7 @@ def initialize(namespaces = {})
end

def parse(string)
-@tokenizer.scan_setup(string)
+@tokenizer.parse(string)
do_parse
end

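
The caller-visible change above is the lexer's entry point: the rexical-generated tokenizer was primed with scan_setup, while the oedipus_lex-generated one is primed with parse. A minimal sketch of driving the regenerated tokenizer on its own (the selector and the token stream shown are illustrative, not taken from this PR's tests):

require "nokogiri"

tok = Nokogiri::CSS::Tokenizer.new
tok.parse("div > p")            # was: tok.scan_setup("div > p")

tokens = []
while (t = tok.next_token)      # next_token returns nil at end of input
  tokens << t
end
tokens  # => [[:IDENT, "div"], [:GREATER, " > "], [:IDENT, "p"]]
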
292 changes: 180 additions & 112 deletions lib/nokogiri/css/tokenizer.rb
@@ -1,155 +1,223 @@
# frozen_string_literal: true
# encoding: UTF-8
#--
# DO NOT MODIFY!!!!
# This file is automatically generated by rex 1.0.7
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
# This file is automatically generated. Do not modify it.
# Generated by: oedipus_lex version 2.6.1.
# Source: lib/nokogiri/css/tokenizer.rex
#++

module Nokogiri
module CSS
# :nodoc: all
class Tokenizer
require 'strscan'

class ScanError < StandardError ; end
##
# The generated lexer Nokogiri::CSS::Tokenizer

attr_reader :lineno
attr_reader :filename
attr_accessor :state
class Nokogiri::CSS::Tokenizer
require 'strscan'

def scan_setup(str)
@ss = StringScanner.new(str)
@lineno = 1
@state = nil
end
# :stopdoc:
NL = /\n|\r\n|\r|\f/
W = /[\s]*/
NONASCII = /[^\0-\177]/
NUM = /-?([0-9]+|[0-9]*\.[0-9]+)/
UNICODE = /\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?/
ESCAPE = /#{UNICODE}|\\[^\n\r\f0-9A-Fa-f]/
NMCHAR = /[_A-Za-z0-9-]|#{NONASCII}|#{ESCAPE}/
NMSTART = /[_A-Za-z]|#{NONASCII}|#{ESCAPE}/
IDENT = /-?(#{NMSTART})(#{NMCHAR})*/
NAME = /(#{NMCHAR})+/
STRING1 = /"([^\n\r\f"]|#{NL}|#{NONASCII}|#{ESCAPE})*(?<!\\)(?:\\{2})*"/
STRING2 = /'([^\n\r\f']|#{NL}|#{NONASCII}|#{ESCAPE})*(?<!\\)(?:\\{2})*'/
STRING = /#{STRING1}|#{STRING2}/
# :startdoc:
# :stopdoc:
class LexerError < StandardError ; end
class ScanError < LexerError ; end
# :startdoc:

def action
yield
end
##
# The current line number.

def scan_str(str)
scan_setup(str)
do_parse
end
alias :scan :scan_str
attr_accessor :lineno
##
# The file name / path

def load_file( filename )
@filename = filename
File.open(filename, "r") do |f|
scan_setup(f.read)
end
end

def scan_file( filename )
load_file(filename)
do_parse
end
attr_accessor :filename

##
# The StringScanner for this lexer.

def next_token
return if @ss.eos?
attr_accessor :ss

# skips empty actions
until token = _next_token or @ss.eos?; end
token
end
##
# The current lexical state.

def _next_token
text = @ss.peek(1)
@lineno += 1 if text == "\n"
token = case @state
when nil
case
when (text = @ss.scan(/has\([\s]*/))
action { [:HAS, text] }
attr_accessor :state

when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
action { [:FUNCTION, text] }
alias :match :ss

when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
action { [:IDENT, text] }
##
# The match groups for the current scan.

when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
action { [:HASH, text] }
def matches
m = (1..9).map { |i| ss[i] }
m.pop until m[-1] or m.empty?
m
end

when (text = @ss.scan(/[\s]*~=[\s]*/))
action { [:INCLUDES, text] }
##
# Yields on the current action.

when (text = @ss.scan(/[\s]*\|=[\s]*/))
action { [:DASHMATCH, text] }
def action
yield
end

when (text = @ss.scan(/[\s]*\^=[\s]*/))
action { [:PREFIXMATCH, text] }
##
# The previous position. Only available if the :column option is on.

when (text = @ss.scan(/[\s]*\$=[\s]*/))
action { [:SUFFIXMATCH, text] }
attr_accessor :old_pos

when (text = @ss.scan(/[\s]*\*=[\s]*/))
action { [:SUBSTRINGMATCH, text] }
##
# The position of the start of the current line. Only available if the
# :column option is on.

when (text = @ss.scan(/[\s]*!=[\s]*/))
action { [:NOT_EQUAL, text] }
attr_accessor :start_of_current_line_pos

when (text = @ss.scan(/[\s]*=[\s]*/))
action { [:EQUAL, text] }
##
# The current column, starting at 0. Only available if the
# :column option is on.
def column
old_pos - start_of_current_line_pos
end

when (text = @ss.scan(/[\s]*\)/))
action { [:RPAREN, text] }

when (text = @ss.scan(/\[[\s]*/))
action { [:LSQUARE, text] }
##
# The current scanner class. Must be overridden in subclasses.

when (text = @ss.scan(/[\s]*\]/))
action { [:RSQUARE, text] }
def scanner_class
StringScanner
end unless instance_methods(false).map(&:to_s).include?("scanner_class")

when (text = @ss.scan(/[\s]*\+[\s]*/))
action { [:PLUS, text] }
##
# Parse the given string.

when (text = @ss.scan(/[\s]*>[\s]*/))
action { [:GREATER, text] }
def parse str
self.ss = scanner_class.new str
self.lineno = 1
self.start_of_current_line_pos = 0
self.state ||= nil

when (text = @ss.scan(/[\s]*,[\s]*/))
action { [:COMMA, text] }
do_parse
end

when (text = @ss.scan(/[\s]*~[\s]*/))
action { [:TILDE, text] }
##
# Read in and parse the file at +path+.

when (text = @ss.scan(/\:not\([\s]*/))
action { [:NOT, text] }
def parse_file path
self.filename = path
open path do |f|
parse f.read
end
end

when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
action { [:NUMBER, text] }
##
# The current location in the parse.

when (text = @ss.scan(/[\s]*\/\/[\s]*/))
action { [:DOUBLESLASH, text] }
def location
[
(filename || "<input>"),
lineno,
column,
].compact.join(":")
end

when (text = @ss.scan(/[\s]*\/[\s]*/))
action { [:SLASH, text] }
##
# Lex the next token.

when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
action {[:UNICODE_RANGE, text] }
def next_token

when (text = @ss.scan(/[\s]+/))
action { [:S, text] }
token = nil

when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
action { [:STRING, text] }
until ss.eos? or token do
if ss.check(/\n/) then
self.lineno += 1
# line starts 1 position after the newline
self.start_of_current_line_pos = ss.pos + 1
end
self.old_pos = ss.pos
token =
case state
when nil then
case
when text = ss.scan(/has\(#{W}/) then
action { [:HAS, text] }
when text = ss.scan(/#{NUM}/) then
action { [:NUMBER, text] }
when text = ss.scan(/#{IDENT}\(#{W}/) then
action { [:FUNCTION, text] }
when text = ss.scan(/#{IDENT}/) then
action { [:IDENT, text] }
when text = ss.scan(/##{NAME}/) then
action { [:HASH, text] }
when text = ss.scan(/#{W}\~=#{W}/) then
action { [:INCLUDES, text] }
when text = ss.scan(/#{W}\|=#{W}/) then
action { [:DASHMATCH, text] }
when text = ss.scan(/#{W}\^=#{W}/) then
action { [:PREFIXMATCH, text] }
when text = ss.scan(/#{W}\$=#{W}/) then
action { [:SUFFIXMATCH, text] }
when text = ss.scan(/#{W}\*=#{W}/) then
action { [:SUBSTRINGMATCH, text] }
when text = ss.scan(/#{W}!=#{W}/) then
action { [:NOT_EQUAL, text] }
when text = ss.scan(/#{W}=#{W}/) then
action { [:EQUAL, text] }
when text = ss.scan(/#{W}\)/) then
action { [:RPAREN, text] }
when text = ss.scan(/\[#{W}/) then
action { [:LSQUARE, text] }
when text = ss.scan(/#{W}\]/) then
action { [:RSQUARE, text] }
when text = ss.scan(/#{W}\+#{W}/) then
action { [:PLUS, text] }
when text = ss.scan(/#{W}>#{W}/) then
action { [:GREATER, text] }
when text = ss.scan(/#{W},#{W}/) then
action { [:COMMA, text] }
when text = ss.scan(/#{W}~#{W}/) then
action { [:TILDE, text] }
when text = ss.scan(/:not\(#{W}/) then
action { [:NOT, text] }
when text = ss.scan(/#{W}\/\/#{W}/) then
action { [:DOUBLESLASH, text] }
when text = ss.scan(/#{W}\/#{W}/) then
action { [:SLASH, text] }
when text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/) then
action {[:UNICODE_RANGE, text] }
when text = ss.scan(/[\s]+/) then
action { [:S, text] }
when text = ss.scan(/#{STRING}/) then
action { [:STRING, text] }
when text = ss.scan(/./) then
action { [text, text] }
else
text = ss.string[ss.pos .. -1]
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
end
else
raise ScanError, "undefined state at #{location}: '#{state}'"
end # token = case state

when (text = @ss.scan(/./))
action { [text, text] }
next unless token # allow functions to trigger redo w/ nil
end # while


else
text = @ss.string[@ss.pos .. -1]
raise ScanError, "can not match: '" + text + "'"
end # if
raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
token.nil? || (Array === token && token.size >= 2)

else
raise ScanError, "undefined state: '" + state.to_s + "'"
end # case state
token
end # def _next_token
# auto-switch state
self.state = token.last if token && token.first == :state

token
end # def next_token
def do_parse
end
end # class
end
end
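
Since this is still WIP, a quick end-to-end smoke test is the easiest way to confirm the racc parser still pulls tokens from the regenerated lexer. A hedged sketch (the selector is arbitrary and the exact XPath output is abbreviated, not asserted):

require "nokogiri"

# Parser#parse primes the tokenizer via @tokenizer.parse (see the
# parser_extras.rb hunk above) and then runs the racc-generated do_parse,
# which presumably pulls tokens through next_token.
parser = Nokogiri::CSS::Parser.new
ast = parser.parse("div > p.foo")
p ast.first.class          # expected: Nokogiri::CSS::Node

# Higher-level check: selector-to-XPath conversion still works.
p Nokogiri::CSS.xpath_for("div > p.foo")
# => ["//div/p[...]"]  (class-match predicate abbreviated here)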