Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with the HTTPS clone URL or the Subversion checkout URL.
Download ZIP
Browse files

adding tokenizer and parser

  • Loading branch information...
commit 9246422c850b9e580eead224fd17ed5ff5564878 1 parent 09f133d
@tenderlove tenderlove authored
View
2  .gitignore
@@ -7,4 +7,6 @@ ext/nokogiri/*.bundle
ext/nokogiri/*.a
ext/nokogiri/*.so
ext/nokogiri/conftest.dSYM
+lib/nokogiri/css/generated_parser.rb
+lib/nokogiri/css/generated_tokenizer.rb
tags
View
20 Rakefile
@@ -3,18 +3,24 @@
require 'rubygems'
require 'hoe'
+kind = Config::CONFIG['DLEXT']
+
LIB_DIR = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
$LOAD_PATH << LIB_DIR
-require 'nokogiri/version'
+GENERATED_PARSER = "lib/nokogiri/css/generated_parser.rb"
+GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"
-kind = Config::CONFIG['DLEXT']
+EXT = "ext/nokogiri/native.#{kind}"
+
+require 'nokogiri/version'
HOE = Hoe.new('nokogiri', Nokogiri::VERSION) do |p|
p.developer('Aaron Patterson', 'aaronp@rubyforge.org')
p.clean_globs = [
'ext/nokogiri/Makefile',
'ext/nokogiri/*.{o,so,bundle,a,log}',
+ GENERATED_PARSER,
]
end
@@ -33,7 +39,13 @@ task :coverage do
sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
end
-EXT = "ext/nokogiri/native.#{kind}"
+file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
+ sh "racc -o #{t.name} #{t.prerequisites.first}"
+end
+
+file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
+ sh "frex --independent -o #{t.name} #{t.prerequisites.first}"
+end
task 'ext/nokogiri/Makefile' do
Dir.chdir('ext/nokogiri') do
@@ -47,7 +59,7 @@ task EXT => 'ext/nokogiri/Makefile' do
end
end
-task :build => EXT
+task :build => [EXT, GENERATED_PARSER, GENERATED_TOKENIZER]
Rake::Task[:test].prerequisites << :build
View
113 lib/nokogiri/css/parser.y
@@ -0,0 +1,113 @@
# Racc grammar for the Nokogiri CSS selector parser.
# `racc` compiles this file into lib/nokogiri/css/generated_parser.rb
# (see the Rakefile's GENERATED_PARSER file task).
# Tokens are produced by the lexer defined in tokenizer.rex; token names
# follow the CSS tokenization vocabulary (IDENT, HASH, STRING, ...).
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA URI CDO CDC NUMBER PERCENTAGE LENGTH EMS EXS ANGLE TIME FREQ
token IMPORTANT_SYM IMPORT_SYM MEDIA_SYM PAGE_SYM CHARSET_SYM DIMENSION

rule
  # Entry point: one or more simple selectors joined by combinators,
  # flattened into a single result list.
  selector
    : simple_selector_1toN { result = val.flatten }
    ;
  # Combinators between simple selectors, mapped to SAC-style symbols
  # that simple_selector_1toN dispatches on below.
  combinator
    : PLUS s_0toN { result = :SAC_DIRECT_ADJACENT_SELECTOR }
    | GREATER s_0toN { result = :SAC_CHILD_SELECTOR }
    | S { result = :SAC_DESCENDANT_SELECTOR }
    ;
  # A simple selector: an element name optionally followed by
  # hash/class/attribute/pseudo conditions, or conditions alone
  # (e.g. ".foo" with no element name).
  simple_selector
    : element_name hcap_0toN {
        result = if val[1].nil?
          val.first
        else
          ConditionalSelector.new(val.first, val[1])
        end
      }
    | hcap_1toN {
        result = ConditionalSelector.new(nil, val.first)
      }
    ;
  # Right-recursive chain of simple selectors; the combinator symbol from
  # `combinator` selects which selector node wraps the pair.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result =
        case val[1]
        when :SAC_DIRECT_ADJACENT_SELECTOR
          SiblingSelector.new(val.first, val[2])
        when :SAC_DESCENDANT_SELECTOR
          DescendantSelector.new(val.first, val[2])
        when :SAC_CHILD_SELECTOR
          ChildSelector.new(val.first, val[2])
        end
      }
    | simple_selector
    ;
  # ".name" class condition.
  class
    : '.' IDENT { result = ClassCondition.new(val[1]) }
    ;
  # A named element or the universal selector "*".
  element_name
    : IDENT { result = ElementSelector.new(val.first) }
    | '*' { result = SimpleSelector.new() }
    ;
  # "[attr]" / "[attr=value]" attribute condition; the optional
  # operator/value pair comes from attrib_val_0or1.
  attrib
    : '[' s_0toN IDENT s_0toN attrib_val_0or1 ']' {
        result = AttributeCondition.build(val[2], val[4])
      }
    ;
  # A functional pseudo-class body, e.g. "foo(".." ")".
  function
    : FUNCTION s_0toN ')' s_0toN {
        ### We only support 0 argument functions for now....
        result = Function.new(val[0], val[2].flatten.select { |x| x !~ /,/ })
      }
    ;
  # ":function(...)" or ":ident" pseudo-class condition.
  pseudo
    : ':' function {
        result = PseudoClassCondition.new(val[1])
      }
    | ':' IDENT { result = PseudoClassCondition.new(val[1]) }
    ;
  # Zero-or-more hash/class/attrib/pseudo conditions.
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One-or-more conditions, combined pairwise into CombinatorConditions.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = CombinatorCondition.new(val[0], val[1])
      }
    | class hcap_1toN {
        result = CombinatorCondition.new(val[0], val[1])
      }
    | attrib hcap_1toN {
        result = CombinatorCondition.new(val[0], val[1])
      }
    | pseudo hcap_1toN {
        result = CombinatorCondition.new(val[0], val[1])
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  # "#id" condition (HASH token includes the leading "#").
  attribute_id
    : HASH { result = IDCondition.new(val.first) }
    ;
  # Optional "=|~=|" operator plus IDENT or STRING value inside "[...]";
  # empty for bare "[attr]".
  attrib_val_0or1
    : eql_incl_dash s_0toN IDENT s_0toN { result = [val.first, val[2]] }
    | eql_incl_dash s_0toN STRING s_0toN { result = [val.first, val[2]] }
    |
    ;
  # Attribute match operators: "=", "~=" (INCLUDES), "|=" (DASHMATCH).
  eql_incl_dash
    : '='
    | INCLUDES
    | DASHMATCH
    ;
  # Zero-or-more whitespace tokens.
  s_0toN
    : S s_0toN
    |
    ;
end

---- header
  require "css/sac/conditions"
  require "css/sac/selectors"

---- inner
  include CSS::SAC::Conditions
  include CSS::SAC::Selectors
View
62 lib/nokogiri/css/tokenizer.rex
@@ -0,0 +1,62 @@
# Rex lexer definition for CSS selectors.
# `frex --independent` compiles this file into
# lib/nokogiri/css/generated_tokenizer.rb (see the Rakefile's
# GENERATED_TOKENIZER file task).  Each rule action returns a
# [token_symbol, text] pair consumed by the racc grammar in parser.y.
# Macro regexes follow the CSS tokenization grammar.
module Nokogiri
module CSS
class Tokenizer

macro
  nl        \n|\r\n|\r|\f
  w         [\s\r\n\f]*
  nonascii  [^\\\\0-\\\\177]
  num       [0-9]+|[0-9]*\.[0-9]+
  unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?

  escape    {unicode}|\\[^\n\r\f0-9a-f]
  nmchar    [_a-z0-9-]|{nonascii}|{escape}
  nmstart   [_a-z]|{nonascii}|{escape}
  ident     [-]?{nmstart}{nmchar}*
  name      {nmchar}+
  string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
  string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
  string    {string1}|{string2}
  invalid1  \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
  invalid2  \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
  invalid   {invalid1}|{invalid2}
  Comment   \/\*(.|[\r\n])*?\*\/

rule

# [:state] pattern [actions]

  [ \t\r\n\f]+ { [:S, text] }

  ~= { [:INCLUDES, text] }
  \|= { [:DASHMATCH, text] }
  ^= { [:PREFIXMATCH, text] }
  \$= { [:SUFFIXMATCH, text] }
  \*= { [:SUBSTRINGMATCH, text] }
  {ident} { [:IDENT, text] }
  {string} { [:STRING, text] }
  {ident}\( { [:FUNCTION, text] }
  {num} { [:NUMBER, text] }
  #{name} { [:HASH, text] }
  {w}\+ { [:PLUS, text] }
  {w}> { [:GREATER, text] }
  {w}, { [:COMMA, text] }
  {w}~ { [:TILDE, text] }
  \:not\( { [:NOT, text] }
  @{ident} { [:ATKEYWORD, text] }
  {invalid} { [:INVALID, text] }
  {num}% { [:PERCENTAGE, text] }
  {num}{ident} { [:DIMENSION, text] }
  <!-- { [:CDO, text] }
  --> { [:CDC, text] }

# url(...) forms and unicode ranges
  url\({w}{string}{w}\) { [:URI, text] }
  url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\) { [:URI, text] }
  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }

  {Comment} /* ignore comments */

# fallback: emit any other single character as its own token
  . { [text, text] }
end
end
end
Please sign in to comment.
Something went wrong with that request. Please try again.