Skip to content

Commit

Permalink
Isolated offset code behind the capture_offsets option and enabled im…
Browse files Browse the repository at this point in the history
…port tracing
  • Loading branch information
G0dwin committed Mar 13, 2017
1 parent a4ac09f commit b69f7d6
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 115 deletions.
8 changes: 8 additions & 0 deletions README.md
Expand Up @@ -55,6 +55,14 @@ parser.to_s
body { margin: 0 1em; }
```

# capturing byte offsets within a file
parser.load_uri!('../style.css', {:base_uri => 'http://example.com/styles/inc/', :capture_offsets => true})
content_rule = parser.find_rule_sets(['#content']).first
content_rule.filename
#=> 'http://example.com/styles/style.css'
content_rule.offset
#=> (10703..10752)

# Testing

```Bash
Expand Down
26 changes: 26 additions & 0 deletions Rakefile
Expand Up @@ -7,3 +7,29 @@ desc 'Run the unit tests.'
Rake::TestTask.new(:default) do |test|
test.verbose = true
end

desc 'Run a performance evaluation.'
task :benchmark do
  # Loaded lazily so that the default test task does not pay for them.
  require 'benchmark'
  require 'css_parser'

  fixtures_dir = "#{File.dirname(__FILE__)}/test/fixtures"

  # Benchmark file loading: parse the 'import1.css' fixture with a fresh
  # parser on every iteration.
  loading = Benchmark.measure do
    10000.times do
      CssParser::Parser.new.load_file!('import1.css', fixtures_dir)
    end
  end
  puts "Parsing 'import1.css' 10 000 times took #{loading.real.round(4)} seconds"

  # Benchmark rule parsing: parse the 'complex.css' fixture with a fresh
  # parser on every iteration.
  parsing = Benchmark.measure do
    1000.times do
      CssParser::Parser.new.load_file!('complex.css', fixtures_dir)
    end
  end
  puts "Parsing 'complex.css' 1 000 times took #{parsing.real.round(4)} seconds"
end
126 changes: 95 additions & 31 deletions lib/css_parser/parser.rb
Expand Up @@ -36,7 +36,8 @@ class << self; attr_reader :folded_declaration_cache; end
def initialize(options = {})
@options = {:absolute_paths => false,
:import => true,
:io_exceptions => true}.merge(options)
:io_exceptions => true,
:capture_offsets => false}.merge(options)

# array of RuleSets
@rules = []
Expand Down Expand Up @@ -117,7 +118,7 @@ def add_block!(block, options = {})
options[:media_types] = [options[:media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt)}
options[:only_media_types] = [options[:only_media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt)}

block = cleanup_block(block)
block = cleanup_block(block, options)

if options[:base_uri] and @options[:absolute_paths]
block = CssParser.convert_uris(block, options[:base_uri])
Expand All @@ -139,26 +140,41 @@ def add_block!(block, options = {})

import_path = import_rule[0].to_s.gsub(/['"]*/, '').strip

import_options = { :media_types => media_types }
import_options[:capture_offsets] = true if options[:capture_offsets]

if options[:base_uri]
import_uri = Addressable::URI.parse(options[:base_uri].to_s) + Addressable::URI.parse(import_path)
load_uri!(import_uri, options[:base_uri], media_types)
import_options[:base_uri] = options[:base_uri]
load_uri!(import_uri, import_options)
elsif options[:base_dir]
load_file!(import_path, options[:base_dir], media_types)
import_options[:base_dir] = options[:base_dir]
load_file!(import_path, import_options)
end
end
end

# Remove @import declarations
block.gsub!(RE_AT_IMPORT_RULE) { |m| ' ' * m.length }
block = remove_all(block, RE_AT_IMPORT_RULE, options)

parse_block_into_rule_sets!(block, options)
end

# Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
#
# +media_types+ can be a symbol or an array of symbols.
def add_rule!(selectors, declarations, media_types = :all, offset = nil)
rule_set = RuleSet.new(selectors, declarations, nil, offset)
def add_rule!(selectors, declarations, media_types = :all)
rule_set = RuleSet.new(selectors, declarations)
add_rule_set!(rule_set, media_types)
end

# Add a CSS rule that records where it was found, by setting the
# +selectors+, +declarations+, +uri+, +offset+ and +media_types+.
#
# +uri+ can be a string or URI pointing to the file or URL location.
# +offset+ should be a Range object representing the start and end byte
# locations where the rule was found in the file.
# +media_types+ can be a symbol or an array of symbols.
def add_file_rule!(selectors, declarations, uri, offset, media_types = :all)
  # FileRuleSet takes the location arguments first, then the rule contents.
  add_rule_set!(FileRuleSet.new(uri, offset, selectors, declarations), media_types)
end

Expand Down Expand Up @@ -289,16 +305,15 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
current_media_query = ''
current_declarations = ''

# once we are in a rule, we will use this to store where we started
# once we are in a rule, we will use this to store where we started if we are capturing offsets
rule_start = nil
offset = nil

block.scan(/(([\\]{2,})|([\\]?[{}\s"])|(.[^\s"{}\\]*))/) do |matches|
# encode here because it can affect the length of the string
token = matches[0].encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
token = matches[0]

# save the regex offset so tat we know where in the file we are
offset = Regexp.last_match.offset(0)
# save the regex offset so that we know where in the file we are
offset = Regexp.last_match.offset(0) if options[:capture_offsets]

if token =~ /\A"/ # found un-escaped double quote
in_string = !in_string
Expand All @@ -324,13 +339,18 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
in_declarations -= 1

unless current_declarations.strip.empty?
add_rule!(current_selectors, current_declarations, current_media_queries, (rule_start..offset.last))
if options[:capture_offsets]
add_file_rule!(current_selectors, current_declarations, options[:filename], (rule_start..offset.last), current_media_queries)
else
add_rule!(current_selectors, current_declarations, current_media_queries)
end
end

# restart our search for selectors and declarations
rule_start = nil
current_selectors = ''
current_declarations = ''

# restart our search for selectors and declarations
rule_start = nil if options[:capture_offsets]
end
elsif token =~ /@media/i
# found '@media', reset current media_types
Expand Down Expand Up @@ -366,23 +386,26 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
end
else
if token =~ /\{/ and not in_string
current_selectors.gsub!(/^[\s]*/, '')
current_selectors.gsub!(/[\s]*$/, '')
current_selectors.strip!
in_declarations += 1
else
# if we are in a selector, add the token to te current selectors
# if we are in a selector, add the token to the current selectors
current_selectors += token

# mark this as the beginning of the selector unless we have already marked it
rule_start = offset.first if rule_start.nil? && token =~ /^[^\s]+$/
rule_start = offset.first if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
end
end
end
end

# check for unclosed braces
if in_declarations > 0
add_rule!(current_selectors, current_declarations, current_media_queries, (rule_start..offset.last))
if options[:capture_offsets]
add_file_rule!(current_selectors, current_declarations, options[:filename], (rule_start..offset.last), current_media_queries)
else
add_rule!(current_selectors, current_declarations, current_media_queries)
end
end
end

Expand All @@ -395,7 +418,6 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
# Deprecated: originally accepted three params: `uri`, `base_uri` and `media_types`
def load_uri!(uri, options = {}, deprecated = nil)
uri = Addressable::URI.parse(uri) unless uri.respond_to? :scheme
#base_uri = nil, media_types = :all, options = {}

opts = {:base_uri => nil, :media_types => :all}

Expand All @@ -413,22 +435,46 @@ def load_uri!(uri, options = {}, deprecated = nil)

opts[:base_uri] = uri if opts[:base_uri].nil?

# pass on the uri if we are capturing file offsets
opts[:filename] = uri.to_s if opts[:capture_offsets]

src, = read_remote_file(uri) # skip charset
if src
add_block!(src, opts)
end
end

# Load a local CSS file.
def load_file!(file_name, base_dir = nil, media_types = :all)
file_name = File.expand_path(file_name, base_dir)
def load_file!(file_name, options = {}, deprecated = nil)
opts = {:base_dir => nil, :media_types => :all}

if options.is_a? Hash
opts.merge!(options)
else
opts[:base_dir] = options if options.is_a? String
opts[:media_types] = deprecated if deprecated
end

file_name = File.expand_path(file_name, opts[:base_dir])
return unless File.readable?(file_name)
return unless circular_reference_check(file_name)

src = IO.read(file_name)
base_dir = File.dirname(file_name)
# using open takes a little longer than IO.read but retains line-breaks consistently
# across platforms which is important when capturing offsets
if opts[:capture_offsets]
fh = open(file_name, 'rb')
src = fh.read
fh.close

add_block!(src, {:media_types => media_types, :base_dir => base_dir})
# pass on the file name if we are capturing file offsets
opts[:filename] = file_name
else
src = IO.read(file_name)
end

opts[:base_dir] = File.dirname(file_name)

add_block!(src, opts)
end

# Load a local CSS string.
Expand All @@ -454,16 +500,33 @@ def circular_reference_check(path)
end
end

# Remove every match of a pattern from a given string.
#
# +css+ is the text to clean, +regex+ the pattern to remove and +options+
# a hash; only its +:capture_offsets+ entry is consulted.
#
# Returns a new string; +css+ itself is not modified.
def remove_all(css, regex, options)
  if options[:capture_offsets]
    # blank each match out with spaces of the same length so that the
    # positions of everything after it are retained
    css.gsub(regex) { |match| ' ' * match.length }
  else
    # positions do not matter, so simply drop the matched text
    css.gsub(regex, '')
  end
end

# Strip comments and clean up blank lines from a block of CSS.
#
# Returns a string.
def cleanup_block(block) # :nodoc:
# Strip CSS comments but make sure the string stays the same length so that we can retain byte offsets
block.gsub!(STRIP_CSS_COMMENTS_RX) { |m| ' ' * m.length }
def cleanup_block(block, options = {}) # :nodoc:
# Strip CSS comments
utf8_block = block.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: ' ')
utf8_block = remove_all(utf8_block, STRIP_CSS_COMMENTS_RX, options)

# Strip HTML comments - they shouldn't really be in here but
# some people are just crazy...
block.gsub(STRIP_HTML_COMMENTS_RX) { |m| ' ' * m.length }
utf8_block = remove_all(utf8_block, STRIP_HTML_COMMENTS_RX, options)

# Strip lines containing just whitespace
utf8_block.gsub!(/^\s+$/, "") unless options[:capture_offsets]

utf8_block
end

# Download a file into a string.
Expand Down Expand Up @@ -491,8 +554,9 @@ def read_remote_file(uri) # :nodoc:

src = '', charset = nil

uri = Addressable::URI.parse(uri.to_s)
begin
uri = Addressable::URI.parse(uri.to_s)

if uri.scheme == 'file'
# local file
path = uri.path
Expand Down
21 changes: 16 additions & 5 deletions lib/css_parser/rule_set.rb
Expand Up @@ -10,18 +10,14 @@ class RuleSet
# Array of selector strings.
attr_reader :selectors

# File offset range
attr_reader :offset

# Integer with the specificity to use for this RuleSet.
attr_accessor :specificity

def initialize(selectors, block, specificity = nil, offset = nil)
def initialize(selectors, block, specificity = nil)
@selectors = []
@specificity = specificity
@declarations = {}
@order = 0
@offset = offset
parse_selectors!(selectors) if selectors
parse_declarations!(block)
end
Expand Down Expand Up @@ -517,4 +513,19 @@ def parse_selectors!(selectors) # :nodoc:
@selectors = selectors.split(',').map { |s| s.gsub(/\s+/, ' ').strip }
end
end

# A RuleSet that additionally carries the location it was read from and the
# offset range it occupies there.
class FileRuleSet < RuleSet
  # The local or remote location
  attr_accessor :filename

  # File offset range
  attr_reader :offset

  # +filename+ and +offset+ are stored on the rule set; +selectors+, +block+
  # and +specificity+ are handed to RuleSet#initialize unchanged.
  def initialize(filename, offset, selectors, block, specificity = nil)
    super(selectors, block, specificity)
    @filename = filename
    @offset = offset
  end
end
end
10 changes: 0 additions & 10 deletions test/test_css_parser_basic.rb
Expand Up @@ -67,14 +67,4 @@ def test_converting_to_hash
hash = @cp.to_h
assert_equal 'blue', hash['all']['div']['color']
end

# Adds the @css fixture as a block and checks that the rule sets yielded by
# each_rule_set report the expected offset ranges, in iteration order.
def test_accessing_file_offsets
  @cp.add_block!(@css)

  expected_offsets = [(6..36), (43..62), (69..111), (118..142)]
  position = 0

  @cp.each_rule_set do |rule_set, _media_types|
    assert_equal expected_offsets[position], rule_set.offset
    position += 1
  end
end
end
15 changes: 0 additions & 15 deletions test/test_css_parser_loading.rb
Expand Up @@ -43,15 +43,6 @@ def teardown
# Loads the redirect301 URI under @uri_base, checks the declarations found
# for the 'p' selector and then checks the offset of every rule set.
def test_loading_301_redirect
  @cp.load_uri!("#{@uri_base}/redirect301")
  assert_equal 'margin: 0px;', @cp.find_by_selector('p').join(' ')

  # check rule offsets; the expected ranges differ on Windows
  # (the original note attributes this to different encodings between
  # windows and unix)
  expected_offsets =
    if Gem.win_platform?
      [(0..46), (50..68)]
    else
      [(0..43), (45..63)]
    end

  position = 0
  @cp.each_rule_set do |rule_set, _media_types|
    assert_equal expected_offsets[position], rule_set.offset
    position += 1
  end
end

def test_loading_302_redirect
Expand Down Expand Up @@ -84,12 +75,6 @@ def test_loading_a_remote_file_over_ssl
else
@cp.load_uri!("https://dialect.ca/inc/screen.css")
assert_match( /margin\: 0\;/, @cp.find_by_selector('body').join(' ') )

# there are a lot of rules in this file, but check some rule offsets
rules = @cp.find_rule_sets(['#container', '#name_case_converter textarea'])
assert_equal 2, rules.count
assert_equal (2172..2227), rules.first.offset
assert_equal (10703..10752), rules.last.offset
end
end

Expand Down

0 comments on commit b69f7d6

Please sign in to comment.