Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

ensure that scraper subclasses can override parent rules

  • Loading branch information...
commit 5925eb22c971cfd769442fd099da53d1e760142e 1 parent 27f61e9
@mislav authored
Showing with 28 additions and 18 deletions.
  1. +28 −18 scraper.rb
View
46 scraper.rb
@@ -32,9 +32,9 @@ def initialize(doc)
end
# initialize plural accessor values
- self.class.plurals.each { |name|
- send("#{name}=", [])
- }
+ self.class.rules.each do |name, (s, k, plural)|
+ send("#{name}=", []) if plural
+ end
end
# Initialize a new scraper and process data
@@ -45,7 +45,7 @@ def self.parse(html)
# Specify a new singular scraping rule
def self.element(selector)
selector, name, klass = parse_rule_declaration(selector)
- rules << [selector, name, klass]
+ rules[name] = [selector, klass]
attr_accessor name
name
end
@@ -53,13 +53,13 @@ def self.element(selector)
# Specify a new plural scraping rule
def self.elements(selector)
name = element(selector)
- plurals << name
+ rules[name] << true
end
# Let it do its thing!
def parse
- self.class.rules.each do |selector, target, klass|
- if plural? target
+ self.class.rules.each do |target, (selector, klass, plural)|
+ if plural
@doc.search(selector).each do |node|
send(target) << parse_result(node, klass)
end
@@ -100,20 +100,11 @@ def self.parse_rule_declaration(selector)
end
def self.rules
- @rules ||= []
- end
-
- def self.plurals
- @plurals ||= []
- end
-
- def plural?(name)
- self.class.plurals.include?(name)
+ @rules ||= {}
end
def self.inherited(subclass)
- subclass.rules.concat self.rules
- subclass.plurals.concat self.plurals
+ subclass.rules.update self.rules
end
end
@@ -148,6 +139,11 @@ class BlogScraper < Scraper
elements '#nav li' => :navigation_items
end
+ class OverrideBlogScraper < BlogScraper
+ elements :title
+ element '#nav li' => :navigation_items
+ end
+
class BlogWithArticles < BlogScraper
elements 'div.hentry' => :articles, :with => Article
end
@@ -240,6 +236,20 @@ def search(selector)
titles.should == ['fake test1', 'fake test2', 'fake test3']
end
end
+
+ describe OverrideBlogScraper do
+ before(:all) do
+ @blog = described_class.parse(HTML)
+ end
+
+ it "should have plural titles" do
+ @blog.title.should == ['Maximum awesome']
+ end
+
+ it "should have singular navigation item" do
+ @blog.navigation_items.should == 'Home'
+ end
+ end
end
__END__
Please sign in to comment.
Something went wrong with that request. Please try again.