Browse files

A block passed to a rule declaration defines the rules for that rule's delegate class

Example:
  class BlogWithArticlesBlock < BlogScraper
    elements 'div.hentry' => :articles do
      element 'h1' => :title
    end
  end
  • Loading branch information...
1 parent 065aa34 commit 579b3a51677aa4ee6c4f85b5f3c90b7af43fd6c9 @mislav committed Oct 24, 2009
Showing with 27 additions and 7 deletions.
  1. +27 −7 scraper.rb
View
34 scraper.rb
@@ -43,16 +43,16 @@ def self.parse(html)
end
# Specify a new singular scraping rule.
#
# The declaration and the optional block are handed to
# parse_rule_declaration, which returns the selector, the rule name and the
# delegate (nil when none applies). The rule is stored as
# rules[name] = [selector, delegate], an attr_accessor is defined for the
# rule name, and the rule name is returned.
- def self.element(selector)
- selector, name, delegate = parse_rule_declaration(selector)
+ def self.element(selector, &block)
+ selector, name, delegate = parse_rule_declaration(selector, &block)
rules[name] = [selector, delegate]
attr_accessor name
name
end
# Specify a new plural scraping rule.
#
# Declares the rule through `element` (forwarding the optional block) and
# then appends `true` to the stored rule entry -- presumably the marker that
# the rule is plural; confirm against the code that reads `rules`.
# NOTE(review): the value of this method is the result of the `<<`
# expression, not the rule name that `element` returns.
- def self.elements(selector)
- name = element(selector)
+ def self.elements(selector, &block)
+ name = element(selector, &block)
rules[name] << true
end
@@ -90,13 +90,16 @@ def parse_result(node, delegate)
#
# :title
# #=> ['title', :title, nil]
#
# When a block is given, it is evaluated in the context of a new anonymous
# class whose superclass is the :with delegate (or Scraper when there is
# none); that new class is returned as the delegate.
#
# Returns the three values selector, name, delegate.
- def self.parse_rule_declaration(selector)
+ def self.parse_rule_declaration(selector, &block)
if Hash === selector
# delete removes :with from the hash that was passed in, so only the
# selector => name pair is left behind
delegate = selector.delete(:with)
- selector.to_a.flatten << delegate
+ selector, name = selector.to_a.flatten
else
- [selector.to_s, selector.to_sym, nil]
+ selector, name, delegate = selector.to_s, selector.to_sym, nil
end
+ # eval block in context of a new scraper subclass
+ delegate = Class.new(delegate || Scraper, &block) if block_given?
+ return selector, name, delegate
end
def self.rules
@@ -152,6 +155,12 @@ class BlogWithTimestampedArticles < BlogScraper
elements 'div.hentry' => :articles, :with => TimestampedArticle
end
# Fixture: declares the rules for each article inline, through a block passed
# to `elements`, instead of naming a delegate class with :with.
+ class BlogWithArticlesBlock < BlogScraper
+ elements 'div.hentry' => :articles do
+ element 'h1' => :title
+ end
+ end
+
class FakeHtmlParser
def initialize(name)
@name = name
@@ -250,6 +259,17 @@ def search(selector)
@blog.navigation_items.should == 'Home'
end
end
+
# Rules declared in the block should yield article objects that respond to
# `title`, one per matched article.
+ describe BlogWithArticlesBlock do
+ before(:all) do
+ @blog = described_class.parse(HTML)
+ end
+
+ it "should have article objects" do
+ titles = @blog.articles.map { |article| article.title }
+ titles.should == ['First article', 'Second article']
+ end
+ end
end
__END__

0 comments on commit 579b3a5

Please sign in to comment.