Permalink
Browse files

fix outdated example scripts

  • Loading branch information...
1 parent e7d4878 commit 1cbaa6cdf319ba71082655acef6bc80a67767aba @mislav committed Dec 28, 2011
Showing with 36 additions and 29 deletions.
  1. +16 −18 examples/delicious.rb
  2. +18 −9 examples/tweetburner.rb
  3. +2 −2 examples/twitter.rb
View
@@ -5,33 +5,31 @@
require 'nibbler'
require 'open-uri'
-require 'date'
# extracts data from a single bookmark
class Bookmark < Nibbler
- element 'h4 a' => :title
- element '.description' => :description
-
- # extract attribute with xpath
- element './/h4/a/@href' => :url
-
+ element '.body .title' => :title
+ element '.note' => :description
+
+ element '.sub span' => :url
+
# tags are plural
- elements 'ul.tag-chain .tagItem' => :tags
-
- # dates are in form "22 OCT 09"
- element '.dateGroup span' => :date, :with => lambda { |span|
- Date.strptime(span.inner_text.strip, '%d %b %y')
- }
+ elements '.tag .name' => :tags
+
+ # extract timestamp from HTML attribute
+ element './@date' => :date, :with => lambda { |timestamp| Time.at timestamp.text.to_i }
end
# finds all bookmarks on the page
class Delicious < Nibbler
- elements '#bookmarklist div.bookmark' => :bookmarks, :with => Bookmark
+ elements '.content .linkList .link' => :bookmarks, :with => Bookmark
end
mislav = Delicious.parse open('http://delicious.com/mislav/ruby')
-bookmark = mislav.bookmarks.first
-puts bookmark.title #=> "Some title"
-p bookmark.tags #=> ['foo', 'bar', ...]
-puts bookmark.date #=> <Date>
+mislav.bookmarks[0,3].each do |bookmark|
+ puts bookmark.title #=> "Some title"
+ p bookmark.tags #=> ['foo', 'bar', ...]
+ puts bookmark.date #=> <Time>
+ puts
+end
@@ -1,3 +1,5 @@
+# encoding: utf-8
+#
## Tweetburner.com archive dump
#
# I needed to dump my Tweetburner archive to CSV
@@ -14,8 +16,15 @@ module Tweetburner
SITE = URI('http://tweetburner.com')
class Scraper < ::Nibbler
- # add our behavior to convert_document; open web pages with UTF-8 encoding
- def self.convert_document(url)
+ def initialize url
+ doc = get_document url
+ super doc
+ end
+
+ private
+
+ # open web pages with UTF-8 encoding
+ def get_document(url)
URI === url ? Nokogiri::HTML::Document.parse(open(url), url.to_s, 'UTF-8') : url
rescue OpenURI::HTTPError
$stderr.puts "ERROR opening #{url}"
@@ -31,7 +40,7 @@ class Link < ::Nibbler
element '.col-tweet-text' => :text, :with => lambda { |node|
node.text.sub(/\s+– .+?$/, '')
}
- element '.col-clicks' => :clicks
+ element '.col-clicks' => :clicks, :with => lambda { |node| node.inner_text.to_i }
element '.col-created-at' => :created_at, :with => lambda { |node| DateTime.parse node.text }
def stats
@@ -58,18 +67,18 @@ def self.parse(username)
def parse
super
if next_page_url
- @doc = self.class.convert_document(URI(next_page_url))
+ @doc = get_document(URI(next_page_url))
self.parse
- else
- self
end
+ self
end
def to_csv(io = STDOUT)
io.sync = true if io == STDOUT
- csv = CSV::Writer.create io
- links.each do |link|
- csv << [link.text, link.clicks, link.created_at, link.stats.destination]
+ CSV(io) do |csv|
+ links.each do |link|
+ csv << [link.text, link.clicks, link.created_at, link.stats.destination]
+ end
end
end
end
View
@@ -11,11 +11,11 @@
# now here's the real deal
class Twitter < NibblerJSON
- elements :tweets, :with => NibblerJSON do
+ elements :tweets do
element :created_at, :with => lambda { |time| Time.parse(time) }
element :text
element :id
- element 'user' => :author, :with => NibblerJSON do
+ element 'user' => :author do
element 'name' => :full_name
element 'screen_name' => :username
end

0 comments on commit 1cbaa6c

Please sign in to comment.