Permalink
Browse files

Initial pass at scraper to pull in Confreaks videos

  • Loading branch information...
1 parent 49b11b1 commit f4dc08cd3b8e2d1794d95b654f5fdb5f6d3801d4 @mattpolito committed Dec 16, 2011
View
@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*
View
@@ -1,5 +1,3 @@
source :rubygems
-gem 'nokogiri'
-gem 'rest-client'
-gem 'pry'
+gemspec
View
@@ -1,3 +1,10 @@
+PATH
+ remote: .
+ specs:
+ confed_scraper (0.0.1)
+ nokogiri
+ rest-client
+
GEM
remote: http://rubygems.org/
specs:
@@ -22,6 +29,5 @@ PLATFORMS
ruby
DEPENDENCIES
- nokogiri
+ confed_scraper!
pry
- rest-client
View
@@ -0,0 +1 @@
+require "bundler/gem_tasks"
View
@@ -0,0 +1,25 @@
# -*- encoding: utf-8 -*-
# Gem specification for confed_scraper.

# Make this project's lib/ directory loadable so the version constant can be
# read straight from the source tree.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require "confed_scraper/version"

Gem::Specification.new do |spec|
  # Identity and authorship metadata.
  spec.name        = "confed_scraper"
  spec.version     = ConfedScraper::VERSION
  spec.authors     = ["Matt Polito"]
  spec.email       = ["matt.polito@gmail.com"]
  spec.homepage    = "http://github.com/mattpolito/confed_scraper"
  spec.summary     = "Site scrapers to ease data import into Confed"
  spec.description = "Site scrapers to ease data import into Confed"

  spec.rubyforge_project = "confed_scraper"

  # Package contents are whatever git tracks: every tracked file, the tracked
  # test/spec/features files, and the basenames of tracked files under bin/.
  spec.files       = `git ls-files`.split("\n")
  spec.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  bin_paths        = `git ls-files -- bin/*`.split("\n")
  spec.executables = bin_paths.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # pry is only needed while developing; rest-client and nokogiri are needed
  # at runtime.
  spec.add_development_dependency "pry"
  spec.add_runtime_dependency "rest-client"
  spec.add_runtime_dependency "nokogiri"
end
View
@@ -0,0 +1,6 @@
# Entry point for the confed_scraper gem.
#
# The third-party libraries the scrapers call (RestClient for HTTP requests,
# Nokogiri for HTML parsing) are required explicitly here. The scraper code
# previously relied on Bundler.require to load them; without that call,
# nothing else brings these constants into scope.
require "nokogiri"
require "rest-client"

# Project files. The version file is loaded first so the ConfedScraper
# namespace exists before the scraper classes are defined inside it.
require "confed_scraper/version"
require "confed_scraper/scraper"
require "confed_scraper/confreaks_scraper"

# Namespace for the scraper classes loaded above.
module ConfedScraper
end
@@ -1,22 +1,4 @@
-require 'bundler'
-Bundler.require
-
-class Scraper
- attr_reader :url
-
- def initialize(url)
- @url = url
- end
-
- def process(url)
- end
-
- def get_content_from(url)
- RestClient.get(url)
- end
-end
-
-class ConfreaksScraper < Scraper
+class ConfedScraper::ConfreaksScraper < ConfedScraper::Scraper
SITE_URI = "http://confreaks.net"
def process
@@ -30,13 +12,13 @@ def process
show_page_content = RestClient.get(show_page_url)
show_page_doc = Nokogiri::HTML.parse(show_page_content)
- p "Sraping: #{show_page_url}"
+ scrape_message(show_page_url)
- title = show_page_doc.xpath('.//*[@class="video-title"]').text.strip
+ title = show_page_doc.xpath('//*[@class="video-title"]').text.strip
vid[:title] = title
vid[:uri] = show_page_url
- vid[:presenters] = show_page_doc.xpath('.//*[@class="video-presenters"]/a').map(&:text)
- vid[:description] = show_page_doc.xpath('.//*[@class="video-abstract"]/p').text.strip
+ vid[:presenters] = show_page_doc.xpath('//*[@class="video-presenters"]/a').map(&:text)
+ vid[:description] = show_page_doc.xpath('//*[@class="video-abstract"]/p').text.strip
video_data << vid
end
@@ -0,0 +1,18 @@
# ConfedScraper is a module everywhere else in this gem (the version file and
# the library entry point both declare it with `module`), so the base scraper
# must live inside that module. Declaring `class ConfedScraper` here would
# raise TypeError once the module is already loaded, and subclasses inherit
# from ConfedScraper::Scraper.
module ConfedScraper
  # Base class for site scrapers. Holds the URL to work on and provides the
  # small helpers shared by concrete scrapers.
  class Scraper
    # The URL this scraper was created with.
    attr_reader :url

    # @param url the URL associated with this scraper
    def initialize(url)
      @url = url
    end

    # Hook for subclasses to override; the base implementation does nothing
    # and returns nil.
    #
    # The argument is optional (defaulting to the URL given at construction)
    # so that callers can invoke +process+ with no arguments, matching
    # subclasses that override it without parameters.
    def process(url = @url)
    end

    # Fetches the given URL with RestClient and returns whatever
    # RestClient.get returns.
    def get_content_from(url)
      RestClient.get(url)
    end

    # Prints a progress line for the URL being scraped. Uses Kernel#p, so the
    # message is written in its inspected (quoted) form and returned.
    def scrape_message(url)
      p "Scraping: #{url}"
    end
  end
end
@@ -0,0 +1,3 @@
# Namespace for the confed_scraper gem.
module ConfedScraper
  # Gem version string; the gemspec reads this to set the gem's version.
  # Frozen so the constant's value cannot be mutated in place.
  VERSION = "0.0.1".freeze
end

0 comments on commit f4dc08c

Please sign in to comment.