Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

added processors and wrote an example

  • Loading branch information...
commit ec2593a75a5b339f1583693621c8b76ac4ac2216 1 parent fc0380b
@mattetti authored
View
51 processors/episode_summary.rb
@@ -0,0 +1,51 @@
# Creates a formatted summary of a collection of episodes.
class EpisodeSummary

  # The episodes most recently passed to #process.
  attr_accessor :items

  # Converts the passed episode items into a summary formatted
  # according to the requested format.
  #
  # @param [Array<#title>] items The episodes to summarize
  # @param [Symbol] format The summary format (:html and :json supported)
  # @return [String] the formatted summary, or an error message when the
  #   format is not supported
  def process(items, format = :html)
    self.items = items
    case format
    when :html
      html_header + "\n" +
        items.map { |i| html_episode_summary(i) }.join("\n") +
        html_footer
    when :json
      # Join the per-episode fragments into a single JSON array string.
      # (The original returned an Array here, contradicting the documented
      # @return [String] and the String the :html branch produces.)
      "[#{items.map(&:to_json).join(',')}]"
    else
      "Format #{format} not supported"
    end
  end

  # Opening HTML boilerplate for the summary page.
  # @return [String]
  def html_header
    <<-EOS
<!DOCTYPE html>
<html>
<head><meta charset="utf-8"></head>
<body>
<div>
<h1>List of episodes</h1>
<ul>
    EOS
  end

  # Closing tags matching #html_header.
  # @return [String]
  def html_footer
    "</ul></div></body></html>"
  end

  # Renders one <li> entry for a single episode.
  # @param [#show_name, #title, #url, #notes] item The episode to render
  # @return [String]
  def html_episode_summary(item)
    <<-EOS
<li>
  <h2>#{item.show_name} - #{item.title}</h2>
  <a href="#{item.url}">Link (#{item.notes})</a>
</li>
    EOS
  end

end
View
19 processors/to_file.rb
@@ -0,0 +1,19 @@
+require 'tempfile'
+
# Persists arbitrary content to disk.
class ToFile

  # Saves the passed content to a file.
  #
  # @param [#to_s] content The content to save to file.
  # @param [String, NilClass] destination The path to save the content;
  #   if none is passed, a tempfile is used.
  # @return [String] The path of the file the content was saved to.
  def process(content, destination = nil)
    if destination
      # Block form closes the file handle for us.
      File.open(destination, 'w') { |f| f << content }
      destination
    else
      # BUG FIX: Tempfile.new does not yield to a block (only Tempfile.open
      # does), so the original never wrote `content` into the tempfile and
      # returned a path to an empty file. Write and close explicitly; the
      # path stays valid until the Tempfile object is finalized.
      file = Tempfile.new('scrapbook')
      begin
        file << content
      ensure
        file.close
      end
      file.path
    end
  end

end
View
9 runner.rb
@@ -1,9 +1,16 @@
require 'bundler'
Bundler.require
require 'fileutils'

# Flush stdout immediately so progress is visible when piped or logged.
STDOUT.sync = true

# Require all the scrapers and processors. Resolve the globs relative to
# this script so the runner works regardless of the current directory.
base_dir = File.expand_path(File.dirname(__FILE__))
Dir.glob(File.join(base_dir, "scrapers", "*.rb")) { |file| require file }
Dir.glob(File.join(base_dir, "processors", "*.rb")) { |file| require file }

# BUG FIX: the original created ./output relative to the current working
# directory but built the destination path under the script's directory —
# writing failed when run from anywhere else. Use one directory for both.
output_dir = File.join(base_dir, "output")
FileUtils.mkdir_p(output_dir)

# TODO: use a scheduler and send to processors
episodes = FranceTVJeunesse.run
summary = EpisodeSummary.new.process(episodes)
destination = File.join(output_dir, "summary_#{Time.now.strftime("%Y-%m-%d")}.html")
puts ToFile.new.process(summary, destination)
View
4 scrapers/pluzz_francetv_fr.rb
@@ -8,8 +8,8 @@ def self.run
url = "http://pluzz.francetv.fr/ajax/launchsearch/rubrique/jeunesse/datedebut/#{Time.now.strftime("%Y-%m-%dT00:00")}/datefin/#{Time.now.strftime("%Y-%m-%dT23:59")}/type/lesplusrecents/nb/100/"
page = agent.get(url)
episodes = fetch_episodes(page)
- puts "success" unless episodes.find{|e| e.failed?}
- episides
+ STDERR << "Error scraping #{url}" if episodes.find{|e| e.failed?}
+ episodes
end
def self.fetch_episodes(page)
Please sign in to comment.
Something went wrong with that request. Please try again.