added processors and wrote an example

mattetti · Dec 27, 2012 · ec2593a · ec2593a
1 parent fc0380b
commit ec2593a
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 3 deletions.
diff --git a/processors/episode_summary.rb b/processors/episode_summary.rb
@@ -0,0 +1,51 @@
+require 'cgi'
+
+# Creates a formatted summary of a collection of episodes.
+#
+class EpisodeSummary
+
+  # The items passed to the most recent #process call.
+  attr_accessor :items
+
+  # Converts the passed episode items in a summary
+  # that is formatted based on the passed format.
+  # @param [Array<#title>] items The episodes to summarize. The :html
+  #   format reads #show_name, #title, #url and #notes from each item;
+  #   the :json format calls #to_json on each item.
+  # @param [Symbol] format The summary format (:html and :json
+  #   supported)
+  # @return [String, Array] The HTML document for :html, the array of
+  #   each item's #to_json result for :json, or a "not supported"
+  #   message string for any other format.
+  def process(items, format=:html)
+    self.items = items
+    case format
+    when :html
+      # Each <li> fragment already ends with a newline (heredoc), so the
+      # footer is appended directly after the joined fragments.
+      episodes = items.map { |item| html_episode_summary(item) }.join("\n")
+      "#{html_header}\n#{episodes}#{html_footer}"
+    when :json
+      items.map(&:to_json)
+    else
+      "Format #{format} not supported"
+    end
+  end
+
+  # Opening markup of the HTML summary, up to and including the <ul>
+  # that wraps the episode entries.
+  # @return [String]
+  def html_header
+    <<-EOS
+    <!DOCTYPE html>
+    <html>
+    <head><meta charset="utf-8"></head>
+    <body>
+    <div>
+      <h1>List of episodes</h1>
+      <ul>      
+    EOS
+  end
+
+  # Closing markup matching the tags opened by #html_header.
+  # @return [String]
+  def html_footer
+    "</ul></div></body></html>"
+  end
+
+  # Renders one episode as an <li> entry. Every interpolated value is
+  # HTML-escaped so that characters such as &, <, > and quotes coming
+  # from the episode data cannot break or inject markup.
+  # @param [#show_name, #title, #url, #notes] item The episode to render
+  # @return [String]
+  def html_episode_summary(item)
+    <<-EOS
+      <li>
+        <h2>#{escape_html(item.show_name)} - #{escape_html(item.title)}</h2>
+        <a href="#{escape_html(item.url)}">Link (#{escape_html(item.notes)})</a>
+      </li>
+    EOS
+  end
+
+  private
+
+  # Escapes a value for safe inclusion in HTML text or attribute values.
+  # nil is rendered as an empty string, as plain interpolation would.
+  def escape_html(value)
+    CGI.escapeHTML(value.to_s)
+  end
+
+end
diff --git a/processors/to_file.rb b/processors/to_file.rb
@@ -0,0 +1,19 @@
+require 'tempfile'
+
+class ToFile
+
+  # Saves the passed content to a file.
+  # @param [#to_s] content The content to save to file.
+  # @param [String, NilClass] destination The path to save the content,
+  # if none is passed, a tmpfile is used. The tmpfile is not removed
+  # automatically; the caller owns it and should delete it when done.
+  # @return [String] The path of the file the content was saved to.
+  def process(content, destination=nil)
+    if destination
+      # The block form closes the file; its value (the path) is returned
+      # by File.open.
+      File.open(destination, 'w') do |f|
+        f << content
+        f.path
+      end
+    else
+      # Tempfile.new does not call a block passed to it, and the file it
+      # creates is unlinked once the Tempfile object is garbage
+      # collected. Tempfile.create returns a regular File that stays on
+      # disk, so the returned path remains valid.
+      file = Tempfile.create('scrapbook')
+      begin
+        file << content
+      ensure
+        file.close
+      end
+      file.path
+    end
+  end
+
+end
diff --git a/runner.rb b/runner.rb
@@ -1,9 +1,16 @@
 require 'bundler'
 Bundler.require
+require 'fileutils'
+
+STDOUT.sync = true
 
 # Require all the scrapers
 Dir.glob("./scrapers/*.rb"){|file| require file }
+Dir.glob("./processors/*.rb"){|file| require file }
 
+# Resolve the output directory relative to this script rather than the
+# current working directory, so the directory created here is the same
+# one the summary file is written into.
+output_dir = File.join(File.expand_path(File.dirname(__FILE__)), "output")
+FileUtils.mkdir_p(output_dir)
 # TODO: use a scheduler and send to processors
 episodes = FranceTVJeunesse.run
-puts episodes.map(&:to_json)
+summary = EpisodeSummary.new.process(episodes)
+# One summary file per day; a later run on the same day overwrites it.
+destination = File.join(output_dir, "summary_#{Time.now.strftime("%Y-%m-%d")}.html")
+puts ToFile.new.process(summary, destination)
diff --git a/scrapers/pluzz_francetv_fr.rb b/scrapers/pluzz_francetv_fr.rb
@@ -8,8 +8,8 @@ def self.run
     url = "http://pluzz.francetv.fr/ajax/launchsearch/rubrique/jeunesse/datedebut/#{Time.now.strftime("%Y-%m-%dT00:00")}/datefin/#{Time.now.strftime("%Y-%m-%dT23:59")}/type/lesplusrecents/nb/100/"
     page = agent.get(url)
     episodes = fetch_episodes(page)
-    puts "success" unless episodes.find{|e| e.failed?}
-    episides
+    STDERR << "Error scraping #{url}" if episodes.find{|e| e.failed?}
+    episodes
   end
 
   def self.fetch_episodes(page)