Skip to content

Commit

Permalink
added processors and wrote an example
Browse files Browse the repository at this point in the history
  • Loading branch information
mattetti committed Dec 27, 2012
1 parent fc0380b commit ec2593a
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 3 deletions.
51 changes: 51 additions & 0 deletions processors/episode_summary.rb
@@ -0,0 +1,51 @@
require 'erb'

# Creates a formatted summary of a collection of episodes.
#
# Every value interpolated into the HTML output is HTML-escaped so that
# markup characters in episode data (which may originate from scraped pages)
# cannot break or inject into the generated document.
class EpisodeSummary

  attr_accessor :items

  # Converts the passed episode items into a summary
  # that is formatted based on the passed format.
  # @param [Array<#show_name, #title, #url, #notes, #to_json>] items
  #   The episodes to summarize.
  # @param [Symbol] format The summary format (:html and :json
  #   supported)
  # @return [String] The HTML document, or a JSON array literal built from
  #   each item's #to_json. For any other format no exception is raised;
  #   a "Format ... not supported" message is returned instead.
  def process(items, format=:html)
    self.items = items
    case format
    when :html
      episodes = items.map { |item| html_episode_summary(item) }.join("\n")
      html_header + "\n" + episodes + html_footer
    when :json
      # Join the per-item JSON documents into a single JSON array so the
      # result is a String, as documented.
      "[#{items.map(&:to_json).join(',')}]"
    else
      "Format #{format} not supported"
    end
  end

  # @return [String] The opening part of the HTML document, up to and
  #   including the opening <ul> tag.
  def html_header
    # Heredoc content is kept at column 0 so no indentation leaks into
    # the emitted markup.
    <<-EOS
<!DOCTYPE html>
<html>
<head><meta charset="utf-8"></head>
<body>
<div>
<h1>List of episodes</h1>
<ul>
    EOS
  end

  # @return [String] The tags closing everything opened by #html_header.
  def html_footer
    "</ul></div></body></html>"
  end

  # Builds the list entry for one episode.
  # @param [#show_name, #title, #url, #notes] item The episode to render.
  # @return [String] An <li> fragment with all episode values HTML-escaped.
  def html_episode_summary(item)
    show_name = escape_html(item.show_name)
    title     = escape_html(item.title)
    url       = escape_html(item.url)
    notes     = escape_html(item.notes)
    <<-EOS
<li>
<h2>#{show_name} - #{title}</h2>
<a href="#{url}">Link (#{notes})</a>
</li>
    EOS
  end

  private

  # Escapes &, <, >, " and ' in the string form of the value
  # (nil becomes an empty string).
  def escape_html(value)
    ERB::Util.html_escape(value)
  end

end
19 changes: 19 additions & 0 deletions processors/to_file.rb
@@ -0,0 +1,19 @@
require 'tempfile'

# Writes content to a file on disk and reports where it was written.
class ToFile

  # Saves the passed content to a file.
  # @param [#to_s] content The content to save to file.
  # @param [String, NilClass] destination The path to save the content,
  #   if none is passed, a temporary file is created. That temporary file
  #   is NOT removed automatically; the caller owns it and is responsible
  #   for deleting it.
  # @return [String] The path of the file the content was saved to.
  def process(content, destination=nil)
    if destination
      File.open(destination, 'w') do |f|
        f << content
        f.path
      end
    else
      # Tempfile.new never calls a block (the content would not be written)
      # and deletes its file when the object is garbage-collected, which
      # would invalidate the returned path. Tempfile.create without a block
      # returns a plain File that stays on disk.
      file = Tempfile.create('scrapbook')
      begin
        file << content
      ensure
        file.close
      end
      file.path
    end
  end

end
9 changes: 8 additions & 1 deletion runner.rb
@@ -1,9 +1,16 @@
require 'bundler'
Bundler.require
require 'fileutils'

# Flush output immediately so progress is visible while the script runs.
STDOUT.sync = true

# Resolve every path against the directory that holds this script, so the
# scrapers/processors are found and the output directory is created in the
# same place the summary is written, whatever the current working directory.
root = File.expand_path(File.dirname(__FILE__))

# Require all the scrapers and processors
Dir.glob(File.join(root, "scrapers", "*.rb")) { |file| require file }
Dir.glob(File.join(root, "processors", "*.rb")) { |file| require file }

output_dir = File.join(root, "output")
FileUtils.mkdir_p(output_dir)
# TODO: use a scheduler and send to processors
episodes = FranceTVJeunesse.run
puts episodes.map(&:to_json)
summary = EpisodeSummary.new.process(episodes)
# One summary file per day; a second run on the same day overwrites it.
destination = File.join(output_dir, "summary_#{Time.now.strftime("%Y-%m-%d")}.html")
puts ToFile.new.process(summary, destination)
4 changes: 2 additions & 2 deletions scrapers/pluzz_francetv_fr.rb
Expand Up @@ -8,8 +8,8 @@ def self.run
url = "http://pluzz.francetv.fr/ajax/launchsearch/rubrique/jeunesse/datedebut/#{Time.now.strftime("%Y-%m-%dT00:00")}/datefin/#{Time.now.strftime("%Y-%m-%dT23:59")}/type/lesplusrecents/nb/100/"
page = agent.get(url)
episodes = fetch_episodes(page)
puts "success" unless episodes.find{|e| e.failed?}
episides
STDERR << "Error scraping #{url}" if episodes.find{|e| e.failed?}
episodes
end

def self.fetch_episodes(page)
Expand Down

0 comments on commit ec2593a

Please sign in to comment.