From 442487e3cbecf4e551c68ecaebd566fe06764d51 Mon Sep 17 00:00:00 2001 From: Michael Cetrulo Date: Tue, 15 May 2012 05:51:48 -0300 Subject: [PATCH 1/5] split Planet classes into separate files --- lib/planet.rb | 128 +-------------------------------------------- lib/planet/blog.rb | 74 ++++++++++++++++++++++++++ lib/planet/post.rb | 58 ++++++++++++++++++++ planet.gemspec | 6 +-- 4 files changed, 134 insertions(+), 132 deletions(-) create mode 100644 lib/planet/blog.rb create mode 100644 lib/planet/post.rb diff --git a/lib/planet.rb b/lib/planet.rb index 409306b..91c91cc 100644 --- a/lib/planet.rb +++ b/lib/planet.rb @@ -1,6 +1,5 @@ -require 'feedzirra' -require 'mustache' require 'planet/version' +require 'planet/blog' class Planet @@ -33,129 +32,4 @@ def write_posts File.open(file_name + '.markdown', "w+") { |f| f.write(post.to_s) } end end - - class Post - - attr_accessor :title, :content, :date, :url, :blog - - def initialize(attributes = {}) - self.title = attributes[:title] - self.content = attributes[:content] - self.date = attributes[:date] - self.url = attributes[:url] - self.blog = attributes[:blog] - end - - def to_s - "#{ header }#{ content }#{ footer }" - end - - def to_hash - { - post_content: self.content, - post_title: self.title, - post_date: self.date, - image_url: self.blog.image, - author: self.blog.author, - blog_url: self.blog.url, - blog_name: self.blog.name, - post_url: self.url, - twitter: self.blog.twitter, - twitter_url: "http://twitter.com/#{ self.blog.twitter }" - } - end - - def header - ## TODO: We need categories/tags - file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'header.md' - file_contents = File.read(file) - - Mustache.render(file_contents, self.to_hash) - end - - def footer - file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'author.html' - file_contents = File.read(file) - - Mustache.render(file_contents, self.to_hash) - end - - def file_name - name_date = date ? 
date.strftime('%Y-%m-%d') : nil - name_title = title.downcase.scan(/\w+/).join('-') - - [name_date, name_title].join('-') - end - - end - - class Blog - - attr_accessor :url, :feed, :name, :author, :image, :twitter, :posts, :planet - - def initialize(attributes = {}) - self.url = attributes[:url] - self.feed = attributes[:feed] - self.name = attributes[:name] - self.author = attributes[:author] - self.image = attributes[:image] - self.twitter = attributes[:twitter] - self.posts = attributes.fetch(:posts, []) - self.planet = attributes[:planet] - end - - def fetch - feed = Feedzirra::Feed.fetch_and_parse(self.feed) - - self.name ||= feed.title || 'the source' - self.url ||= feed.url - - if self.url.nil? - abort "#{ self.author }'s blog does not have a url field on it's feed, you will need to specify it on planet.yml" - end - - feed.entries.each do |entry| - ## TODO: I should probably consider using feed 'adapters' for specific - ## blog engine feeds that don't have their stuff on the standard fields. - ## Example: blogspot has the content on "summary" instead of content ¬¬. - content = if !entry.content.nil? - self.sanitize_images(entry.content.strip) - elsif !entry.summary.nil? - self.sanitize_images(entry.summary.strip) - else - abort "=> No content found on entry" - end - - title = if !entry.title.nil? - entry.title.sanitize - else - self.name - end - - self.posts << @post = Post.new( - title: title, - content: content, - date: entry.published, - url: self.url + entry.url, - blog: self - ) - - puts "=> Found post titled #{ @post.title } - by #{ @post.blog.author }" - end - end - - def sanitize_images(html) - ## We take all images with src not matching http refs and append - ## the original blog to them. - html.scan(/ No content found on entry" + end + + title = if !entry.title.nil? 
+ entry.title.sanitize + else + self.name + end + + self.posts << @post = Post.new( + title: title, + content: content, + date: entry.published, + url: self.url + entry.url, + blog: self + ) + + puts "=> Found post titled #{ @post.title } - by #{ @post.blog.author }" + end + end + + def sanitize_images(html) + ## We take all images with src not matching http refs and append + ## the original blog to them. + html.scan(/ Date: Tue, 15 May 2012 07:43:39 -0300 Subject: [PATCH 2/5] don't expose internal classes --- bin/planet | 17 ++++------------- lib/planet.rb | 12 +++++++++++- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/bin/planet b/bin/planet index 3e2113e..54cedaf 100755 --- a/bin/planet +++ b/bin/planet @@ -50,19 +50,10 @@ command :generate do |c| c.action do |global_options,options,args| conf = YAML.load_file('planet.yml') - @planet = Planet.new(config: conf.fetch('planet', {})) - - conf['blogs'].each do |blog| - @planet.blogs << Planet::Blog.new( - feed: blog['feed'], - url: blog['url'], - author: blog['author'], - image: blog['image'], - posts: [], - planet: @planet, - twitter: blog['twitter'] - ) - end + @planet = Planet.new( + config: conf.fetch('planet', {}), + blogs: conf.fetch('blogs', []) + ) @planet.aggregate diff --git a/lib/planet.rb b/lib/planet.rb index 91c91cc..dd6398f 100644 --- a/lib/planet.rb +++ b/lib/planet.rb @@ -7,7 +7,17 @@ class Planet def initialize(attributes = {}) self.config = attributes[:config] - self.blogs = attributes.fetch(:blogs, []) + self.blogs = attributes.fetch(:blogs, []).map do |blog| + Blog.new( + feed: blog['feed'], + url: blog['url'], + author: blog['author'], + image: blog['image'], + posts: [], + planet: self, + twitter: blog['twitter'] + ) + end end def posts From 4830db6b47f8fbefba58141006262ca6d7de1161 Mon Sep 17 00:00:00 2001 From: Michael Cetrulo Date: Tue, 15 May 2012 17:37:20 -0300 Subject: [PATCH 3/5] =?UTF-8?q?modular=20approach=20foundation=208=C2=AC{?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/planet/blog.rb | 12 ++++++------ lib/planet/parsers.rb | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 lib/planet/parsers.rb diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb index 9681e79..02cdd5a 100644 --- a/lib/planet/blog.rb +++ b/lib/planet/blog.rb @@ -1,14 +1,15 @@ -require 'feedzirra' require 'planet/post' +require 'planet/parsers' class Planet class Blog - attr_accessor :url, :feed, :name, :author, :image, :twitter, :posts, :planet + attr_accessor :url, :feed, :type, :name, :author, :image, :twitter, :posts, :planet def initialize(attributes = {}) self.url = attributes[:url] self.feed = attributes[:feed] + self.type = attributes[:type] self.name = attributes[:name] self.author = attributes[:author] self.image = attributes[:image] @@ -18,7 +19,9 @@ def initialize(attributes = {}) end def fetch - feed = Feedzirra::Feed.fetch_and_parse(self.feed) + parser = self.type ? Parsers.get_parser(self.type) : Parsers.get_parser_for(self.feed) + + feed = parser.fetch_and_parse(self.feed) self.name ||= feed.title || 'the source' self.url ||= feed.url @@ -28,9 +31,6 @@ def fetch end feed.entries.each do |entry| - ## TODO: I should probably consider using feed 'adapters' for specific - ## blog engine feeds that don't have their stuff on the standard fields. - ## Example: blogspot has the content on "summary" instead of content ¬¬. content = if !entry.content.nil? self.sanitize_images(entry.content.strip) elsif !entry.summary.nil? 
diff --git a/lib/planet/parsers.rb b/lib/planet/parsers.rb new file mode 100644 index 0000000..00bf921 --- /dev/null +++ b/lib/planet/parsers.rb @@ -0,0 +1,25 @@ +require 'feedzirra' + +class Planet + class Parsers + @@parsers = [] + + def self.get_parser(type) + @@parsers.each do |parser| + return parser if parser.type == type + end + + raise ArgumentError, "no parser for type '#{ type }'", caller + end + + def self.get_parser_for(feed) + feed_domain = URI(feed).host + + @@parsers.each do |parser| + return parser if parser.domains.any? { |domain| feed_domain.end_with? domain } + end + + return Feedzirra::Feed + end + end +end From a8b3c2897576e87f8c7b0833b0c3610a8b4ae1a8 Mon Sep 17 00:00:00 2001 From: Michael Cetrulo Date: Wed, 16 May 2012 15:08:47 -0300 Subject: [PATCH 4/5] defining parser inheritance chain --- lib/planet/blog.rb | 4 ++- lib/planet/parsers.rb | 43 +++++++++++++++++++++++++------ lib/planet/parsers/base_parser.rb | 21 +++++++++++++++ 3 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 lib/planet/parsers/base_parser.rb diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb index 02cdd5a..e907105 100644 --- a/lib/planet/blog.rb +++ b/lib/planet/blog.rb @@ -16,10 +16,12 @@ def initialize(attributes = {}) self.twitter = attributes[:twitter] self.posts = attributes.fetch(:posts, []) self.planet = attributes[:planet] + + @parsers = Parsers.new end def fetch - parser = self.type ? Parsers.get_parser(self.type) : Parsers.get_parser_for(self.feed) + parser = self.type ? 
@parsers.get_parser(self.type) : @parsers.get_parser_for(self.feed) feed = parser.fetch_and_parse(self.feed) diff --git a/lib/planet/parsers.rb b/lib/planet/parsers.rb index 00bf921..bb651ac 100644 --- a/lib/planet/parsers.rb +++ b/lib/planet/parsers.rb @@ -1,25 +1,52 @@ require 'feedzirra' +require 'set' class Planet class Parsers - @@parsers = [] + @@parsers = Set.new + + def self.add_parser(parser) + @@parsers << parser + end + + def initialize + @types, @domains = {}, {} - def self.get_parser(type) @@parsers.each do |parser| - return parser if parser.type == type + new_type, new_domains = parser.type, parser.domains + + fail("duplicate type") if new_type and @types.has_key? new_type + fail("overlapping domains") unless (@domains.keys & new_domains).empty? + + @types[new_type] = parser if new_type + new_domains.each do |new_domain| + @domains[new_domain] = parser + end end + end - raise ArgumentError, "no parser for type '#{ type }'", caller + def get_parser(type) + begin + return @types.fetch(type) + rescue KeyError => e + raise(ArgumentError, "No parser for type '#{ type }'", caller) + end end - def self.get_parser_for(feed) + def get_parser_for(feed) feed_domain = URI(feed).host - @@parsers.each do |parser| - return parser if parser.domains.any? { |domain| feed_domain.end_with? domain } + @domains.each do |domain, parser| + return parser if feed_domain.end_with? domain end - return Feedzirra::Feed + return Feedzirra::Feed # default generic parser end end end + +# load parsers +dirname = File.join([File.dirname(__FILE__), 'parsers']) +Dir.open(dirname).each do |filename| + require "#{dirname}/#{filename}" if filename.end_with? 
'.rb' +end diff --git a/lib/planet/parsers/base_parser.rb b/lib/planet/parsers/base_parser.rb new file mode 100644 index 0000000..6b4f737 --- /dev/null +++ b/lib/planet/parsers/base_parser.rb @@ -0,0 +1,21 @@ +class Planet + class Parsers + class BaseParser + def self.type + @type + end + + def self.domains + @domains || [] + end + + def self.inherited(parser) + Parsers.add_parser parser + end + + def self.fetch_and_parse(feed) + raise(Exception, "Not implemented", caller) + end + end + end +end From f84c033f3a4b53afda1155a4e649dd6adaa71c50 Mon Sep 17 00:00:00 2001 From: Michael Cetrulo Date: Fri, 18 May 2012 15:31:39 -0300 Subject: [PATCH 5/5] comments --- lib/planet/blog.rb | 3 +++ lib/planet/parsers.rb | 12 ++++++++++++ lib/planet/parsers/base_parser.rb | 3 +++ 3 files changed, 18 insertions(+) diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb index e907105..78ccbd8 100644 --- a/lib/planet/blog.rb +++ b/lib/planet/blog.rb @@ -17,12 +17,15 @@ def initialize(attributes = {}) self.posts = attributes.fetch(:posts, []) self.planet = attributes[:planet] + # get parser-manager instance @parsers = Parsers.new end def fetch + # given parser can be set arbitrarily with :type or inferred from the domain parser = self.type ? @parsers.get_parser(self.type) : @parsers.get_parser_for(self.feed) + # parser instances should mimic Feedzirra interface feed = parser.fetch_and_parse(self.feed) self.name ||= feed.title || 'the source' diff --git a/lib/planet/parsers.rb b/lib/planet/parsers.rb index bb651ac..e860d19 100644 --- a/lib/planet/parsers.rb +++ b/lib/planet/parsers.rb @@ -2,6 +2,11 @@ require 'set' class Planet + # Parsers class - manager for the feed parsers + # + # parser classes inherit from Planet::Parsers::BaseParser + # and are added automatically to the list of available parsers. + # files located on planet/parsers are automatically loaded.
class Parsers @@parsers = Set.new @@ -9,6 +14,10 @@ def self.add_parser(parser) @@parsers << parser end + # Parsers instances keep indexes of the available parsers and + # check for duplicate definitions (need to use an instance + # because #inherited gets called as soon as the class is seen + # but before it is fully defined). def initialize @types, @domains = {}, {} @@ -25,6 +34,7 @@ def initialize end end + # returns the appropriate parser based on the type def get_parser(type) begin return @types.fetch(type) @@ -33,6 +43,8 @@ def get_parser(type) end end + # returns any parser that can handle this feed's domain, + # defaults to Feedzirra if none available. def get_parser_for(feed) feed_domain = URI(feed).host diff --git a/lib/planet/parsers/base_parser.rb b/lib/planet/parsers/base_parser.rb index 6b4f737..ea15a8d 100644 --- a/lib/planet/parsers/base_parser.rb +++ b/lib/planet/parsers/base_parser.rb @@ -1,5 +1,8 @@ class Planet class Parsers + # base class for feed parsers + # subclasses should declare @type and @domains + # and also mimic Feedzirra interface. class BaseParser def self.type @type