From 442487e3cbecf4e551c68ecaebd566fe06764d51 Mon Sep 17 00:00:00 2001
From: Michael Cetrulo <mail2samus@gmail.com>
Date: Tue, 15 May 2012 05:51:48 -0300
Subject: [PATCH 1/5] split Planet classes into separate files

---
 lib/planet.rb      | 128 +--------------------------------------------
 lib/planet/blog.rb |  74 ++++++++++++++++++++++++++
 lib/planet/post.rb |  58 ++++++++++++++++++++
 planet.gemspec     |   6 +--
 4 files changed, 134 insertions(+), 132 deletions(-)
 create mode 100644 lib/planet/blog.rb
 create mode 100644 lib/planet/post.rb

diff --git a/lib/planet.rb b/lib/planet.rb
index 409306b..91c91cc 100644
--- a/lib/planet.rb
+++ b/lib/planet.rb
@@ -1,6 +1,5 @@
-require 'feedzirra'
-require 'mustache'
 require 'planet/version'
+require 'planet/blog'
 
 class Planet
 
@@ -33,129 +32,4 @@ def write_posts
       File.open(file_name + '.markdown', "w+") { |f| f.write(post.to_s) }
     end
   end
-
-  class Post
-
-    attr_accessor :title, :content, :date, :url, :blog
-
-    def initialize(attributes = {})
-      self.title = attributes[:title]
-      self.content = attributes[:content]
-      self.date = attributes[:date]
-      self.url = attributes[:url]
-      self.blog = attributes[:blog]
-    end
-
-    def to_s
-      "#{ header }#{ content }#{ footer }"
-    end
-
-    def to_hash
-      {
-        post_content: self.content,
-        post_title: self.title,
-        post_date: self.date,
-        image_url: self.blog.image,
-        author: self.blog.author,
-        blog_url: self.blog.url,
-        blog_name: self.blog.name,
-        post_url: self.url,
-        twitter: self.blog.twitter,
-        twitter_url: "http://twitter.com/#{ self.blog.twitter }"
-      }
-    end
-
-    def header
-      ## TODO: We need categories/tags
-      file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'header.md'
-      file_contents = File.read(file)
-
-      Mustache.render(file_contents, self.to_hash)
-    end
-
-    def footer
-      file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'author.html'
-      file_contents = File.read(file)
-
-      Mustache.render(file_contents, self.to_hash)
-    end
-
-    def file_name
-      name_date = date ? date.strftime('%Y-%m-%d') : nil
-      name_title = title.downcase.scan(/\w+/).join('-')
-
-      [name_date, name_title].join('-')
-    end
-
-  end
-
-  class Blog
-
-    attr_accessor :url, :feed, :name, :author, :image, :twitter, :posts, :planet
-
-    def initialize(attributes = {})
-      self.url = attributes[:url]
-      self.feed = attributes[:feed]
-      self.name = attributes[:name]
-      self.author = attributes[:author]
-      self.image = attributes[:image]
-      self.twitter = attributes[:twitter]
-      self.posts = attributes.fetch(:posts, [])
-      self.planet = attributes[:planet]
-    end
-
-    def fetch
-      feed = Feedzirra::Feed.fetch_and_parse(self.feed)
-
-      self.name ||= feed.title || 'the source'
-      self.url ||= feed.url
-
-      if self.url.nil?
-        abort "#{ self.author }'s blog does not have a url field on it's feed, you will need to specify it on planet.yml"
-      end
-
-      feed.entries.each do |entry|
-        ## TODO: I should probably consider using feed 'adapters' for specific
-        ## blog engine feeds that don't have their stuff on the standard fields.
-        ## Example: blogspot has the content on "summary" instead of content ¬¬.
-        content = if !entry.content.nil?
-                    self.sanitize_images(entry.content.strip)
-                  elsif !entry.summary.nil?
-                    self.sanitize_images(entry.summary.strip)
-                  else
-                    abort "=> No content found on entry"
-                  end
-
-        title = if !entry.title.nil?
-                  entry.title.sanitize
-                else
-                  self.name
-                end
-
-        self.posts << @post = Post.new(
-          title: title,
-          content: content,
-          date: entry.published,
-          url: self.url + entry.url,
-          blog: self
-        )
-
-        puts "=> Found post titled #{ @post.title } - by #{ @post.blog.author }"
-      end
-    end
-
-    def sanitize_images(html)
-      ## We take all images with src not matching http refs and append
-      ## the original blog to them.
-      html.scan(/<img src="([^h"]+)"/).flatten.each do |img|
-        if img[0] == '/'
-          html.gsub!(img, "#{ self.url }#{ img }")
-        else
-          html.gsub!(img, "#{ self.url }/#{ img }")
-        end
-      end
-
-      html
-    end
-  end
 end
diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb
new file mode 100644
index 0000000..9681e79
--- /dev/null
+++ b/lib/planet/blog.rb
@@ -0,0 +1,74 @@
+require 'feedzirra'
+require 'planet/post'
+
+class Planet
+  class Blog
+
+    attr_accessor :url, :feed, :name, :author, :image, :twitter, :posts, :planet
+
+    def initialize(attributes = {})
+      self.url = attributes[:url]
+      self.feed = attributes[:feed]
+      self.name = attributes[:name]
+      self.author = attributes[:author]
+      self.image = attributes[:image]
+      self.twitter = attributes[:twitter]
+      self.posts = attributes.fetch(:posts, [])
+      self.planet = attributes[:planet]
+    end
+
+    def fetch
+      feed = Feedzirra::Feed.fetch_and_parse(self.feed)
+
+      self.name ||= feed.title || 'the source'
+      self.url ||= feed.url
+
+      if self.url.nil?
+        abort "#{ self.author }'s blog does not have a url field on it's feed, you will need to specify it on planet.yml"
+      end
+
+      feed.entries.each do |entry|
+        ## TODO: I should probably consider using feed 'adapters' for specific
+        ## blog engine feeds that don't have their stuff on the standard fields.
+        ## Example: blogspot has the content on "summary" instead of content ¬¬.
+        content = if !entry.content.nil?
+                    self.sanitize_images(entry.content.strip)
+                  elsif !entry.summary.nil?
+                    self.sanitize_images(entry.summary.strip)
+                  else
+                    abort "=> No content found on entry"
+                  end
+
+        title = if !entry.title.nil?
+                  entry.title.sanitize
+                else
+                  self.name
+                end
+
+        self.posts << @post = Post.new(
+          title: title,
+          content: content,
+          date: entry.published,
+          url: self.url + entry.url,
+          blog: self
+        )
+
+        puts "=> Found post titled #{ @post.title } - by #{ @post.blog.author }"
+      end
+    end
+
+    def sanitize_images(html)
+      ## We take all images with src not matching http refs and append
+      ## the original blog to them.
+      html.scan(/<img src="([^h"]+)"/).flatten.each do |img|
+        if img[0] == '/'
+          html.gsub!(img, "#{ self.url }#{ img }")
+        else
+          html.gsub!(img, "#{ self.url }/#{ img }")
+        end
+      end
+
+      html
+    end
+  end
+end
diff --git a/lib/planet/post.rb b/lib/planet/post.rb
new file mode 100644
index 0000000..d5bfff4
--- /dev/null
+++ b/lib/planet/post.rb
@@ -0,0 +1,58 @@
+require 'mustache'
+
+class Planet
+  class Post
+
+    attr_accessor :title, :content, :date, :url, :blog
+
+    def initialize(attributes = {})
+      self.title = attributes[:title]
+      self.content = attributes[:content]
+      self.date = attributes[:date]
+      self.url = attributes[:url]
+      self.blog = attributes[:blog]
+    end
+
+    def to_s
+      "#{ header }#{ content }#{ footer }"
+    end
+
+    def to_hash
+      {
+        post_content: self.content,
+        post_title: self.title,
+        post_date: self.date,
+        image_url: self.blog.image,
+        author: self.blog.author,
+        blog_url: self.blog.url,
+        blog_name: self.blog.name,
+        post_url: self.url,
+        twitter: self.blog.twitter,
+        twitter_url: "http://twitter.com/#{ self.blog.twitter }"
+      }
+    end
+
+    def header
+      ## TODO: We need categories/tags
+      file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'header.md'
+      file_contents = File.read(file)
+
+      Mustache.render(file_contents, self.to_hash)
+    end
+
+    def footer
+      file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'author.html'
+      file_contents = File.read(file)
+
+      Mustache.render(file_contents, self.to_hash)
+    end
+
+    def file_name
+      name_date = date ? date.strftime('%Y-%m-%d') : nil
+      name_title = title.downcase.scan(/\w+/).join('-')
+
+      [name_date, name_title].join('-')
+    end
+
+  end
+end
diff --git a/planet.gemspec b/planet.gemspec
index 2be00e1..6d9211f 100644
--- a/planet.gemspec
+++ b/planet.gemspec
@@ -8,11 +8,7 @@ spec = Gem::Specification.new do |s|
   s.homepage = 'http://poteland.com'
   s.platform = Gem::Platform::RUBY
   s.summary = 'An awesome rss/atom feed aggregator designed to work with Octopress/Jekyll'
-  s.files = %w(
-bin/planet
-lib/planet/version.rb
-lib/planet.rb
-  )
+  s.files = Dir['bin/*'] + Dir['lib/**/*.rb']
   s.require_paths << 'lib'
   s.has_rdoc = false
   s.bindir = 'bin'

From d58e28a935c4e9a3fed18ee2ae55d64b52eab8a4 Mon Sep 17 00:00:00 2001
From: Michael Cetrulo <mail2samus@gmail.com>
Date: Tue, 15 May 2012 07:43:39 -0300
Subject: [PATCH 2/5] don't expose internal classes

---
 bin/planet    | 17 ++++-------------
 lib/planet.rb | 12 +++++++++++-
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/bin/planet b/bin/planet
index 3e2113e..54cedaf 100755
--- a/bin/planet
+++ b/bin/planet
@@ -50,19 +50,10 @@ command :generate do |c|
   c.action do |global_options,options,args|
     conf = YAML.load_file('planet.yml')
 
-    @planet = Planet.new(config: conf.fetch('planet', {}))
-
-    conf['blogs'].each do |blog|
-      @planet.blogs << Planet::Blog.new(
-        feed: blog['feed'],
-        url: blog['url'],
-        author: blog['author'],
-        image: blog['image'],
-        posts: [],
-        planet: @planet,
-        twitter: blog['twitter']
-      )
-    end
+    @planet = Planet.new(
+      config: conf.fetch('planet', {}),
+      blogs:  conf.fetch('blogs',  [])
+    )
 
     @planet.aggregate
 
diff --git a/lib/planet.rb b/lib/planet.rb
index 91c91cc..dd6398f 100644
--- a/lib/planet.rb
+++ b/lib/planet.rb
@@ -7,7 +7,17 @@ class Planet
 
   def initialize(attributes = {})
     self.config = attributes[:config]
-    self.blogs = attributes.fetch(:blogs, [])
+    self.blogs  = attributes.fetch(:blogs, []).map do |blog|
+      Blog.new(
+        feed:    blog['feed'],
+        url:     blog['url'],
+        author:  blog['author'],
+        image:   blog['image'],
+        posts:   [],
+        planet:  self,
+        twitter: blog['twitter']
+      )
+    end
   end
 
   def posts

From 4830db6b47f8fbefba58141006262ca6d7de1161 Mon Sep 17 00:00:00 2001
From: Michael Cetrulo <michael.cetrulo@globant.com>
Date: Tue, 15 May 2012 17:37:20 -0300
Subject: [PATCH 3/5] =?UTF-8?q?modular=20approach=20foundation=208=C2=AC{?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/planet/blog.rb    | 12 ++++++------
 lib/planet/parsers.rb | 25 +++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 6 deletions(-)
 create mode 100644 lib/planet/parsers.rb

diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb
index 9681e79..02cdd5a 100644
--- a/lib/planet/blog.rb
+++ b/lib/planet/blog.rb
@@ -1,14 +1,15 @@
-require 'feedzirra'
 require 'planet/post'
+require 'planet/parsers'
 
 class Planet
   class Blog
 
-    attr_accessor :url, :feed, :name, :author, :image, :twitter, :posts, :planet
+    attr_accessor :url, :feed, :type, :name, :author, :image, :twitter, :posts, :planet
 
     def initialize(attributes = {})
       self.url = attributes[:url]
       self.feed = attributes[:feed]
+      self.type = attributes[:type]
       self.name = attributes[:name]
       self.author = attributes[:author]
       self.image = attributes[:image]
@@ -18,7 +19,9 @@ def initialize(attributes = {})
     end
 
     def fetch
-      feed = Feedzirra::Feed.fetch_and_parse(self.feed)
+      parser = self.type ? Parsers.get_parser(self.type) : Parsers.get_parser_for(self.feed)
+
+      feed = parser.fetch_and_parse(self.feed)
 
       self.name ||= feed.title || 'the source'
       self.url ||= feed.url
@@ -28,9 +31,6 @@ def fetch
       end
 
       feed.entries.each do |entry|
-        ## TODO: I should probably consider using feed 'adapters' for specific
-        ## blog engine feeds that don't have their stuff on the standard fields.
-        ## Example: blogspot has the content on "summary" instead of content ¬¬.
         content = if !entry.content.nil?
                     self.sanitize_images(entry.content.strip)
                   elsif !entry.summary.nil?
diff --git a/lib/planet/parsers.rb b/lib/planet/parsers.rb
new file mode 100644
index 0000000..00bf921
--- /dev/null
+++ b/lib/planet/parsers.rb
@@ -0,0 +1,25 @@
+require 'feedzirra'
+
+class Planet
+  class Parsers
+    @@parsers = []
+
+    def self.get_parser(type)
+      @@parsers.each do |parser|
+        return parser if parser.type == type
+      end
+
+      raise ArgumentError, "no parser for type '#{ type }'", caller
+    end
+
+    def self.get_parser_for(feed)
+      feed_domain = URI(feed).host
+
+      @@parsers.each do |parser|
+        return parser if parser.domains.any? { |domain| feed_domain.end_with? domain }
+      end
+
+      return Feedzirra::Feed
+    end
+  end
+end

From a8b3c2897576e87f8c7b0833b0c3610a8b4ae1a8 Mon Sep 17 00:00:00 2001
From: Michael Cetrulo <michael.cetrulo@globant.com>
Date: Wed, 16 May 2012 15:08:47 -0300
Subject: [PATCH 4/5] defining parser inheritance chain

---
 lib/planet/blog.rb                |  4 ++-
 lib/planet/parsers.rb             | 43 +++++++++++++++++++++++++------
 lib/planet/parsers/base_parser.rb | 21 +++++++++++++++
 3 files changed, 59 insertions(+), 9 deletions(-)
 create mode 100644 lib/planet/parsers/base_parser.rb

diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb
index 02cdd5a..e907105 100644
--- a/lib/planet/blog.rb
+++ b/lib/planet/blog.rb
@@ -16,10 +16,12 @@ def initialize(attributes = {})
       self.twitter = attributes[:twitter]
       self.posts = attributes.fetch(:posts, [])
       self.planet = attributes[:planet]
+
+      @parsers = Parsers.new
     end
 
     def fetch
-      parser = self.type ? Parsers.get_parser(self.type) : Parsers.get_parser_for(self.feed)
+      parser = self.type ? @parsers.get_parser(self.type) : @parsers.get_parser_for(self.feed)
 
       feed = parser.fetch_and_parse(self.feed)
 
diff --git a/lib/planet/parsers.rb b/lib/planet/parsers.rb
index 00bf921..bb651ac 100644
--- a/lib/planet/parsers.rb
+++ b/lib/planet/parsers.rb
@@ -1,25 +1,52 @@
 require 'feedzirra'
+require 'set'
 
 class Planet
   class Parsers
-    @@parsers = []
+    @@parsers = Set.new
+
+    def self.add_parser(parser)
+      @@parsers << parser
+    end
+
+    def initialize
+      @types, @domains = {}, {}
 
-    def self.get_parser(type)
       @@parsers.each do |parser|
-        return parser if parser.type == type
+        new_type, new_domains = parser.type, parser.domains
+
+        fail("duplicate type") if new_type and @types.has_key? new_type
+        fail("overlapping domains") unless (@domains.keys & new_domains).empty?
+
+        @types[new_type] = parser if new_type
+        new_domains.each do |new_domain|
+          @domains[new_domain] = parser
+        end
       end
+    end
 
-      raise ArgumentError, "no parser for type '#{ type }'", caller
+    def get_parser(type)
+      begin
+        return @types.fetch(type)
+      rescue KeyError => e
+        raise(ArgumentError, "No parser for type '#{ type }'", caller)
+      end
     end
 
-    def self.get_parser_for(feed)
+    def get_parser_for(feed)
       feed_domain = URI(feed).host
 
-      @@parsers.each do |parser|
-        return parser if parser.domains.any? { |domain| feed_domain.end_with? domain }
+      @domains.each do |domain, parser|
+        return parser if feed_domain.end_with? domain
       end
 
-      return Feedzirra::Feed
+      return Feedzirra::Feed # default generic parser
     end
   end
 end
+
+# load parsers
+dirname = File.join([File.dirname(__FILE__), 'parsers'])
+Dir.open(dirname).each do |filename|
+  require "#{dirname}/#{filename}" if filename.end_with? '.rb'
+end
diff --git a/lib/planet/parsers/base_parser.rb b/lib/planet/parsers/base_parser.rb
new file mode 100644
index 0000000..6b4f737
--- /dev/null
+++ b/lib/planet/parsers/base_parser.rb
@@ -0,0 +1,21 @@
+class Planet
+  class Parsers
+    class BaseParser
+      def self.type
+        @type
+      end
+
+      def self.domains
+        @domains || []
+      end
+
+      def self.inherited(parser)
+        Parsers.add_parser parser
+      end
+
+      def self.fetch_and_parse(feed)
+        raise(Exception, "Not implemented", caller)
+      end
+    end
+  end
+end

From f84c033f3a4b53afda1155a4e649dd6adaa71c50 Mon Sep 17 00:00:00 2001
From: Michael Cetrulo <michael.cetrulo@globant.com>
Date: Fri, 18 May 2012 15:31:39 -0300
Subject: [PATCH 5/5] comments

---
 lib/planet/blog.rb                |  3 +++
 lib/planet/parsers.rb             | 12 ++++++++++++
 lib/planet/parsers/base_parser.rb |  3 +++
 3 files changed, 18 insertions(+)

diff --git a/lib/planet/blog.rb b/lib/planet/blog.rb
index e907105..78ccbd8 100644
--- a/lib/planet/blog.rb
+++ b/lib/planet/blog.rb
@@ -17,12 +17,15 @@ def initialize(attributes = {})
       self.posts = attributes.fetch(:posts, [])
       self.planet = attributes[:planet]
 
+      # get parser-manager instance
       @parsers = Parsers.new
     end
 
     def fetch
+      # given parser can be set arbitrarily with :type or inferred from the domain
       parser = self.type ? @parsers.get_parser(self.type) : @parsers.get_parser_for(self.feed)
 
+      # parsers should mimic the Feedzirra interface
       feed = parser.fetch_and_parse(self.feed)
 
       self.name ||= feed.title || 'the source'
diff --git a/lib/planet/parsers.rb b/lib/planet/parsers.rb
index bb651ac..e860d19 100644
--- a/lib/planet/parsers.rb
+++ b/lib/planet/parsers.rb
@@ -2,6 +2,11 @@
 require 'set'
 
 class Planet
+  # Parsers class - manager for the feed parsers
+  #
+  # parser classes inherit from Planet::Parsers::BaseParser
+  # and are added automatically to the list of available parsers.
+  # files located in planet/parsers are loaded automatically.
   class Parsers
     @@parsers = Set.new
 
@@ -9,6 +14,10 @@ def self.add_parser(parser)
       @@parsers << parser
     end
 
+    # Parsers instances keep indexes of the available parsers and
+    # check for duplicate definitions (need to use an instance
+    # because #inherited gets called as soon as the class is seen
+    # but before it is fully defined).
     def initialize
       @types, @domains = {}, {}
 
@@ -25,6 +34,7 @@ def initialize
       end
     end
 
+    # returns the appropriate parser based on the type
     def get_parser(type)
       begin
         return @types.fetch(type)
@@ -33,6 +43,8 @@ def get_parser(type)
       end
     end
 
+    # returns any parser that can handle this feed's domain,
+    # defaults to Feedzirra if none available.
     def get_parser_for(feed)
       feed_domain = URI(feed).host
 
diff --git a/lib/planet/parsers/base_parser.rb b/lib/planet/parsers/base_parser.rb
index 6b4f737..ea15a8d 100644
--- a/lib/planet/parsers/base_parser.rb
+++ b/lib/planet/parsers/base_parser.rb
@@ -1,5 +1,8 @@
 class Planet
   class Parsers
+    # base class for feed parsers
+    # subclasses should declare @type and @domains
+    # and also mimic the Feedzirra interface.
     class BaseParser
       def self.type
         @type