Permalink
Browse files

Merge pull request #21 from git2samus/parser

modular architecture for feed parsers by samus
  • Loading branch information...
2 parents 85db784 + f84c033 commit 45d74df4f5c75e2e7426807139f3881e7da6e207 @pote committed May 21, 2012
Showing with 242 additions and 146 deletions.
  1. +4 −13 bin/planet
  2. +12 −128 lib/planet.rb
  3. +79 −0 lib/planet/blog.rb
  4. +64 −0 lib/planet/parsers.rb
  5. +24 −0 lib/planet/parsers/base_parser.rb
  6. +58 −0 lib/planet/post.rb
  7. +1 −5 planet.gemspec
View
@@ -50,19 +50,10 @@ command :generate do |c|
c.action do |global_options,options,args|
conf = YAML.load_file('planet.yml')
- @planet = Planet.new(config: conf.fetch('planet', {}))
-
- conf['blogs'].each do |blog|
- @planet.blogs << Planet::Blog.new(
- feed: blog['feed'],
- url: blog['url'],
- author: blog['author'],
- image: blog['image'],
- posts: [],
- planet: @planet,
- twitter: blog['twitter']
- )
- end
+ @planet = Planet.new(
+ config: conf.fetch('planet', {}),
+ blogs: conf.fetch('blogs', [])
+ )
@planet.aggregate
View
@@ -1,14 +1,23 @@
-require 'feedzirra'
-require 'mustache'
require 'planet/version'
+require 'planet/blog'
class Planet
attr_accessor :config, :blogs
def initialize(attributes = {})
self.config = attributes[:config]
- self.blogs = attributes.fetch(:blogs, [])
+ self.blogs = attributes.fetch(:blogs, []).map do |blog|
+ Blog.new(
+ feed: blog['feed'],
+ url: blog['url'],
+ author: blog['author'],
+ image: blog['image'],
+ posts: [],
+ planet: self,
+ twitter: blog['twitter']
+ )
+ end
end
def posts
@@ -33,129 +42,4 @@ def write_posts
File.open(file_name + '.markdown', "w+") { |f| f.write(post.to_s) }
end
end
-
- class Post
-
- attr_accessor :title, :content, :date, :url, :blog
-
- def initialize(attributes = {})
- self.title = attributes[:title]
- self.content = attributes[:content]
- self.date = attributes[:date]
- self.url = attributes[:url]
- self.blog = attributes[:blog]
- end
-
- def to_s
- "#{ header }#{ content }#{ footer }"
- end
-
- def to_hash
- {
- post_content: self.content,
- post_title: self.title,
- post_date: self.date,
- image_url: self.blog.image,
- author: self.blog.author,
- blog_url: self.blog.url,
- blog_name: self.blog.name,
- post_url: self.url,
- twitter: self.blog.twitter,
- twitter_url: "http://twitter.com/#{ self.blog.twitter }"
- }
- end
-
- def header
- ## TODO: We need categories/tags
- file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'header.md'
- file_contents = File.read(file)
-
- Mustache.render(file_contents, self.to_hash)
- end
-
- def footer
- file = self.blog.planet.config.fetch('templates_directory', '_layouts/') + 'author.html'
- file_contents = File.read(file)
-
- Mustache.render(file_contents, self.to_hash)
- end
-
- def file_name
- name_date = date ? date.strftime('%Y-%m-%d') : nil
- name_title = title.downcase.scan(/\w+/).join('-')
-
- [name_date, name_title].join('-')
- end
-
- end
-
- class Blog
-
- attr_accessor :url, :feed, :name, :author, :image, :twitter, :posts, :planet
-
- def initialize(attributes = {})
- self.url = attributes[:url]
- self.feed = attributes[:feed]
- self.name = attributes[:name]
- self.author = attributes[:author]
- self.image = attributes[:image]
- self.twitter = attributes[:twitter]
- self.posts = attributes.fetch(:posts, [])
- self.planet = attributes[:planet]
- end
-
- def fetch
- feed = Feedzirra::Feed.fetch_and_parse(self.feed)
-
- self.name ||= feed.title || 'the source'
- self.url ||= feed.url
-
- if self.url.nil?
- abort "#{ self.author }'s blog does not have a url field on it's feed, you will need to specify it on planet.yml"
- end
-
- feed.entries.each do |entry|
- ## TODO: I should probably consider using feed 'adapters' for specific
- ## blog engine feeds that don't have their stuff on the standard fields.
- ## Example: blogspot has the content on "summary" instead of content ¬¬.
- content = if !entry.content.nil?
- self.sanitize_images(entry.content.strip)
- elsif !entry.summary.nil?
- self.sanitize_images(entry.summary.strip)
- else
- abort "=> No content found on entry"
- end
-
- title = if !entry.title.nil?
- entry.title.sanitize
- else
- self.name
- end
-
- self.posts << @post = Post.new(
- title: title,
- content: content,
- date: entry.published,
- url: self.url + entry.url,
- blog: self
- )
-
- puts "=> Found post titled #{ @post.title } - by #{ @post.blog.author }"
- end
- end
-
- def sanitize_images(html)
- ## We take all images with src not matching http refs and append
- ## the original blog to them.
- html.scan(/<img src="([^h"]+)"/).flatten.each do |img|
- if img[0] == '/'
- html.gsub!(img, "#{ self.url }#{ img }")
- else
- html.gsub!(img, "#{ self.url }/#{ img }")
- end
- end
-
- html
- end
- end
end
View
@@ -0,0 +1,79 @@
+require 'planet/post'
+require 'planet/parsers'
+
+class Planet
+ class Blog
+
+ attr_accessor :url, :feed, :type, :name, :author, :image, :twitter, :posts, :planet
+
+ def initialize(attributes = {})
+ self.url = attributes[:url]
+ self.feed = attributes[:feed]
+ self.type = attributes[:type]
+ self.name = attributes[:name]
+ self.author = attributes[:author]
+ self.image = attributes[:image]
+ self.twitter = attributes[:twitter]
+ self.posts = attributes.fetch(:posts, [])
+ self.planet = attributes[:planet]
+
+ # get parser-manager instance
+ @parsers = Parsers.new
+ end
+
+ def fetch
+ # given parser can be set arbitrarily with :type or inferred from the domain
+ parser = self.type ? @parsers.get_parser(self.type) : @parsers.get_parser_for(self.feed)
+
+ # parser classes should mimic the Feedzirra::Feed interface
+ feed = parser.fetch_and_parse(self.feed)
+
+ self.name ||= feed.title || 'the source'
+ self.url ||= feed.url
+
+ if self.url.nil?
+ abort "#{ self.author }'s blog does not have a url field on it's feed, you will need to specify it on planet.yml"
+ end
+
+ feed.entries.each do |entry|
+ content = if !entry.content.nil?
+ self.sanitize_images(entry.content.strip)
+ elsif !entry.summary.nil?
+ self.sanitize_images(entry.summary.strip)
+ else
+ abort "=> No content found on entry"
+ end
+
+ title = if !entry.title.nil?
+ entry.title.sanitize
+ else
+ self.name
+ end
+
+ self.posts << @post = Post.new(
+ title: title,
+ content: content,
+ date: entry.published,
+ url: self.url + entry.url,
+ blog: self
+ )
+
+ puts "=> Found post titled #{ @post.title } - by #{ @post.blog.author }"
+ end
+ end
+
+ def sanitize_images(html)
+ ## We take all images whose src is not an http ref and prepend
+ ## the original blog's url to them.
+ html.scan(/<img src="([^h"]+)"/).flatten.each do |img|
+ if img[0] == '/'
+ html.gsub!(img, "#{ self.url }#{ img }")
+ else
+ html.gsub!(img, "#{ self.url }/#{ img }")
+ end
+ end
+
+ html
+ end
+ end
+end
View
@@ -0,0 +1,64 @@
+require 'feedzirra'
+require 'set'
+
+class Planet
+ # Parsers class - manager for the feed parsers
+ #
+ # parser classes inherit from Planet::Parsers::BaseParser
+ # and are added automatically to the list of available parsers.
+ # files located on planet/parsers are automatically loaded.
+ class Parsers
+ @@parsers = Set.new
+
+ def self.add_parser(parser)
+ @@parsers << parser
+ end
+
+ # Parsers instances keep indexes of the available parsers and
+ # check for duplicate definitions (this has to be done in an instance
+ # because #inherited gets called as soon as the subclass is seen,
+ # before it is fully defined).
+ def initialize
+ @types, @domains = {}, {}
+
+ @@parsers.each do |parser|
+ new_type, new_domains = parser.type, parser.domains
+
+ fail("duplicate type") if new_type and @types.has_key? new_type
+ fail("overlapping domains") unless (@domains.keys & new_domains).empty?
+
+ @types[new_type] = parser if new_type
+ new_domains.each do |new_domain|
+ @domains[new_domain] = parser
+ end
+ end
+ end
+
+ # returns the appropriate parser based on the type
+ def get_parser(type)
+ begin
+ return @types.fetch(type)
+ rescue KeyError => e
+ raise(ArgumentError, "No parser for type '#{ type }'", caller)
+ end
+ end
+
+ # returns any parser that can handle this feed's domain,
+ # defaults to Feedzirra::Feed if none is available.
+ def get_parser_for(feed)
+ feed_domain = URI(feed).host
+
+ @domains.each do |domain, parser|
+ return parser if feed_domain.end_with? domain
+ end
+
+ return Feedzirra::Feed # default generic parser
+ end
+ end
+end
+
+# load parsers
+dirname = File.join([File.dirname(__FILE__), 'parsers'])
+Dir.open(dirname).each do |filename|
+ require "#{dirname}/#{filename}" if filename.end_with? '.rb'
+end
@@ -0,0 +1,24 @@
+class Planet
+ class Parsers
+ # base class for feed parsers
+ # subclasses should declare @type and @domains
+ # and also mimic the Feedzirra interface.
+ class BaseParser
+ def self.type
+ @type
+ end
+
+ def self.domains
+ @domains || []
+ end
+
+ def self.inherited(parser)
+ Parsers.add_parser parser
+ end
+
+ def self.fetch_and_parse(feed)
+ raise(Exception, "Not implemented", caller)
+ end
+ end
+ end
+end
Oops, something went wrong.

0 comments on commit 45d74df

Please sign in to comment.