Permalink
Browse files

Replace nokogiri with ox for faster sax parsing

  • Loading branch information...
1 parent c35ebb5 commit 2e8b4a9424c97be66afce9741d8584c8e77e0030 Gabriel Evans committed Jan 4, 2013
Showing with 93 additions and 220 deletions.
  1. +7 −7 Gemfile.lock
  2. +11 −11 eventful-ruby.gemspec
  3. +54 −22 lib/eventful/feed/document.rb
  4. +21 −180 lib/eventful/feed/node.rb
View
@@ -4,16 +4,16 @@ PATH
eventful-ruby (0.0.1)
activesupport (~> 3.1)
em-http-request (~> 1.0)
- faraday (>= 0.8.0, < 0.9.0)
- faraday_middleware (= 0.9.0)
+ faraday (~> 0.8)
+ faraday_middleware (~> 0.9)
hashie (~> 1.2.0)
multi_xml (~> 0.5)
- nokogiri (~> 1.5.5)
+ ox (~> 1.8)
GEM
remote: https://rubygems.org/
specs:
- activesupport (3.2.9)
+ activesupport (3.2.10)
i18n (~> 0.6)
multi_json (~> 1.0)
addressable (2.3.2)
@@ -53,19 +53,19 @@ GEM
hashie (1.2.0)
http_parser.rb (0.5.3)
i18n (0.6.1)
- listen (0.6.0)
+ listen (0.7.0)
lumberjack (1.0.2)
method_source (0.8.1)
multi_json (1.5.0)
multi_xml (0.5.1)
multipart-post (1.1.5)
- nokogiri (1.5.6)
+ ox (1.8.1)
pry (0.9.10)
coderay (~> 1.0.5)
method_source (~> 0.8)
slop (~> 3.3.1)
rake (10.0.3)
- rb-fsevent (0.9.2)
+ rb-fsevent (0.9.3)
rb-inotify (0.8.8)
ffi (>= 0.5.0)
redcarpet (2.2.2)
View
@@ -2,28 +2,28 @@
require File.expand_path('../lib/eventful/version', __FILE__)
Gem::Specification.new do |gem|
- gem.authors = ["Tabeso Team"]
- gem.email = ["dev@tabeso.com"]
- gem.description = "Interface with Eventful.com API"
- gem.summary = ""
- gem.homepage = "https://github.com/tabeso/eventful-ruby"
+ gem.authors = ['Tabeso Team']
+ gem.email = ['dev@tabeso.com']
+ gem.description = 'Interface with Eventful.com API'
+ gem.summary = ''
+ gem.homepage = 'https://github.com/tabeso/eventful-ruby'
gem.files = `git ls-files`.split($\)
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
- gem.name = "eventful-ruby"
- gem.require_paths = ["lib"]
+ gem.name = 'eventful-ruby'
+ gem.require_paths = ['lib']
gem.version = Eventful::VERSION
gem.add_dependency 'activesupport', '~> 3.1'
- gem.add_dependency 'faraday', '>= 0.8.0', '< 0.9.0'
- gem.add_dependency 'faraday_middleware', '0.9.0'
- gem.add_dependency 'nokogiri', '~> 1.5.5'
+ gem.add_dependency 'faraday', '~> 0.8'
+ gem.add_dependency 'faraday_middleware', '~> 0.9'
gem.add_dependency 'multi_xml', '~> 0.5'
gem.add_dependency 'hashie', '~> 1.2.0'
# Feed streaming
gem.add_dependency 'em-http-request', '~> 1.0'
+ gem.add_dependency 'ox', '~> 1.8'
# Basic
gem.add_development_dependency 'rake'
@@ -46,4 +46,4 @@ Gem::Specification.new do |gem|
gem.add_development_dependency 'rb-fsevent'
gem.add_development_dependency 'rb-inotify'
gem.add_development_dependency 'growl'
-end
+end
@@ -1,26 +1,61 @@
require 'fiber'
-require 'nokogiri'
+require 'ox'
+require 'zlib'
module Eventful
module Feed
- class Document < Nokogiri::XML::SAX::Document
+ class Decoder
+ def initialize(io)
+ begin
+ @stream = Zlib::GzipReader.new(io, encoding: 'binary')
+ rescue Zlib::GzipFile::Error => e
+ # Fall back to the original IO object.
+ @stream = io
+ @stream.rewind
+ end
+ end
+
+ def read(length = 0, buffer = '')
+ buffer << @stream.read(length).to_s
+ end
+ end
+
+ ##
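+ # Parses Eventful's gzip-compressed XML feeds and exposes the parsed
+ # resources as an enumerator through {#resources}. By default, each resource
+ # is yielded as a hash (a Hashie::Mash) rather than an instance of
+ # {Eventful::Resource}; pass load: true to {#resources} to get resource
+ # instances instead.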
+ #
+ # @example Iterating over a feed with hashes.
+ # feed = Eventful::Feed::Document.open('events.xml.gz', :events)
+ # feed.resources.each do |event|
+ # puts event[:name]
+ # end
+ #
+ # @example Iterating over a feed with resource instances.
+ # feed.resources(load: true).each do |event|
+ # puts event.name
+ # end
+ class Document < Ox::Sax
attr_reader :resource_name
attr_reader :resource_class
def self.open(path, resource)
- new(resource).parse(File.open(path))
+ new(resource).parse(File.open(path, 'rb'))
end
def initialize(resource)
- @resource_name = resource.to_s.singularize
+ @resource_name = resource.to_s.singularize.to_sym
@resource_class = "Eventful::#{resource_name.capitalize}".constantize
end
def parse(io)
@parser = Fiber.new do
- require 'zlib'
- Nokogiri::XML::SAX::Parser.new(self).parse(Zlib::GzipReader.new(io))
+ begin
+ Ox.sax_parse(self, Decoder.new(io))
+ ensure
+ io.close
+ end
end
self
end
@@ -31,11 +66,11 @@ def start_element(name, attrs = [])
end
end
- def characters(string)
+ def text(string)
return unless in_resource?
- resource_stack.last.add_node(string) unless string.strip.length == 0 || resource_stack.empty?
+ resource_stack.last << string
end
- alias :cdata_block :characters
+ alias :cdata :text
def end_element(name)
return unless in_resource?
@@ -46,7 +81,7 @@ def end_element(name)
resource_stack.clear
elsif resource_stack.size > 1
last = resource_stack.pop
- resource_stack.last.add_node last
+ resource_stack.last << last
end
end
@@ -58,24 +93,21 @@ def resource_stack
@resource_stack ||= []
end
- def add_resource(data)
- Fiber.yield(build_resource(data))
+ def add_resource(node)
+ Fiber.yield(node.to_hash[resource_name])
end
- def resources
+ def resources(options = {})
Enumerator.new do |objects|
- while object = @parser.resume
- objects << object
+ while data = @parser.resume
+ objects << if options[:load]
+ resource_class.instantiate(data)
+ else
+ Hashie::Mash.new(data)
+ end
end
end
end
-
- private
-
- def build_resource(node)
- data = node.to_hash[resource_name]
- resource_class.instantiate(data)
- end
end
end
end
Oops, something went wrong.

0 comments on commit 2e8b4a9

Please sign in to comment.