Permalink
Browse files

Merge branch 'master' of github.com:louismullie/treat

Conflicts:
	Gemfile
  • Loading branch information...
louismullie committed Jun 3, 2013
2 parents 3a367f7 + cf8acd2 commit e483b764e4847e48b39e91a77af8a8baa1a1d056
View
@@ -4,6 +4,7 @@ gemspec
gem 'birch'
gem 'schiphol'
gem 'yomu'
group :test do
gem 'rspec'
@@ -1,3 +1,5 @@
#encoding: UTF-8
{
dependencies: [
'punkt-segmenter',
@@ -3,7 +3,7 @@ class Treat::Workers::Formatters::Readers::Autoselect
ExtensionRegexp = /^.*?\.([a-zA-Z0-9]{2,5})$/
ImageExtensions = ['gif', 'jpg', 'jpeg', 'png']
DefaultOptions = {
:default_to => 'txt'
:default_to => 'document'
}
# Choose a reader to use.
@@ -0,0 +1,17 @@
require 'yomu'
# Reader that delegates text extraction to Yomu.
#
# Yomu is a library for extracting text and metadata from files
# and documents using the Apache Tika content analysis toolkit.
class Treat::Workers::Formatters::Readers::Document

  # Read the textual content of any document through Yomu.
  #
  # The extracted text is stored as the document's value, and the
  # document's :format is set to the first extension listed for the
  # MIME type that Yomu reports for the file.
  #
  # Options: none.
  #
  # Returns the document that was passed in.
  def self.read(document, options = {})
    extractor = Yomu.new(document.file)
    document.value = extractor.text

    # Derive the format from the reported MIME type
    # rather than from the file name.
    extension = extractor.mimetype.extensions.first
    document.set(:format, extension)

    document
  end

end
@@ -9,10 +9,10 @@ module Treat::Specs::Entities
it "opens the file and reads its " +
"content into a document" do
f = Treat.paths.spec +
'workers/examples/english/mathematicians/leibniz.txt'
'workers/examples/english/mathematicians/pythagoras.docx'
d = Treat::Entities::Document.build(f)
d.should be_an_instance_of Treat::Entities::Document
d.to_s.index('Gottfried Leibniz').should_not eql nil
d.to_s.index('Pythagoras of Samos').should_not eql nil
end
end
Binary file not shown.
View
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
# Runtime dependencies
s.add_runtime_dependency 'schiphol'
s.add_runtime_dependency 'birch'
s.add_runtime_dependency 'yomu'
# Development dependencies
s.add_development_dependency 'rspec'
@@ -36,4 +37,4 @@ Gem::Specification.new do |s|
To complete the installation, run `require treat` in an IRB
terminal, followed by `Treat::Core::Installer.install`. }
end
end

0 comments on commit e483b76

Please sign in to comment.