Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'master' of github.com:louismullie/treat

Conflicts:
	Gemfile
  • Loading branch information...
commit e483b764e4847e48b39e91a77af8a8baa1a1d056 2 parents 3a367f7 + cf8acd2
@louismullie authored
View
1  Gemfile
@@ -4,6 +4,7 @@ gemspec
gem 'birch'
gem 'schiphol'
+gem 'yomu'
group :test do
gem 'rspec'
View
2  lib/treat/config/data/languages/german.rb
@@ -1,3 +1,5 @@
+#encoding: UTF-8
+
{
dependencies: [
'punkt-segmenter',
View
2  lib/treat/workers/formatters/readers/autoselect.rb
@@ -3,7 +3,7 @@ class Treat::Workers::Formatters::Readers::Autoselect
ExtensionRegexp = /^.*?\.([a-zA-Z0-9]{2,5})$/
ImageExtensions = ['gif', 'jpg', 'jpeg', 'png']
DefaultOptions = {
- :default_to => 'txt'
+ :default_to => 'document'
}
# Choose a reader to use.
View
17 lib/treat/workers/formatters/readers/document.rb
@@ -0,0 +1,17 @@
+require 'yomu'
+
+# This class is a wrapper for Yomu.
+# Yomu is a library for extracting text and metadata from files and documents
+# using the Apache Tika content analysis toolkit.
+class Treat::Workers::Formatters::Readers::Document
+ # Read document.file through Yomu and store the extracted text in document.value.
+ # Also sets :format to the first extension of Yomu's MIME type; returns document.
+ # Options: none (the options argument is never read).
+ def self.read(document, options = {})
+ yomu = Yomu.new(document.file)
+
+ document.value = yomu.text
+ document.set :format, yomu.mimetype.extensions.first
+ document
+ end
+end
View
4 spec/entities/document.rb
@@ -9,10 +9,10 @@ module Treat::Specs::Entities
it "opens the file and reads its " +
"content into a document" do
f = Treat.paths.spec +
- 'workers/examples/english/mathematicians/leibniz.txt'
+ 'workers/examples/english/mathematicians/pythagoras.docx'
d = Treat::Entities::Document.build(f)
d.should be_an_instance_of Treat::Entities::Document
- d.to_s.index('Gottfried Leibniz').should_not eql nil
+ d.to_s.index('Pythagoras of Samos').should_not eql nil
end
end
View
BIN  spec/workers/examples/english/mathematicians/pythagoras.docx
Binary file not shown
View
3  treat.gemspec
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
# Runtime dependencies
s.add_runtime_dependency 'schiphol'
s.add_runtime_dependency 'birch'
+ s.add_runtime_dependency 'yomu'
# Development dependencies
s.add_development_dependency 'rspec'
@@ -36,4 +37,4 @@ Gem::Specification.new do |s|
To complete the installation, run `require treat` in an IRB
terminal, followed by `Treat::Core::Installer.install`. }
-end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.