Skip to content

Commit

Permalink
Merge pull request #2 from andrusha/master
Browse files Browse the repository at this point in the history
Fixing errors on reading files with compound names
  • Loading branch information
mrcsparker committed Jan 9, 2013
2 parents 2b7e22f + fad8c86 commit 5e5487b
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
14 changes: 8 additions & 6 deletions lib/ruby_tika_app.rb
Expand Up @@ -16,14 +16,13 @@ def initialize status
end

# Records the document location and assembles the base command line used to
# invoke the bundled Tika jar.
#
# document - path of the file to hand to Tika; it is interpolated into a
#            "file://" URL below.
#
# NOTE(review): this text looks like a rendered diff in which the removed and
# the added version of a line both appear. Read as plain Ruby, the later
# assignment of each duplicated pair is the one that takes effect.
def initialize(document)

# Immediately overwritten by the next line.
@document = document
# Effective value: the given path prefixed with "file://".
@document = "file://#{document}"

java_cmd = 'java'
java_args = '-server -Djava.awt.headless=true'
# Jar location is resolved relative to the directory containing this file.
tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.2.jar"

# Immediately overwritten by the next line (jar path unquoted).
@tika_cmd = "#{java_cmd} #{java_args} -jar #{tika_path}"
# Effective value: jar path wrapped in single quotes, presumably so that a
# path containing spaces stays one shell argument; verify against how
# run_tika executes the command.
@tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
end

def to_xml
Expand Down Expand Up @@ -53,8 +52,7 @@ def to_metadata
private

def run_tika(option)

final_cmd = "#{@tika_cmd} #{option} #{@document}"
final_cmd = "#{@tika_cmd} #{option} '#{@document}'"
result = []


Expand All @@ -63,7 +61,7 @@ def run_tika(option)
stdout_result = stdout.read.strip
stderr_result = stderr.read.strip

unless stderr_result.strip == "" then
unless strip_stderr(stderr_result).empty?
raise(CommandFailedError.new(stderr_result),
"execution failed with status #{stderr_result}: #{final_cmd}")
end
Expand All @@ -75,4 +73,8 @@ def run_tika(option)
stderr.close
end

# Removes informational log lines from captured stderr text.
#
# Every line that begins with "info - " or "warn - " (case-insensitive) is
# blanked out; its line break is left in place. The result then has leading
# and trailing whitespace trimmed.
#
# s - the raw stderr String.
#
# Returns a new String; +s+ itself is not modified.
def strip_stderr(s)
  log_noise = /^(info|warn) - .*$/i
  without_noise = s.gsub(log_noise, '')
  without_noise.strip
end

end
File renamed without changes.
7 changes: 3 additions & 4 deletions spec/ruby_tika_app_spec.rb
Expand Up @@ -3,7 +3,7 @@
describe RubyTikaApp do

before(:each) do
@test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph_sampling_simplex11.pdf"
@test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph sampling simplex - 11.pdf"
end

describe "#to_xml" do
Expand All @@ -18,7 +18,7 @@

xml_size = xml.size / 2

xml[xml_size..(xml_size + 100)].should == "(Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected graphs"
xml[xml_size..(xml_size + 100)].should == "sides,\nMHRW performs better in well connected graphs than in\nloosely connected graphs, as it was orig"
end
end

Expand All @@ -30,7 +30,7 @@

it "middle" do
rta = RubyTikaApp.new(@test_file)
rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph_sampling_simplex11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29T13"
rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph sampling simplex - 11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29"
end
end

Expand Down Expand Up @@ -80,7 +80,6 @@
rta = RubyTikaApp.new(@test_file)
rta.to_metadata[100 ... 150].should == "Type: application/pdf\nCreation-Date: 2011-03-29T12"
end

end

end

0 comments on commit 5e5487b

Please sign in to comment.