Skip to content

Commit

Permalink
Updated from tika-app-1.4.jar to tika-app-1.9.jar
Browse files (browse the repository at this point in the history)
  • Loading branch information
mrcsparker committed Jun 30, 2015
1 parent cd0d5ba commit 170290a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 20 deletions.
Binary file renamed ext/tika-app-1.4.jar → ext/tika-app-1.9.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion lib/ruby_tika_app.rb
Expand Up @@ -24,7 +24,7 @@ def initialize(document)

java_cmd = 'java'
java_args = '-server -Djava.awt.headless=true'
- tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.4.jar"
+ tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.9.jar"

@tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
end
Expand Down
4 changes: 2 additions & 2 deletions ruby_tika_app.gemspec
Expand Up @@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)

Gem::Specification.new do |s|
s.name = 'ruby_tika_app'
- s.version = '1.4.0'
+ s.version = '1.5.0'
s.platform = Gem::Platform::RUBY
s.authors = ['Chris Parker']
s.email = %w(mrcsparker@gmail.com)
Expand All @@ -23,7 +23,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency('open4')

s.add_development_dependency('rake')
- s.add_development_dependency('rspec', '~> 2.13.0')
+ s.add_development_dependency('rspec', '~> 3.3.0')
s.add_development_dependency('bundler', '>= 1.0.15')
s.add_development_dependency('simplecov')
s.add_development_dependency('json')
Expand Down
34 changes: 17 additions & 17 deletions spec/ruby_tika_app_spec.rb
Expand Up @@ -16,14 +16,14 @@
expect {
rta = RubyTikaApp.new('No file')
rta.to_xml
- }.to raise_error
+ }.to raise_error(RuntimeError)
end
end

describe '#to_xml' do
it 'header' do
rta = RubyTikaApp.new(@test_file)
- rta.to_xml[0..37].should == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ expect(rta.to_xml[0..37]).to eq("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
end

it 'middle' do
Expand All @@ -32,81 +32,81 @@

xml_size = xml.size / 2

- xml[xml_size..(xml_size + 100)].should == "ction IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected graphs, a"
+ expect(xml[xml_size..(xml_size + 100)]).to eq("plicated nodes make the node distribution converge\nto uniform distribution. We do not need to conside")
end
end

describe '#to_html' do
it 'header' do
rta = RubyTikaApp.new(@test_file)
- rta.to_html[0..42].should == "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+ expect(rta.to_html[0..42]).to eq("<html xmlns=\"http://www.w3.org/1999/xhtml\">")
end

it 'middle' do
rta = RubyTikaApp.new(@test_file)
- rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph sampling simplex - 11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29"
+ expect(rta.to_html[1000 ... 1100]).to eq("on/pdf\"/>\n<meta name=\"X-Parsed-By\" content=\"org.apache.tika.parser.DefaultParser\"/>\n<meta name=\"X-Pa")
end
end

describe '#to_json' do
it 'header' do
rta = RubyTikaApp.new(@test_file)
- rta.to_json[0..42].should == "{ \"Application\":\"\\u0027Certified by IEEE PD"
+ expect(rta.to_json[0..42]).to eq("{\"Application\":\"\\u0027Certified by IEEE PDF")
end

it 'middle' do
rta = RubyTikaApp.new(@test_file)
- rta.to_json[100 ... 150].should == "h\":171510, \n\"Content-Type\":\"application/pdf\", \n\"Cr"
+ expect(rta.to_json[100 ... 150]).to eq("\"171510\",\"Content-Type\":\"application/pdf\",\"Creatio")
end
end

describe '#to_text' do
it 'header' do
rta = RubyTikaApp.new(@test_file)
- rta.to_text[0..42].should == "Understanding Graph Sampling Algorithms\nfor"
+ expect(rta.to_text[0..42]).to eq("Understanding Graph Sampling Algorithms\nfor")
end

it 'middle' do
rta = RubyTikaApp.new(@test_file)
- rta.to_text[100 ... 150].should == "n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin"
+ expect(rta.to_text[100 ... 150]).to eq("n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin")
end
end

describe '#to_text_main' do
it 'header' do
rta = RubyTikaApp.new(@test_file)
- rta.to_text_main[0..42].should == 'Understanding Graph Sampling Algorithms for'
+ expect(rta.to_text_main[0..42]).to eq('Understanding Graph Sampling Algorithms for')
end

it 'middle' do
rta = RubyTikaApp.new(@test_file)
- rta.to_text_main[100 ... 150].should == "n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing"
+ expect(rta.to_text_main[100 ... 150]).to eq("n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing")
end
end

describe '#to_metadata' do
it 'header' do
rta = RubyTikaApp.new(@test_file)
- rta.to_metadata[0..42].should == "Application: 'Certified by IEEE PDFeXpress "
+ expect(rta.to_metadata[0..42]).to eq("Application: 'Certified by IEEE PDFeXpress ")
end

it 'middle' do
rta = RubyTikaApp.new(@test_file)
- rta.to_metadata[100 ... 150].should == "Type: application/pdf\nCreation-Date: 2011-03-29T12"
+ expect(rta.to_metadata[100 ... 150]).to eq("Type: application/pdf\nCreation-Date: 2011-03-29T12")
end
end

describe 'external URLs' do
it 'should be able to parse an http url' do
rta = RubyTikaApp.new('http://localhost:9299/cnn.com')
- rta.to_text.should_not be_nil
- rta.to_text.should eq(RubyTikaApp.new(@cnn_com_file).to_text)
+ expect(rta.to_text).to_not be_nil
+ expect(rta.to_text).to eq(RubyTikaApp.new(@cnn_com_file).to_text)
end

it 'should be able to parse another http url' do
rta = RubyTikaApp.new('http://localhost:9299/news.ycombinator.com')
- rta.to_text.should_not be_nil
- rta.to_text.should eq(RubyTikaApp.new(@news_ycombinator_com_file).to_text)
+ expect(rta.to_text).to_not be_nil
+ expect(rta.to_text).to eq(RubyTikaApp.new(@news_ycombinator_com_file).to_text)
end
end

Expand Down

0 comments on commit 170290a

Please sign in to comment.