Permalink
Browse files

Merge branch 'tabledetection2' of github.com:jazzido/tabula into tabledetection2
  • Loading branch information...
2 parents 44065a3 + d8f4a3a commit e59fd9eb5f23ea015cd68809ebf3c49dd9bfc611 @mtigas mtigas committed Jul 10, 2013
@@ -14,7 +14,7 @@ def perform
pdf_file.close
page_areas_by_page = (0...page_count).map do |page_index|
- at( (page_count + page_index) / 2, page_count, "asdfasdf...") #starting at 50%...
+ at( (page_count + page_index) / 2, page_count, "auto-detecting tables...") #starting at 50%...
clean_lines = Tabula::Ruling::clean_rulings(Tabula::LSD::detect_lines_in_pdf_page(file, page_index))
page_areas = Tabula::TableGuesser::find_rects_from_lines(clean_lines)
page_areas.map!{|rect| rect.dims(:left, :top, :width, :height)}
@@ -2,7 +2,7 @@
require_relative '../../thumbnail_generator.rb'
class GenerateThumbnailJob < Tabula::Background::Job
- # args: (:file, :output_dir, :thumbnail_sizes, :page_index_job)
+ # args: (:file, :output_dir, :thumbnail_sizes, :page_index_job_uuid)
def perform
@@ -11,31 +11,31 @@ def perform
file = options[:file]
output_dir = options[:output_dir]
thumbnail_sizes = options[:thumbnail_sizes]
- page_index_job = options[:page_index_job]
- table_detection_job = options[:table_detection_job]
+ page_index_job_uuid = options[:page_index_job_uuid]
+ detect_tables_job_uuid = options[:detect_tables_job_uuid]
# return some status to browser
at(0, 100, "generating page thumbnails...")
generator = JPedalThumbnailGenerator.new(file, output_dir, thumbnail_sizes)
generator.add_observer(self, :at)
generator.generate_thumbnails!
- unless table_detection_job.nil?
- while !Tabula::Background::JobExecutor.get(table_detection_job).completed? do
- at(50, 100, "auto-detecting tables...",
+ unless detect_tables_job_uuid.nil?
+ detect_tables_job = Tabula::Background::JobExecutor.get(detect_tables_job_uuid)
+ while !detect_tables_job.completed? do
+ at(detect_tables_job.status['num'], detect_tables_job.status['total'], "auto-detecting tables...",
)
sleep 0.25
end
end
- while !Tabula::Background::JobExecutor.get(page_index_job).completed? do
+ while !Tabula::Background::JobExecutor.get(page_index_job_uuid).completed? do
at(99, 100, "generating page thumbnails...",
)
sleep 0.25
end
- at(100, 100, "complete",
- )
+ at(100, 100, "complete" )
end
end
@@ -55,7 +55,7 @@ def generate_thumbnails!
java.io.File.new(File.join(@output_directory,
"document_#{s}_#{i+1}.png")))
changed
- notify_observers(i+1, total_pages)
+ notify_observers(i+1, total_pages * 2)
end
end
@decoder.closePdfFile
View
@@ -151,7 +151,7 @@ def is_valid_pdf?(path)
file = File.join(file_path, 'document.pdf')
- # fire off background jobs
+ # fire off background jobs; in different orders if we're doing autodetection
document_metadata_job = GenerateDocumentMetadataJob.create(:filename => original_filename,
:id => file_id)
@@ -167,8 +167,8 @@ def is_valid_pdf?(path)
:output_dir => file_path)
upload_id = GenerateThumbnailJob.create(:file_id => file_id,
:file => file,
- :page_index_job => page_index_job,
- :table_detection_job => detect_tables_job,
+ :page_index_job_uuid => page_index_job,
+ :detect_tables_job_uuid => detect_tables_job,
:output_dir => file_path,
:thumbnail_sizes => [560])
res.redirect "/queue/#{upload_id}"

0 comments on commit e59fd9e

Please sign in to comment.