Skip to content
This repository has been archived by the owner on May 14, 2022. It is now read-only.

Commit

Permalink
Only try to use languages supported by Tesseract
Browse files Browse the repository at this point in the history
  • Loading branch information
escowles committed Apr 26, 2016
1 parent 817c2fe commit 3c86b11
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 8 deletions.
14 changes: 7 additions & 7 deletions app/services/ocr_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ def params
end

# Builds the language string used for the OCR run.
#
# Prefers the parent's ocr_language values and falls back to its language
# values; in both cases only the codes that try_language lets through are
# used. Several codes are joined with "+". When neither field yields a
# usable code the result is "eng".
#
# @return [String] a single code, several codes joined with "+", or "eng"
def language
  %i[ocr_language language].each do |field|
    supported = try_language(field)
    # try_language returns the result of #select, so an emptiness check
    # is enough here.
    return supported.join("+") unless supported.empty?
  end
  "eng"
end

# Reads +field+ from the parent and keeps only the values that have an entry
# in Tesseract.languages (looked up by the value converted to a symbol).
#
# @param field [Symbol] name of the attribute to read from the parent
# @return [Array] the values with a Tesseract entry; empty when the parent
#   has no value for the field
def try_language(field)
  candidates = parent.try(field) || []
  candidates.reject { |code| Tesseract.languages[code.to_sym].nil? }
end

def parent
Expand Down
2 changes: 1 addition & 1 deletion app/views/curation_concerns/base/_iiif_fields.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
</div>

<div>
<%= f.input :ocr_language, collection: Tesseract.languages.map(&:reverse), input_html: { name: "#{f.object.model_name.singular}[ocr_language][]", multiple: true, class: 'resource-radio-button' } %>
<%= f.input :ocr_language, collection: Tesseract.languages.map(&:reverse).sort, input_html: { name: "#{f.object.model_name.singular}[ocr_language][]", multiple: true, class: 'resource-radio-button' } %>
</div>
<% end %>
</div>
15 changes: 15 additions & 0 deletions spec/services/ocr_runner_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# Shared setup: the file set reports `parent` as its only generic work, and
# Tesseract.languages is stubbed with a fixed hash containing just eng, ita
# and spa, so any other code counts as unsupported in these examples.
before do
allow(file_set).to receive(:generic_works).and_return([parent])
allow(Tesseract).to receive(:languages).and_return(eng: ["English"], ita: ["Italian"], spa: ["Spanish"])
end

describe "#language" do
Expand All @@ -29,5 +30,19 @@
expect(subject.send(:language)).to eq('eng')
end
end

# 'xxx' is not a key of the stubbed Tesseract.languages hash and the factory
# is given no explicit language, so #language is expected to return 'eng'.
context "when ocr_language is an unsupported language" do
let(:parent) { FactoryGirl.build(:scanned_resource, ocr_language: ['xxx']) }
it "defaults to english" do
expect(subject.send(:language)).to eq('eng')
end
end

# ocr_language holds only the unsupported 'xxx', while language holds 'spa',
# which is a key of the stubbed Tesseract.languages hash; #language is
# expected to skip the former and return the latter.
context "when ocr_language is an unsupported language, but language is supported" do
let(:parent) { FactoryGirl.build(:scanned_resource, language: ['spa'], ocr_language: ['xxx']) }
it "uses the supported language value" do
expect(subject.send(:language)).to eq('spa')
end
end
end
end

0 comments on commit 3c86b11

Please sign in to comment.