diff --git a/app/services/ocr_runner.rb b/app/services/ocr_runner.rb index eb605323e..fc6f1a377 100644 --- a/app/services/ocr_runner.rb +++ b/app/services/ocr_runner.rb @@ -34,13 +34,13 @@ def params end def language - if parent.try(:ocr_language).blank? && parent.try(:language).blank? - "eng" - elsif parent.try(:ocr_language).blank? - parent.try(:language).join("+") - else - parent.try(:ocr_language).join("+") - end + return try_language(:ocr_language).join("+") unless try_language(:ocr_language).blank? + return try_language(:language).join("+") unless try_language(:language).blank? + "eng" + end + + def try_language(field) + (parent.try(field) || []).select { |lang| !Tesseract.languages[lang.to_sym].nil? } end def parent diff --git a/app/views/curation_concerns/base/_iiif_fields.html.erb b/app/views/curation_concerns/base/_iiif_fields.html.erb index 3d3a5e963..a2ca2316c 100644 --- a/app/views/curation_concerns/base/_iiif_fields.html.erb +++ b/app/views/curation_concerns/base/_iiif_fields.html.erb @@ -29,7 +29,7 @@
- <%= f.input :ocr_language, collection: Tesseract.languages.map(&:reverse), input_html: { name: "#{f.object.model_name.singular}[ocr_language][]", multiple: true, class: 'resource-radio-button' } %> + <%= f.input :ocr_language, collection: Tesseract.languages.map(&:reverse).sort, input_html: { name: "#{f.object.model_name.singular}[ocr_language][]", multiple: true, class: 'resource-radio-button' } %>
<% end %> diff --git a/spec/services/ocr_runner_spec.rb b/spec/services/ocr_runner_spec.rb index cad87c420..4d80f6757 100644 --- a/spec/services/ocr_runner_spec.rb +++ b/spec/services/ocr_runner_spec.rb @@ -6,6 +6,7 @@ before do allow(file_set).to receive(:generic_works).and_return([parent]) + allow(Tesseract).to receive(:languages).and_return(eng: ["English"], ita: ["Italian"], spa: ["Spanish"]) end describe "#language" do @@ -29,5 +30,19 @@ expect(subject.send(:language)).to eq('eng') end end + + context "when ocr_language is an unsupported language" do + let(:parent) { FactoryGirl.build(:scanned_resource, ocr_language: ['xxx']) } + it "defaults to english" do + expect(subject.send(:language)).to eq('eng') + end + end + + context "when ocr_language is an unsupported language, but language is supported" do + let(:parent) { FactoryGirl.build(:scanned_resource, language: ['spa'], ocr_language: ['xxx']) } + it "uses the supported language value" do + expect(subject.send(:language)).to eq('spa') + end + end end end