diff --git a/app/services/ocr_runner.rb b/app/services/ocr_runner.rb
index eb605323e..fc6f1a377 100644
--- a/app/services/ocr_runner.rb
+++ b/app/services/ocr_runner.rb
@@ -34,13 +34,21 @@ def params
end
+ # Returns the OCR language string: the parent's supported ocr_language values,
+ # else its supported language values, joined with "+"; "eng" when neither has any.
def language
- if parent.try(:ocr_language).blank? && parent.try(:language).blank?
- "eng"
- elsif parent.try(:ocr_language).blank?
- parent.try(:language).join("+")
- else
- parent.try(:ocr_language).join("+")
- end
+ [:ocr_language, :language].each do |field|
+   # Filter once per field instead of once for the check and again for the join.
+   supported = try_language(field)
+   return supported.join("+") unless supported.empty?
+ end
+ "eng"
+ end
+
+ # Returns the values of +field+ on the parent that are keys of
+ # Tesseract.languages; a missing or nil field yields an empty array.
+ def try_language(field)
+   Array(parent.try(field)).select { |lang| Tesseract.languages.key?(lang.to_sym) }
end
def parent
diff --git a/app/views/curation_concerns/base/_iiif_fields.html.erb b/app/views/curation_concerns/base/_iiif_fields.html.erb
index 3d3a5e963..a2ca2316c 100644
--- a/app/views/curation_concerns/base/_iiif_fields.html.erb
+++ b/app/views/curation_concerns/base/_iiif_fields.html.erb
@@ -29,7 +29,7 @@
- <%= f.input :ocr_language, collection: Tesseract.languages.map(&:reverse), input_html: { name: "#{f.object.model_name.singular}[ocr_language][]", multiple: true, class: 'resource-radio-button' } %>
+ <%= f.input :ocr_language, collection: Tesseract.languages.map { |code, label| [label, code] }.sort, input_html: { name: "#{f.object.model_name.singular}[ocr_language][]", multiple: true, class: 'resource-radio-button' } %>
<% end %>
diff --git a/spec/services/ocr_runner_spec.rb b/spec/services/ocr_runner_spec.rb
index cad87c420..4d80f6757 100644
--- a/spec/services/ocr_runner_spec.rb
+++ b/spec/services/ocr_runner_spec.rb
@@ -6,6 +6,7 @@
before do
allow(file_set).to receive(:generic_works).and_return([parent])
+ allow(Tesseract).to receive_messages(languages: { eng: ["English"], ita: ["Italian"], spa: ["Spanish"] })
end
describe "#language" do
@@ -29,5 +30,28 @@
expect(subject.send(:language)).to eq('eng')
end
end
+
+ context "when ocr_language is an unsupported language" do
+ let(:parent) { FactoryGirl.build(:scanned_resource, ocr_language: ['xxx']) }
+ it "defaults to english" do
+ expect(subject.send(:language)).to eq('eng')
+ end
+ end
+
+ context "when ocr_language is an unsupported language, but language is supported" do
+ let(:parent) { FactoryGirl.build(:scanned_resource, language: ['spa'], ocr_language: ['xxx']) }
+ it "uses the supported language value" do
+ expect(subject.send(:language)).to eq('spa')
+ end
+ end
+
+ # Unsupported codes are filtered out value by value, so a supported code in
+ # the same field must still be used rather than falling through.
+ context "when ocr_language mixes supported and unsupported languages" do
+ let(:parent) { FactoryGirl.build(:scanned_resource, ocr_language: ['ita', 'xxx']) }
+ it "uses only the supported ocr_language value" do
+ expect(subject.send(:language)).to eq('ita')
+ end
+ end
end
end