Merge pull request #421 from ddddavidmartin/clarify_forgiving_ocr_handling

Clarify forgiving ocr handling
the-paperless-project · Oct 8, 2018 · bd95804 · bd95804
2 parents 8dc355a + 818780a
commit bd95804
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 1 deletion.
diff --git a/paperless.conf.example b/paperless.conf.example
@@ -188,6 +188,11 @@ PAPERLESS_DEBUG="false"
 #PAPERLESS_CONSUMER_LOOP_TIME=10
 
 
+# By default Paperless stops consuming a document if no language can be detected.
+# Set to true to consume documents even if the language detection fails.
+#PAPERLESS_FORGIVING_OCR="false"
+
+
 ###############################################################################
 ####                            Interface                                  ####
 ###############################################################################

diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
@@ -153,7 +153,10 @@ def _get_ocr(self, imgs):
                 )
                 raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                 return raw_text
-            raise OCRError("Language detection failed")
+            error_msg = ("Language detection failed. Set "
+                         "PAPERLESS_FORGIVING_OCR in config file to continue "
+                         "anyway.")
+            raise OCRError(error_msg)
 
         if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
             raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)