mindee · fg-mindee · Feb 9, 2021 · Feb 9, 2021 · Feb 9, 2021 · Feb 9, 2021
diff --git a/doctr/documents/reader.py b/doctr/documents/reader.py
@@ -3,9 +3,10 @@
 # This program is licensed under the Apache License version 2.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
 
-import fitz
 import numpy as np
 import cv2
+from pathlib import Path
+import fitz
 from typing import List, Tuple, Optional, Any
 
 __all__ = ['read_pdf', 'read_img']
@@ -30,7 +31,13 @@ def read_img(
         the page decoded as numpy ndarray of shape H x W x 3
     """
 
+    if not Path(file_path).is_file():
+        raise FileNotFoundError(f"unable to access {file_path}")
+
     img = cv2.imread(file_path, cv2.IMREAD_COLOR)
+    # cv2.imread returns None (it does not raise) when the file cannot be decoded
+    if img is None:
+        raise ValueError("unable to read file.")
     # Resizing
     if isinstance(output_size, tuple):
         img = cv2.resize(img, output_size[::-1], interpolation=cv2.INTER_LINEAR)

diff --git a/test/test_documents.py b/test/test_documents.py
@@ -216,7 +216,7 @@ def test_read_pdf(mock_pdf):
     assert all(page.dtype == np.uint8 for page in doc_tensors)
 
 
-def test_read_img(tmpdir_factory):
+def test_read_img(tmpdir_factory, mock_pdf):
 
     url = 'https://upload.wikimedia.org/wikipedia/commons/5/55/Grace_Hopper.jpg'
     file = BytesIO(requests.get(url).content)
@@ -240,3 +240,10 @@ def test_read_img(tmpdir_factory):
     target_size = (200, 150)
     resized_page = documents.reader.read_img(tmp_path, target_size)
     assert resized_page.shape[:2] == target_size
+
+    # Non-existing file
+    with pytest.raises(FileNotFoundError):
+        documents.reader.read_img("my_imaginary_file.jpg")
+    # Existing file that cannot be decoded as an image
+    with pytest.raises(ValueError):
+        documents.reader.read_img(mock_pdf)