Merge pull request #14 from microsoft/laserprec/bugfix/silentDiskWrite

Raise errors when writing to disk fails
microsoft · Jan 27, 2021 · 7cf054a · 7cf054a
2 parents 1227eff + 9e4c37a
commit 7cf054a
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 34 deletions.
diff --git a/genalog/pipeline.py b/genalog/pipeline.py
@@ -22,12 +22,9 @@ def default(self, obj):
 
 class AnalogDocumentGeneration(object):
     def __init__(
-        self,
-        template_path=None,
-        styles=DEFAULT_STYLE_COMBINATION,
-        degradations=[],
-        resolution=300,
-    ):
+            self,
+            template_path=None, styles=DEFAULT_STYLE_COMBINATION,
+            degradations=[], resolution=300):
         self.doc_generator = DocumentGenerator(template_path=template_path)
         self.doc_generator.set_styles_to_generate(styles)
         self.degrader = Degrader(degradations)
@@ -42,8 +39,13 @@ def list_templates(self):
         """
         return self.doc_generator.template_list
 
+    # Fix: rename to generate_sample()
+    # Add another method called generate_all_styles()
     def generate_img(self, full_text_path, template, target_folder=None):
-        """Generate synthetic images given the filepath of a text document
+        """Generate an image with a sample style given a text document
+
+        NOTE: This does not generate all possible style combinations.
+        Use generate_all_styles() instead.
 
         Arguments:
             full_text_path {str} -- full filepath of a text document (i.e /dataset/doc.txt)
@@ -54,14 +56,20 @@ def generate_img(self, full_text_path, template, target_folder=None):
             target_folder {str} -- folder path in which the generated images are stored
                 (default: {None})
             resolution {int} -- resolution in dpi (default: {300})
+
+        Raises:
+            RuntimeError: when the image cannot be written to disk at the specified path
         """
         with open(full_text_path, "r", encoding="utf8") as f:  # read file
             text = f.read()
         content = CompositeContent([text], [ContentType.PARAGRAPH])
 
         generator = self.doc_generator.create_generator(content, [template])
         # Generate the image
-        doc = next(generator)  # TODO: this does not exhaust all of the style combinations in the generator
+        try:
+            doc = next(generator)  # NOTE: this does not exhaust all of the style combinations in the generator
+        except StopIteration:
+            return None
         src = doc.render_array(resolution=self.resolution, channel="GRAYSCALE")
         # Degrade the image
         dst = self.degrader.apply_effects(src)
@@ -74,7 +82,8 @@ def generate_img(self, full_text_path, template, target_folder=None):
             text_filename = os.path.basename(full_text_path)
             img_filename = text_filename.replace(".txt", ".png")
             img_dst_path = os.path.join(target_folder, "img", img_filename)
-            cv2.imwrite(img_dst_path, dst)
+            if not cv2.imwrite(img_dst_path, dst):
+                raise RuntimeError(f"Could not write to path {img_dst_path}")
             return
 
 
@@ -115,14 +124,9 @@ def _set_batch_generate_args(
 
 
 def generate_dataset_multiprocess(
-    input_text_files,
-    output_folder,
-    styles,
-    degradations,
-    template,
-    resolution=300,
-    batch_size=25,
-):
+        input_text_files, output_folder,
+        styles, degradations, template,
+        resolution=300, batch_size=25):
     _setup_folder(output_folder)
     print(f"Storing generated images in {output_folder}")
 

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -2,4 +2,6 @@ flake8
 flake8-import-order
 pytest
 pytest-cov
+pytest-mock
+pytest-lazy-fixture
 tox
diff --git a/tests/e2e/test_pipeline.py b/tests/e2e/test_pipeline.py
@@ -1,9 +1,10 @@
 import os
 import glob
 
+import numpy as np
 import pytest
 
-from genalog import pipeline
+from genalog.pipeline import AnalogDocumentGeneration, generate_dataset_multiprocess
 from genalog.generation.document import DocumentGenerator
 
 EXAMPLE_TEXT_FILE = "tests/unit/text/data/gt_1.txt"
@@ -18,33 +19,69 @@
 
 
 @pytest.fixture
-def default_analog_generator():
-    return pipeline.AnalogDocumentGeneration()
+def default_doc_generator():
+    return AnalogDocumentGeneration()
 
 
 @pytest.fixture
-def custom_analog_generator():
-    return pipeline.AnalogDocumentGeneration(
-        styles=STYLES, degradations=DEGRATIONS, resolution=300
-    )
+def custom_doc_generator():
+    return AnalogDocumentGeneration(styles=STYLES, degradations=DEGRATIONS, resolution=300)
+
+
+@pytest.fixture
+def empty_style_doc_generator():
+    return AnalogDocumentGeneration(styles={})
 
 
-def test_default_generate_img(default_analog_generator):
-    assert len(default_analog_generator.list_templates()) > 0
-    example_template = default_analog_generator.list_templates()[0]
-    default_analog_generator.generate_img(
+@pytest.mark.parametrize("doc_generator", [
+    pytest.lazy_fixture('default_doc_generator'),
+    pytest.lazy_fixture('custom_doc_generator')
+])
+def test_generate_img_array(doc_generator):
+    # Precondition checks
+    assert len(doc_generator.list_templates()) > 0
+
+    example_template = doc_generator.list_templates()[0]
+    sample_img = doc_generator.generate_img(
         EXAMPLE_TEXT_FILE, example_template, target_folder=None
     )
+    assert sample_img is not None
+    assert isinstance(sample_img, np.ndarray)
+
 
+def test_generate_img_array_empty(empty_style_doc_generator):
+    # Precondition checks
+    assert len(empty_style_doc_generator.list_templates()) > 0
 
-def test_custom_generate_img(custom_analog_generator):
-    assert len(custom_analog_generator.list_templates()) > 0
-    example_template = custom_analog_generator.list_templates()[0]
-    custom_analog_generator.generate_img(
+    example_template = empty_style_doc_generator.list_templates()[0]
+    sample_img = empty_style_doc_generator.generate_img(
         EXAMPLE_TEXT_FILE, example_template, target_folder=None
     )
+    assert sample_img is None
+
+
+@pytest.mark.io
+@pytest.mark.parametrize("doc_generator", [
+    pytest.lazy_fixture('default_doc_generator'),
+    pytest.lazy_fixture('custom_doc_generator')
+])
+def test_generate_img_write_to_disk(tmpdir, doc_generator):
+    os.makedirs(os.path.join(tmpdir, "img"))  # TODO: generate_img() stores images under the "img" folder
+    output_img_wildcard = os.path.join(tmpdir, "img", "*.png")
+    num_generated_img = glob.glob(output_img_wildcard)
+    # Precondition checks
+    assert len(num_generated_img) == 0
+    assert len(doc_generator.list_templates()) > 0
+
+    example_template = doc_generator.list_templates()[0]
+    doc_generator.generate_img(
+        EXAMPLE_TEXT_FILE, example_template, target_folder=tmpdir
+    )
+    num_generated_img = glob.glob(output_img_wildcard)  # look for any png on disk
+    assert len(num_generated_img) > 0
 
 
+@pytest.mark.io
 @pytest.mark.parametrize("styles", [
     STYLES,
     pytest.param(
@@ -56,9 +93,9 @@ def test_custom_generate_img(custom_analog_generator):
 def test_generate_dataset_multiprocess(tmpdir, folder_name, styles):
     assert len(INPUT_TEXT_FILENAMES) > 0
     output_folder = os.path.join(tmpdir, folder_name)
-    pipeline.generate_dataset_multiprocess(
+    generate_dataset_multiprocess(
         INPUT_TEXT_FILENAMES, output_folder, styles, DEGRATIONS, "text_block.html.jinja"
     )
-    num_generated_img = glob.glob(os.path.join(output_folder, "**/*.png"))
+    num_generated_img = glob.glob(os.path.join(output_folder, "**", "*.png"))
     assert len(num_generated_img) > 0
     assert len(num_generated_img) == len(INPUT_TEXT_FILENAMES) * len(DocumentGenerator.expand_style_combinations(styles))
diff --git a/tox.ini b/tox.ini
@@ -34,6 +34,7 @@ markers =
     # EX: pytest -m "not slow and not azure"
     slow: marks tests as slow (deselect with '-m "not slow"')
     azure: marks as integration tests that require azure resource
+    io: marks integration tests involving some form of I/O operations (disk, internet, etc)
 testpaths =
     tests
 addopts =