Skip to content

Commit

Permalink
Merge pull request #14 from microsoft/laserprec/bugfix/silentDiskWrite
Browse files Browse the repository at this point in the history
Raise errors when writing to disk fails
  • Loading branch information
Jianjie Liu committed Jan 27, 2021
2 parents 1227eff + 9e4c37a commit 7cf054a
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 34 deletions.
38 changes: 21 additions & 17 deletions genalog/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,9 @@ def default(self, obj):

class AnalogDocumentGeneration(object):
def __init__(
self,
template_path=None,
styles=DEFAULT_STYLE_COMBINATION,
degradations=[],
resolution=300,
):
self,
template_path=None, styles=DEFAULT_STYLE_COMBINATION,
degradations=[], resolution=300):
self.doc_generator = DocumentGenerator(template_path=template_path)
self.doc_generator.set_styles_to_generate(styles)
self.degrader = Degrader(degradations)
Expand All @@ -42,8 +39,13 @@ def list_templates(self):
"""
return self.doc_generator.template_list

# Fix: rename to generate_sample()
# Add another method called generate_all_styles()
def generate_img(self, full_text_path, template, target_folder=None):
"""Generate synthetic images given the filepath of a text document
"""Generate an image with a sample style given a text document
NOTE: This does not generate all possible style combinations.
Use generate_all_styles() instead.
Arguments:
full_text_path {str} -- full filepath of a text document (i.e /dataset/doc.txt)
Expand All @@ -54,14 +56,20 @@ def generate_img(self, full_text_path, template, target_folder=None):
target_folder {str} -- folder path in which the generated images are stored
(default: {None})
resolution {int} -- resolution in dpi (default: {300})
Raises:
RuntimeError: when the image cannot be written to disk at the specified path
"""
with open(full_text_path, "r", encoding="utf8") as f: # read file
text = f.read()
content = CompositeContent([text], [ContentType.PARAGRAPH])

generator = self.doc_generator.create_generator(content, [template])
# Generate the image
doc = next(generator) # TODO: this does not exhaust all of the style combinations in the generator
try:
doc = next(generator) # NOTE: this does not exhaust all of the style combinations in the generator
except StopIteration:
return None
src = doc.render_array(resolution=self.resolution, channel="GRAYSCALE")
# Degrade the image
dst = self.degrader.apply_effects(src)
Expand All @@ -74,7 +82,8 @@ def generate_img(self, full_text_path, template, target_folder=None):
text_filename = os.path.basename(full_text_path)
img_filename = text_filename.replace(".txt", ".png")
img_dst_path = os.path.join(target_folder, "img", img_filename)
cv2.imwrite(img_dst_path, dst)
if not cv2.imwrite(img_dst_path, dst):
raise RuntimeError(f"Could not write to path {img_dst_path}")
return


Expand Down Expand Up @@ -115,14 +124,9 @@ def _set_batch_generate_args(


def generate_dataset_multiprocess(
input_text_files,
output_folder,
styles,
degradations,
template,
resolution=300,
batch_size=25,
):
input_text_files, output_folder,
styles, degradations, template,
resolution=300, batch_size=25):
_setup_folder(output_folder)
print(f"Storing generated images in {output_folder}")

Expand Down
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ flake8
flake8-import-order
pytest
pytest-cov
pytest-mock
pytest-lazy-fixture
tox
71 changes: 54 additions & 17 deletions tests/e2e/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import glob

import numpy as np
import pytest

from genalog import pipeline
from genalog.pipeline import AnalogDocumentGeneration, generate_dataset_multiprocess
from genalog.generation.document import DocumentGenerator

EXAMPLE_TEXT_FILE = "tests/unit/text/data/gt_1.txt"
Expand All @@ -18,33 +19,69 @@


@pytest.fixture
def default_analog_generator():
return pipeline.AnalogDocumentGeneration()
def default_doc_generator():
return AnalogDocumentGeneration()


@pytest.fixture
def custom_analog_generator():
return pipeline.AnalogDocumentGeneration(
styles=STYLES, degradations=DEGRATIONS, resolution=300
)
def custom_doc_generator():
return AnalogDocumentGeneration(styles=STYLES, degradations=DEGRATIONS, resolution=300)


@pytest.fixture
def empty_style_doc_generator():
return AnalogDocumentGeneration(styles={})


def test_default_generate_img(default_analog_generator):
assert len(default_analog_generator.list_templates()) > 0
example_template = default_analog_generator.list_templates()[0]
default_analog_generator.generate_img(
@pytest.mark.parametrize("doc_generator", [
pytest.lazy_fixture('default_doc_generator'),
pytest.lazy_fixture('custom_doc_generator')
])
def test_generate_img_array(doc_generator):
# Precondition checks
assert len(doc_generator.list_templates()) > 0

example_template = doc_generator.list_templates()[0]
sample_img = doc_generator.generate_img(
EXAMPLE_TEXT_FILE, example_template, target_folder=None
)
assert sample_img is not None
assert isinstance(sample_img, np.ndarray)


def test_generate_img_array_empty(empty_style_doc_generator):
# Precondition checks
assert len(empty_style_doc_generator.list_templates()) > 0

def test_custom_generate_img(custom_analog_generator):
assert len(custom_analog_generator.list_templates()) > 0
example_template = custom_analog_generator.list_templates()[0]
custom_analog_generator.generate_img(
example_template = empty_style_doc_generator.list_templates()[0]
sample_img = empty_style_doc_generator.generate_img(
EXAMPLE_TEXT_FILE, example_template, target_folder=None
)
assert sample_img is None


@pytest.mark.io
@pytest.mark.parametrize("doc_generator", [
pytest.lazy_fixture('default_doc_generator'),
pytest.lazy_fixture('custom_doc_generator')
])
def test_generate_img_write_to_disk(tmpdir, doc_generator):
os.makedirs(os.path.join(tmpdir, "img")) # TODO: generate_img() stores the image under the "img" folder
output_img_wildcard = os.path.join(tmpdir, "img", "*.png")
num_generated_img = glob.glob(output_img_wildcard)
# Precondition checks
assert len(num_generated_img) == 0
assert len(doc_generator.list_templates()) > 0

example_template = doc_generator.list_templates()[0]
doc_generator.generate_img(
EXAMPLE_TEXT_FILE, example_template, target_folder=tmpdir
)
num_generated_img = glob.glob(output_img_wildcard) # look for any png on file
assert len(num_generated_img) > 0


@pytest.mark.io
@pytest.mark.parametrize("styles", [
STYLES,
pytest.param(
Expand All @@ -56,9 +93,9 @@ def test_custom_generate_img(custom_analog_generator):
def test_generate_dataset_multiprocess(tmpdir, folder_name, styles):
assert len(INPUT_TEXT_FILENAMES) > 0
output_folder = os.path.join(tmpdir, folder_name)
pipeline.generate_dataset_multiprocess(
generate_dataset_multiprocess(
INPUT_TEXT_FILENAMES, output_folder, styles, DEGRATIONS, "text_block.html.jinja"
)
num_generated_img = glob.glob(os.path.join(output_folder, "**/*.png"))
num_generated_img = glob.glob(os.path.join(output_folder, "**", "*.png"))
assert len(num_generated_img) > 0
assert len(num_generated_img) == len(INPUT_TEXT_FILENAMES) * len(DocumentGenerator.expand_style_combinations(styles))
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ markers =
# EX: pytest -m "not slow and not azure"
slow: marks tests as slow (deselect with '-m "not slow"')
azure: marks as integration tests that require azure resource
io: marks integration tests involving some form of I/O operations (disk, internet, etc)
testpaths =
tests
addopts =
Expand Down

0 comments on commit 7cf054a

Please sign in to comment.