Skip to content

Commit

Permalink
Merge pull request #12 from microsoft/laserprec/use_tox
Browse files Browse the repository at this point in the history
Use tox in CI pipeline
  • Loading branch information
Jianjie Liu committed Jan 26, 2021
2 parents 8da3807 + 1ea5dd4 commit d738bbb
Show file tree
Hide file tree
Showing 72 changed files with 145 additions and 61 deletions.
4 changes: 2 additions & 2 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
genalog/degradation/ @laserprec
genalog/generation/ @laserprec
genalog/text/ @laserprec
genalog/ocr/ @dbanda @laserprec
genalog/ocr/ @laserprec

tests/ @laserprec @dbanda
tests/ @laserprec
6 changes: 6 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include *.md
include *.txt
include LICENSE CODEOWNERS
include .gitignore tox.ini MANIFEST.in
recursive-include genalog *.py *.jinja
recursive-include tests *.py *.jinja *.jpg
4 changes: 2 additions & 2 deletions devops/pr-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ steps:
displayName: 'Install dependencies'

- bash: |
python -m flake8
tox -e flake8
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run Linter (flake8)'

- bash: |
python -m pytest tests
tox -e py
env:
BLOB_KEY : $(BLOB_KEY)
SEARCH_SERVICE_KEY: $(SEARCH_SERVICE_KEY)
Expand Down
4 changes: 2 additions & 2 deletions genalog/ocr/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ Example usage:
```python
from genalog.ocr.grok import Grok
from dotenv import load_dotenv
load_dotenv("tests/ocr/.env")
load_dotenv("tests/unit/ocr/.env")

grok = Grok.create_from_env_var()
grok.run_grok(src_folder_path = "tests/ocr/data/img", dest_folder_path = "tests/ocr/data/json")
grok.run_grok(src_folder_path = "tests/unit/ocr/data/img", dest_folder_path = "tests/unit/ocr/data/json")
```

5 changes: 3 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
flake8
flake8-import-order
pytest
pytest-cov
flake8
flake8-import-order
tox
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
long_description=long_description,
long_description_content_type="text/markdown",
url='https://msazure.visualstudio.com/DefaultCollection/Cognitive%20Services/_git/Tools-Synthetic-Data-Generator',
packages=setuptools.find_packages(),
packages=setuptools.find_packages(exclude=['tests', 'tests.*']),
package_data={'': [
'genalog/generation/templates/*.jinja'
]},
include_package_data=True,
classifiers=[
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
Expand Down
File renamed without changes.
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import logging
import os

import pytest
from dotenv import load_dotenv

from tests.required_env import RequiredEnvVar

ENV_FILEPATH = "tests/.env"


@pytest.fixture(scope="session")
def load_azure_resources():
    """Load the ``.env`` file at ``ENV_FILEPATH`` once per test session.

    After loading, every variable named by ``RequiredEnvVar`` is written to
    the debug log together with its current value from ``os.environ``
    (``None`` when the variable is not set).
    """
    load_dotenv(ENV_FILEPATH)
    logging.info(f"Loading .env from {ENV_FILEPATH}")
    logging.debug("Printing environment vars: ")
    # NOTE(review): RequiredEnvVar appears to include the secret keys as
    # well, so their values end up in the debug log -- confirm this is wanted.
    for required_var in RequiredEnvVar:
        var_name = required_var.value
        current_value = os.environ.get(var_name)
        logging.debug(f"\t{var_name}: {current_value}")
10 changes: 6 additions & 4 deletions tests/e2e/test_anchor_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from genalog.text import alignment, anchor, preprocess


@pytest.mark.slow
@pytest.mark.parametrize(
"gt_file, ocr_file",
zip(
sorted(glob.glob("tests/text/data/gt_*.txt")),
sorted(glob.glob("tests/text/data/ocr_*.txt")),
sorted(glob.glob("tests/unit/text/data/gt_*.txt")),
sorted(glob.glob("tests/unit/text/data/ocr_*.txt")),
),
)
def test_align_w_anchor_and_align(gt_file, ocr_file):
Expand All @@ -34,11 +35,12 @@ def test_align_w_anchor_and_align(gt_file, ocr_file):
)


@pytest.mark.slow
@pytest.mark.parametrize(
"gt_file, ocr_file",
zip(
sorted(glob.glob("tests/text/data/gt_*.txt")),
sorted(glob.glob("tests/text/data/ocr_*.txt")),
sorted(glob.glob("tests/unit/text/data/gt_*.txt")),
sorted(glob.glob("tests/unit/text/data/ocr_*.txt")),
),
)
@pytest.mark.parametrize("max_seg_length", [25, 50, 75, 100, 150])
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/test_conll_format_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from genalog.text import conll_format


@pytest.mark.slow
@pytest.mark.parametrize(
"required_args", [(["tests/e2e/data/synthetic_dataset", "test_version"])]
)
Expand All @@ -27,6 +28,7 @@ def test_conll_format(required_args, optional_args):
basepath = "tests/e2e/data/conll_formatter/"


@pytest.mark.slow
@pytest.mark.parametrize(
"clean_label_filename, ocr_text_filename",
zip(
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/test_document_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
UNSUPPORTED_CONTENT_FORMAT = ["foo bar"]
UNSUPPORTED_CONTENT_TYPE = CompositeContent(["foo"], [ContentType.TITLE])

CUSTOM_TEMPLATE_PATH = "tests/generation/templates"
CUSTOM_TEMPLATE_PATH = "tests/unit/generation/templates"
CUSTOM_TEMPLATE_NAME = "mock.html.jinja"
CUSTOM_STYLE_TEMPLATE_NAME = "font_family.html.jinja"
MULTI_PAGE_TEMPLATE_NAME = "multipage.html.jinja"
Expand Down
14 changes: 10 additions & 4 deletions tests/e2e/test_ocr_e2e.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
import json

import pytest
from dotenv import load_dotenv

from genalog.ocr.blob_client import GrokBlobClient
from genalog.ocr.grok import Grok

load_dotenv("tests/ocr/.env")

@pytest.fixture(scope="module", autouse=True)
def load_azure_config(load_azure_resources):
    """Pull in the ``load_azure_resources`` fixture for this module.

    Only the non-secret settings are loaded through that fixture; the
    secrets are assumed to already be set as environment variables.
    """


@pytest.mark.azure
class TestBlobClient:
@pytest.mark.parametrize("use_async", [True, False])
def test_upload_images(self, use_async):
blob_client = GrokBlobClient.create_from_env_var()
subfolder = "tests/ocr/data/img"
subfolder = "tests/unit/ocr/data/img"
subfolder.replace("/", "_")
dst_folder, _ = blob_client.upload_images_to_blob(
subfolder, use_async=use_async
Expand Down Expand Up @@ -43,11 +48,12 @@ def test_upload_images(self, use_async):
), f"folder {dst_folder} was not deleted"


@pytest.mark.azure
class TestGROKe2e:
@pytest.mark.parametrize("use_async", [False, True])
def test_grok_e2e(self, tmpdir, use_async):
grok = Grok.create_from_env_var()
src_folder = "tests/ocr/data/img"
src_folder = "tests/unit/ocr/data/img"
grok.run_grok(
src_folder,
tmpdir,
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from genalog import pipeline

EXAMPLE_TEXT_FILE = "tests/text/data/gt_1.txt"
EXAMPLE_TEXT_FILE = "tests/unit/text/data/gt_1.txt"


@pytest.fixture
Expand Down Expand Up @@ -36,7 +36,7 @@ def test_custom_generate_img(custom_analog_generator):


def test_generate_dataset_multiprocess():
INPUT_TEXT_FILENAMES = glob.glob("tests/text/data/gt_*.txt")
INPUT_TEXT_FILENAMES = glob.glob("tests/unit/text/data/gt_*.txt")
with pytest.deprecated_call():
pipeline.generate_dataset_multiprocess(
INPUT_TEXT_FILENAMES, "test_out", {}, [], "text_block.html.jinja"
Expand Down
1 change: 0 additions & 1 deletion tests/ocr/data/substitution.json

This file was deleted.

24 changes: 24 additions & 0 deletions tests/required_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from enum import Enum
from itertools import chain


class RequiredSecrets(Enum):
    """Names of the secret environment variables; each value equals its name."""

    BLOB_KEY = "BLOB_KEY"
    SEARCH_SERVICE_KEY = "SEARCH_SERVICE_KEY"
    COGNITIVE_SERVICE_KEY = "COGNITIVE_SERVICE_KEY"


class RequiredConstants(Enum):
    """Names of the non-secret environment variables; each value equals its name."""

    COMPUTER_VISION_ENDPOINT = "COMPUTER_VISION_ENDPOINT"
    SEARCH_SERVICE_NAME = "SEARCH_SERVICE_NAME"
    SKILLSET_NAME = "SKILLSET_NAME"
    INDEX_NAME = "INDEX_NAME"
    INDEXER_NAME = "INDEXER_NAME"
    DATASOURCE_NAME = "DATASOURCE_NAME"
    DATASOURCE_CONTAINER_NAME = "DATASOURCE_CONTAINER_NAME"
    BLOB_NAME = "BLOB_NAME"


# Single enum covering every required variable: the secrets first, then the
# constants, each member keeping its original name and value.
RequiredEnvVar = Enum(
    "RequiredEnvVar",
    {
        member.name: member.value
        for member in chain(RequiredSecrets, RequiredConstants)
    },
)
File renamed without changes.
Empty file added tests/unit/cases/__init__.py
Empty file.
File renamed without changes.
File renamed without changes.
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes
Empty file.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
MOCK_TEMPLATE = MagicMock()
MOCK_TEMPLATE.render.return_value = MOCK_COMPILED_DOCUMENT

IMG_BYTES = open("tests/generation/2x2.jpg", "rb").read()
IMG_BYTES = open("tests/unit/generation/2x2.jpg", "rb").read()

FILE_DESTINATION_PDF = "sample.pdf"
FILE_DESTINATION_PNG = "sample.png"

CUSTOM_TEMPLATE_PATH = "tests/generation/templates"
CUSTOM_TEMPLATE_PATH = "tests/unit/generation/templates"
CUSTOM_TEMPLATE_NAME = "mock.html.jinja"
DEFAULT_TEMPLATE_NAME = "text_block.html.jinja"
DEFAULT_PACKAGE_NAME = "genalog.generation"
Expand Down
Empty file added tests/unit/ocr/__init__.py
Empty file.
File renamed without changes
File renamed without changes
File renamed without changes
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
edit_insert,edit_delete,edit_replace,edit_insert_spacing,edit_delete_spacing,insert,delete,replace,spacing,total_chars,total_words,total_alnum_words,matching_chars,matching_alnum_words,matching_words,alnum_word_accuracy,word_accuracy,char_accuracy,txt_path,ocr_json_path,filename
1,0,0,1,13,1,0,0,14,1027,166,159,1025,144,150,0.9056603773584906,0.9036144578313253,0.9980525803310614,tests/ocr/data/text/0.txt,tests/ocr/data/json/521c38122f783673598856cd81d91c21_0.png.json,0.txt
3,0,0,0,5,3,0,0,5,958,182,176,955,165,171,0.9375,0.9395604395604396,0.9968684759916493,tests/ocr/data/text/1.txt,tests/ocr/data/json/521c38122f783673598856cd81d91c21_1.png.json,1.txt
2,0,0,0,9,2,0,0,9,1022,188,183,1020,170,175,0.9289617486338798,0.9308510638297872,0.9980430528375733,tests/ocr/data/text/11.txt,tests/ocr/data/json/521c38122f783673598856cd81d91c21_11.png.json,11.txt
1,0,0,1,13,1,0,0,14,1027,166,159,1025,144,150,0.9056603773584906,0.9036144578313253,0.9980525803310614,tests/unit/ocr/data/text/0.txt,tests/unit/ocr/data/json/521c38122f783673598856cd81d91c21_0.png.json,0.txt
3,0,0,0,5,3,0,0,5,958,182,176,955,165,171,0.9375,0.9395604395604396,0.9968684759916493,tests/unit/ocr/data/text/1.txt,tests/unit/ocr/data/json/521c38122f783673598856cd81d91c21_1.png.json,1.txt
2,0,0,0,9,2,0,0,9,1022,188,183,1020,170,175,0.9289617486338798,0.9308510638297872,0.9980430528375733,tests/unit/ocr/data/text/11.txt,tests/unit/ocr/data/json/521c38122f783673598856cd81d91c21_11.png.json,11.txt
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
edit_insert,edit_delete,edit_replace,edit_insert_spacing,edit_delete_spacing,insert,delete,replace,spacing,total_chars,total_words,total_alnum_words,matching_chars,matching_alnum_words,matching_words,alnum_word_accuracy,word_accuracy,char_accuracy,txt_path,ocr_json_path,filename
2,5,5,0,2,1,1,5,2,1068,176,176,1061,169,169,0.9602272727272727,0.9602272727272727,0.9934456928838952,tests/ocr/data/metrics/text/001.txt,tests/ocr/data/metrics/json/123_001.png.json,001.txt
0,5,17,0,11,0,2,8,11,1789,301,301,1772,283,283,0.9401993355481728,0.9401993355481728,0.9904974846282839,tests/ocr/data/metrics/text/002.txt,tests/ocr/data/metrics/json/123_002.png.json,002.txt
0,1,6,0,17,0,0,5,17,2659,460,459,2653,436,437,0.9498910675381264,0.95,0.9977435125987213,tests/ocr/data/metrics/text/003.txt,tests/ocr/data/metrics/json/123_003.png.json,003.txt
2,5,5,0,2,1,1,5,2,1068,176,176,1061,169,169,0.9602272727272727,0.9602272727272727,0.9934456928838952,tests/unit/ocr/data/metrics/text/001.txt,tests/unit/ocr/data/metrics/json/123_001.png.json,001.txt
0,5,17,0,11,0,2,8,11,1789,301,301,1772,283,283,0.9401993355481728,0.9401993355481728,0.9904974846282839,tests/unit/ocr/data/metrics/text/002.txt,tests/unit/ocr/data/metrics/json/123_002.png.json,002.txt
0,1,6,0,17,0,0,5,17,2659,460,459,2653,436,437,0.9498910675381264,0.95,0.9977435125987213,tests/unit/ocr/data/metrics/text/003.txt,tests/unit/ocr/data/metrics/json/123_003.png.json,003.txt
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions tests/unit/ocr/data/substitution.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"tests/unit/ocr/data/text\\0.txt": {}, "tests/unit/ocr/data/text\\1.txt": {}, "tests/unit/ocr/data/text\\11.txt": {}}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 9 additions & 5 deletions tests/ocr/test_ocr.py → tests/unit/ocr/test_ocr.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import json
import os

import pytest
import requests
from dotenv import load_dotenv

from genalog.ocr.rest_client import GrokRestClient


load_dotenv("tests/ocr/.env")
@pytest.fixture(scope="module", autouse=True)
def set_azure_dummy_secrets(load_azure_resources):
    """Expose placeholder Azure secrets to the tests in this module.

    The three secret variables are overwritten with dummy values for the
    lifetime of the module. Whatever was in ``os.environ`` beforehand is
    restored once the module's tests finish, so the placeholders do not
    leak into other test modules that run later in the same session.
    """
    dummy_secrets = {
        'BLOB_KEY': "<YOUR BLOB KEY>",
        'SEARCH_SERVICE_KEY': "<YOUR SEARCH SERVICE KEY>",
        'COGNITIVE_SERVICE_KEY': "<YOUR COGNITIVE SERVICE KEY>",
    }
    # Snapshot the current values (None means "was not set") before clobbering.
    previous_values = {name: os.environ.get(name) for name in dummy_secrets}
    os.environ.update(dummy_secrets)

    yield

    # Teardown: put the environment back exactly as it was found.
    for name, previous in previous_values.items():
        if previous is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = previous


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -46,7 +50,7 @@ def json(self):
"metadata_storage_name": "521c38122f783673598856cd81d91c21_0.png",
"layoutText": json.load(
open(
"tests/ocr/data/json/521c38122f783673598856cd81d91c21_0.png.json",
"tests/unit/ocr/data/json/521c38122f783673598856cd81d91c21_0.png.json",
"r",
)
),
Expand All @@ -55,7 +59,7 @@ def json(self):
"metadata_storage_name": "521c38122f783673598856cd81d91c21_1.png",
"layoutText": json.load(
open(
"tests/ocr/data/json/521c38122f783673598856cd81d91c21_1.png.json",
"tests/unit/ocr/data/json/521c38122f783673598856cd81d91c21_1.png.json",
"r",
)
),
Expand All @@ -64,7 +68,7 @@ def json(self):
"metadata_storage_name": "521c38122f783673598856cd81d91c21_11.png",
"layoutText": json.load(
open(
"tests/ocr/data/json/521c38122f783673598856cd81d91c21_11.png.json",
"tests/unit/ocr/data/json/521c38122f783673598856cd81d91c21_11.png.json",
"r",
)
),
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import pytest

from genalog.text import alignment
from tests.cases.text_alignment import ALIGNMENT_REGRESSION_TEST_CASES
from tests.cases.text_alignment import PARSE_ALIGNMENT_REGRESSION_TEST_CASES
from tests.unit.cases.text_alignment import ALIGNMENT_REGRESSION_TEST_CASES
from tests.unit.cases.text_alignment import PARSE_ALIGNMENT_REGRESSION_TEST_CASES

RANDOM_INT = randint(1, 100)
MOCK_ALIGNMENT_RESULT = [("X", "X", 0, 0, 1)]
Expand Down
6 changes: 3 additions & 3 deletions tests/text/test_anchor.py → tests/unit/text/test_anchor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from genalog.text import alignment, anchor, preprocess
from tests.cases.text_alignment import ALIGNMENT_REGRESSION_TEST_CASES
from tests.unit.cases.text_alignment import ALIGNMENT_REGRESSION_TEST_CASES


@pytest.mark.parametrize(
Expand Down Expand Up @@ -200,8 +200,8 @@ def test_find_anchor_recur_fixed_seg_len(
@pytest.mark.parametrize(
"gt_file, ocr_file",
zip(
sorted(glob.glob("tests/text/data/gt_1.txt")),
sorted(glob.glob("tests/text/data/ocr_1.txt")),
sorted(glob.glob("tests/unit/text/data/gt_1.txt")),
sorted(glob.glob("tests/unit/text/data/ocr_1.txt")),
),
)
@pytest.mark.parametrize("max_seg_length", [75])
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from genalog.text import ner_label
from tests.cases.label_propagation import LABEL_PROPAGATION_REGRESSION_TEST_CASES
from tests.unit.cases.label_propagation import LABEL_PROPAGATION_REGRESSION_TEST_CASES


@pytest.mark.parametrize(
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/text/test_utf8.py → tests/unit/text/test_utf8.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from genalog.text import alignment
from genalog.text.alignment import GAP_CHAR
from tests.cases.text_alignment import ALIGNMENT_REGRESSION_TEST_CASES
from tests.unit.cases.text_alignment import ALIGNMENT_REGRESSION_TEST_CASES


def random_utf8_char(byte_len=1):
Expand Down
Loading

0 comments on commit d738bbb

Please sign in to comment.