Skip to content

Commit

Permalink
add AutoML regression testing mechanism (#1966)
Browse files Browse the repository at this point in the history
* add AutoML regression testing mechanism

* move automl tests into subfolder

* get dataset name from ludwig dataset registry

* add shared helper for creating dataset object

* address review comments
  • Loading branch information
jppgks committed May 9, 2022
1 parent 1da6536 commit a01336d
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
[
{
"column": "age",
"name": "age",
"type": "number"
},
{
"column": "workclass",
"name": "workclass",
"type": "category"
},
{
"column": "fnlwgt",
"name": "fnlwgt",
"type": "number"
},
{
"column": "education",
"name": "education",
"type": "category"
},
{
"column": "education-num",
"name": "education-num",
"type": "number"
},
{
"column": "marital-status",
"name": "marital-status",
"type": "category"
},
{
"column": "occupation",
"name": "occupation",
"type": "category"
},
{
"column": "relationship",
"name": "relationship",
"type": "category"
},
{
"column": "race",
"name": "race",
"type": "category"
},
{
"column": "sex",
"name": "sex",
"type": "category"
},
{
"column": "capital-gain",
"name": "capital-gain",
"type": "number"
},
{
"column": "capital-loss",
"name": "capital-loss",
"type": "number"
},
{
"column": "hours-per-week",
"name": "hours-per-week",
"type": "number"
},
{
"column": "native-country",
"name": "native-country",
"type": "category"
},
{
"column": "income",
"name": "income",
"type": "category"
}
]
13 changes: 13 additions & 0 deletions tests/regression_tests/automl/golden/mnist.types.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[
{
"column": "image_path",
"encoder": "stacked_cnn",
"name": "image_path",
"type": "image"
},
{
"column": "label",
"name": "label",
"type": "category"
}
]
29 changes: 29 additions & 0 deletions tests/regression_tests/automl/scripts/update_golden_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python
"""This script updates all golden JSON files containing expected data types."""
import json

from ludwig.automl.automl import create_auto_config
from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY


def write_json_files():
    """Regenerate the golden types JSON file for every dataset in the test registry.

    For each dataset name in ``TEST_DATASET_REGISTRY`` the dataset is loaded
    unsplit, an auto config is created for it, and the resulting
    ``input_features`` list is written to that dataset's golden file as
    indented, key-sorted JSON followed by a trailing newline.
    """
    for name in TEST_DATASET_REGISTRY:
        loaded = get_dataset_object(name).load(split=False)

        # NOTE: assuming type inference for input and output features is the same
        auto_config = create_auto_config(
            dataset=loaded,
            target=[],
            time_limit_s=3600,
            tune_for_memory=False,
        )

        with open(get_dataset_golden_types_path(name), "w") as golden_file:
            json.dump(auto_config["input_features"], golden_file, indent=4, sort_keys=True)
            # Terminate the file with a newline, which json.dump does not emit.
            golden_file.write("\n")


if __name__ == "__main__":
    write_json_files()
28 changes: 28 additions & 0 deletions tests/regression_tests/automl/test_type_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json

import pytest

from ludwig.automl.automl import create_auto_config
from tests.integration_tests.utils import slow
from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY


@slow
# TEST_DATASET_REGISTRY is a set, so its iteration order can differ between
# interpreter processes. Sorting gives stable test ordering and keeps collection
# consistent across pytest-xdist workers.
@pytest.mark.parametrize("dataset_name", sorted(TEST_DATASET_REGISTRY))
def test_auto_type_inference_regression(dataset_name):
    """Check that AutoML's inferred feature types still match the golden file.

    Loads the expected ``input_features`` from the dataset's golden JSON file,
    builds an auto config for the unsplit dataset, and asserts that both are equal.

    Args:
        dataset_name: Name of a dataset listed in ``TEST_DATASET_REGISTRY``.
    """
    golden_types_path = get_dataset_golden_types_path(dataset_name)
    with open(golden_types_path) as f:
        golden_types = json.load(f)

    dataset_obj = get_dataset_object(dataset_name)
    dataset = dataset_obj.load(split=False)

    # NOTE: assuming type inference for input and output features is the same
    config = create_auto_config(
        dataset=dataset,
        target=[],
        time_limit_s=3600,
        tune_for_memory=False,
    )

    assert golden_types == config["input_features"]
17 changes: 17 additions & 0 deletions tests/regression_tests/automl/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pathlib import Path

from ludwig.datasets import dataset_registry
from ludwig.datasets.base_dataset import BaseDataset

# Subset of Ludwig Dataset Zoo used for AutoML type inference regression tests.
# NOTE: this is a set, so iteration order is not guaranteed; sort it wherever a
# stable order matters (e.g. when generating parametrized test cases).
TEST_DATASET_REGISTRY = {"adult_census_income", "mnist"}


def get_dataset_golden_types_path(dataset_name: str) -> str:
    """Build the path of the golden types JSON file for a dataset.

    The file is named ``<dataset_name>.types.json`` and lives in the ``golden``
    directory located next to this module.

    Args:
        dataset_name: Name of the dataset whose golden file path is wanted.

    Returns:
        The resolved path as a string.
    """
    golden_dir = Path(__file__).resolve().parent / "golden"
    golden_file = golden_dir.joinpath(f"{dataset_name}.types.json")
    return str(golden_file)


def get_dataset_object(dataset_name: str) -> BaseDataset:
    """Instantiate the Ludwig dataset registered under the given name.

    Args:
        dataset_name: Key to look up in ``dataset_registry``.

    Returns:
        The result of calling the registry entry with no arguments.
    """
    dataset_factory = dataset_registry[dataset_name]
    return dataset_factory()

0 comments on commit a01336d

Please sign in to comment.