-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add AutoML regression testing mechanism (#1966)
* add AutoML regression testing mechanism
* move automl tests into subfolder
* get dataset name from ludwig dataset registry
* add shared helper for creating dataset object
* address review comments
- Loading branch information
Showing
5 changed files
with
164 additions
and
0 deletions.
There are no files selected for viewing
77 changes: 77 additions & 0 deletions
77
tests/regression_tests/automl/golden/adult_census_income.types.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
[ | ||
{ | ||
"column": "age", | ||
"name": "age", | ||
"type": "number" | ||
}, | ||
{ | ||
"column": "workclass", | ||
"name": "workclass", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "fnlwgt", | ||
"name": "fnlwgt", | ||
"type": "number" | ||
}, | ||
{ | ||
"column": "education", | ||
"name": "education", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "education-num", | ||
"name": "education-num", | ||
"type": "number" | ||
}, | ||
{ | ||
"column": "marital-status", | ||
"name": "marital-status", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "occupation", | ||
"name": "occupation", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "relationship", | ||
"name": "relationship", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "race", | ||
"name": "race", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "sex", | ||
"name": "sex", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "capital-gain", | ||
"name": "capital-gain", | ||
"type": "number" | ||
}, | ||
{ | ||
"column": "capital-loss", | ||
"name": "capital-loss", | ||
"type": "number" | ||
}, | ||
{ | ||
"column": "hours-per-week", | ||
"name": "hours-per-week", | ||
"type": "number" | ||
}, | ||
{ | ||
"column": "native-country", | ||
"name": "native-country", | ||
"type": "category" | ||
}, | ||
{ | ||
"column": "income", | ||
"name": "income", | ||
"type": "category" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[ | ||
{ | ||
"column": "image_path", | ||
"encoder": "stacked_cnn", | ||
"name": "image_path", | ||
"type": "image" | ||
}, | ||
{ | ||
"column": "label", | ||
"name": "label", | ||
"type": "category" | ||
} | ||
] |
29 changes: 29 additions & 0 deletions
29
tests/regression_tests/automl/scripts/update_golden_types.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env python | ||
"""This script updates all golden JSON files containing expected data types.""" | ||
import json | ||
|
||
from ludwig.automl.automl import create_auto_config | ||
from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY | ||
|
||
|
||
def write_json_files():
    """Regenerate the golden types JSON file for every dataset in TEST_DATASET_REGISTRY.

    For each dataset name, the dataset is loaded unsplit, an auto config is created for it, and the
    config's "input_features" entry is written to that dataset's golden types path, overwriting any
    existing file.
    """
    # TEST_DATASET_REGISTRY is a set; sort it so datasets are processed in the same order on every run.
    for dataset_name in sorted(TEST_DATASET_REGISTRY):
        dataset_obj = get_dataset_object(dataset_name)
        dataset = dataset_obj.load(split=False)

        # NOTE: assuming type inference for input and output features is the same
        config = create_auto_config(
            dataset=dataset,
            target=[],
            time_limit_s=3600,
            tune_for_memory=False,
        )

        golden_types_path = get_dataset_golden_types_path(dataset_name)
        # Explicit encoding keeps the written file independent of the platform's locale default.
        with open(golden_types_path, "w", encoding="utf-8") as f:
            # sort_keys keeps the key order stable so regenerated files diff cleanly.
            json.dump(config["input_features"], f, indent=4, sort_keys=True)
            f.write("\n")


if __name__ == "__main__":
    write_json_files()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import json | ||
|
||
import pytest | ||
|
||
from ludwig.automl.automl import create_auto_config | ||
from tests.integration_tests.utils import slow | ||
from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY | ||
|
||
|
||
@slow
@pytest.mark.parametrize("dataset_name", sorted(TEST_DATASET_REGISTRY))
def test_auto_type_inference_regression(dataset_name):
    """Check that the auto config's input features for a dataset match its golden types file.

    The parametrization is sorted because TEST_DATASET_REGISTRY is a set: without sorting, the
    collection order of the generated test cases could differ between runs or worker processes.
    """
    golden_types_path = get_dataset_golden_types_path(dataset_name)
    with open(golden_types_path, encoding="utf-8") as f:
        golden_types = json.load(f)

    dataset_obj = get_dataset_object(dataset_name)
    dataset = dataset_obj.load(split=False)

    # NOTE: assuming type inference for input and output features is the same
    config = create_auto_config(
        dataset=dataset,
        target=[],
        time_limit_s=3600,
        tune_for_memory=False,
    )

    assert golden_types == config["input_features"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from pathlib import Path | ||
|
||
from ludwig.datasets import dataset_registry | ||
from ludwig.datasets.base_dataset import BaseDataset | ||
|
||
# Subset of Ludwig Dataset Zoo used for AutoML type inference regression tests.
# NOTE: this is a set, so its iteration order is not guaranteed; sort it wherever a stable order matters.
TEST_DATASET_REGISTRY = {"adult_census_income", "mnist"}
|
||
|
||
def get_dataset_golden_types_path(dataset_name: str) -> str:
    """Builds the golden types file path for a dataset, as a string.

    The file is named "<dataset_name>.types.json" and lives in the "golden" directory next to this module.
    """
    module_dir = Path(__file__).resolve().parent
    golden_file = module_dir.joinpath("golden", f"{dataset_name}.types.json")
    return str(golden_file)
|
||
|
||
def get_dataset_object(dataset_name: str) -> BaseDataset:
    """Looks up the given name in the Ludwig dataset registry and returns the result of calling that entry.

    The registry entry is called with no arguments. A name missing from the registry propagates
    whatever error the registry lookup raises.
    """
    dataset_factory = dataset_registry[dataset_name]
    return dataset_factory()