Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #2873: DataQuality test specifications and APIs #2906

Merged
merged 5 commits into from
Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.jdbi.v3.sqlobject.transaction.Transaction;
import org.openmetadata.catalog.entity.data.Database;
import org.openmetadata.catalog.entity.data.Table;
import org.openmetadata.catalog.entity.data.TableTest;
import org.openmetadata.catalog.entity.services.DatabaseService;
import org.openmetadata.catalog.exception.CatalogExceptionMessage;
import org.openmetadata.catalog.resources.databases.TableResource;
Expand Down Expand Up @@ -245,6 +246,41 @@ public Table addQuery(UUID tableId, SQLQuery query) throws IOException, ParseExc
return table.withTableQueries(getQueries(table));
}

/**
 * Adds a test case to a table, or replaces the stored test case that has the same id, and returns the table with
 * its stored test cases attached.
 *
 * <p>When the incoming test carries results and a test with the same id is already stored, the stored results are
 * appended after the incoming ones so earlier results are kept. The complete list of test cases is then written
 * back as a single JSON document under the {@code "table.tableTests"} extension, which is the same key {@code
 * getTableTests} reads from.
 *
 * <p>NOTE(review): the sibling {@code addDataModel} is annotated with {@code @Transaction}; this read-modify-write
 * probably wants the same annotation - confirm.
 *
 * @param tableId id of the table the test case belongs to
 * @param tableTest test case to add or update; a random id is assigned when it has none
 * @return the table with {@code tableTests} populated from the extension store
 * @throws IOException declared for the JSON read/write helpers and the entity lookup
 * @throws ParseException declared by the original signature (presumably raised by {@code setFields} - confirm)
 */
public Table addTableTest(UUID tableId, TableTest tableTest) throws IOException, ParseException {
  // Validate the request content
  Table table = daoCollection.tableDAO().findEntityById(tableId);
  // if ID is not passed we treat it as a new test case being added
  if (tableTest.getId() == null) {
    tableTest.setId(UUID.randomUUID());
  }

  // Index the stored test cases by id so the incoming one can replace its previous version.
  List<TableTest> storedTableTests = getTableTests(table);
  Map<UUID, TableTest> storedMapTableTests = new HashMap<>();
  if (storedTableTests != null) {
    for (TableTest t : storedTableTests) {
      storedMapTableTests.put(t.getId(), t);
    }
  }

  // process test result: new results first, followed by the previously stored ones
  TableTest prevTableTest = storedMapTableTests.get(tableTest.getId());
  if (prevTableTest != null && tableTest.getResults() != null && !tableTest.getResults().isEmpty()) {
    // Copy instead of appending in place: the incoming list may be unmodifiable and belongs to the caller.
    List<Object> mergedResults = new ArrayList<>(tableTest.getResults());
    List<Object> prevTestCaseResults = prevTableTest.getResults();
    if (prevTestCaseResults != null) {
      mergedResults.addAll(prevTestCaseResults);
    }
    tableTest.setResults(mergedResults);
  }

  storedMapTableTests.put(tableTest.getId(), tableTest);
  List<TableTest> updatedTableTests = new ArrayList<>(storedMapTableTests.values());
  // The extension key must match the one getTableTests reads ("table.tableTests"); writing to a different key
  // makes every stored test invisible to subsequent reads.
  daoCollection
      .entityExtensionDAO()
      .insert(tableId.toString(), "table.tableTests", "tableTest", JsonUtils.pojoToJson(updatedTableTests));
  setFields(table, Fields.EMPTY_FIELDS);
  return table.withTableTests(getTableTests(table));
}

@Transaction
public Table addDataModel(UUID tableId, DataModel dataModel) throws IOException, ParseException {
Table table = daoCollection.tableDAO().findEntityById(tableId);
Expand Down Expand Up @@ -643,6 +679,14 @@ private List<SQLQuery> getQueries(Table table) throws IOException {
return tableQueries;
}

/**
 * Loads the test cases stored for a table.
 *
 * <p>The test cases are kept as one JSON document in the entity extension store under the {@code
 * "table.tableTests"} key of the table's id; the document is deserialized into {@link TableTest} objects.
 *
 * @param table table whose id is used to look up the extension
 * @return whatever {@code JsonUtils.readObjects} yields for the stored document (callers in this class treat a
 *     {@code null} result as "no tests stored")
 * @throws IOException declared for the JSON deserialization
 */
private List<TableTest> getTableTests(Table table) throws IOException {
  String tableId = table.getId().toString();
  return JsonUtils.readObjects(
      daoCollection.entityExtensionDAO().getExtension(tableId, "table.tableTests"), TableTest.class);
}

public static class TableEntityInterface implements EntityInterface<Table> {
private final Table entity;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.openmetadata.catalog.Entity;
import org.openmetadata.catalog.api.data.CreateTable;
import org.openmetadata.catalog.entity.data.Table;
import org.openmetadata.catalog.entity.data.TableTest;
import org.openmetadata.catalog.jdbi3.CollectionDAO;
import org.openmetadata.catalog.jdbi3.TableRepository;
import org.openmetadata.catalog.resources.Collection;
Expand Down Expand Up @@ -502,6 +503,20 @@ public Table addDataModel(
return addHref(uriInfo, table);
}

/**
 * PUT {@code /{id}/test}: adds a test case to the table identified by {@code id}, or updates an existing one.
 *
 * <p>Authorization is delegated to {@code SecurityUtil.checkAdminOrBotRole} and runs before the id is parsed.
 *
 * @param uriInfo request URI information used to build hrefs in the response
 * @param securityContext security context of the caller
 * @param id table id in its string form; parsed with {@link UUID#fromString(String)}
 * @param tableTest test case from the request body
 * @return the updated table with hrefs added
 */
@PUT
@Path("/{id}/test")
@Operation(summary = "Add table test cases", tags = "tables", description = "Add test cases to the table.")
public Table addTableTest(
    @Context UriInfo uriInfo,
    @Context SecurityContext securityContext,
    @Parameter(description = "Id of the table", schema = @Schema(type = "string")) @PathParam("id") String id,
    TableTest tableTest)
    throws IOException, ParseException {
  SecurityUtil.checkAdminOrBotRole(authorizer, securityContext);
  UUID tableId = UUID.fromString(id);
  return addHref(uriInfo, dao.addTableTest(tableId, tableTest));
}

@DELETE
@Path("/{id}/followers/{userId}")
@Operation(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,14 @@
},
"default": null
},
"tableTests": {
"description": "List of test cases that ran against a table.",
"type": "array",
"items": {
"$ref": "../../tests/tableTest.json"
},
"default": null
},
"dataModel": {
"description": "This captures information about how the table is modeled. Currently only DBT model is supported.",
"$ref": "#/definitions/dataModel"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
harshach marked this conversation as resolved.
Show resolved Hide resolved
"$id": "https://open-metadata.org/schema/test/basic.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Basic",
"description": "This schema defines basic types that are used by other test schemas.",
"definitions": {
"testType": {
"javaType": "org.openmetadata.catalog.type.TestType",
"description": "This schema defines the type used for describing different types of tests.",
"type": "string",
"enum": ["TableTest", "MultiTableTest", "ColumnTest"],
"javaEnums": [
{
"name": "TableTest"
},
{
"name": "MultiTableTest"
},
{
"name": "ColumnTest"
}
]
},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we add tests for say reports, dashboards and other data assets, would they be defined here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the idea. One thing with JSON Schema: let's say we define a type like the one above, but I want to designate a type in the test definition, i.e. call out one value of the enum to be the type. I am not able to find a way to configure that.

{
  "$id": "https://open-metadata.org/schema/tests/column/columnValuesToMatchRegex.json",
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "columnValuesToBeUnique",
  "description": "This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. ",
  "type": "object",
  "javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToMatchRegex",
  "properties": {
    "column": {
      "description": "Name of the column in a table.",
      "type": "string"
    },
    "regex": {
      "description": "The regular expression the column entries should match.",
      "type": "string"
    }
   "type":  {
        "ref":"./basic.json#definitions/testType#ColumnTest"
      }
  },
  "required": ["column", "regex"]
}

"supportedTestPlatform": {
"javaType": "org.openmetadata.catalog.type.SupportedTestPlatform",
"description": "This schema defines the platform type to run the tests",
"type": "string",
"enum": ["SQLAlchemy", "Spark"],
"javaEnums": [
{
"name": "SQLAlchemy"
},
{
"name": "Spark"
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValueLengthsToBeBetween.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValueLengthsToBeBetween",
"description": "This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValueLengthsToBeBetween",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
},
"minValue": {
"description": "The {minValue} for the column length. If minValue is not included, maxValue is treated as upperBound and there will be no minimum length",
"type": "integer"
},
"maxValue": {
"description": "The {maxValue} for the column length. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum length",
"type": "integer"
}
},
"anyOf": [
{ "required": ["minValue", "column"] },
{ "required": ["maxValue", "column"] }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesMissingCountToBeEqual.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesMissingCount",
"description": "This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesMissingCountToBeEqual",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
},
"missingCountValue": {
"description": "Expected number of missing values in the column.",
"type": "integer"
},
"missingValueMatch": {
"description": "By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.",
"items": {
"type": "string"
}
}
},
"required": ["column", "missingCountValue"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeBetween.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeBetween",
"description": "This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToBeBetween",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
},
"minValue": {
"description": "The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum value",
"type": "integer"
},
"maxValue": {
"description": "The {maxValue} value for the column entry. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum value",
"type": "integer"
}
},
"anyOf": [
{ "required": ["minValue", "column"] },
{ "required": ["maxValue", "column"] }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeNotInSet.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeNotInSet",
"description": "This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToBeNotInSet",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
},
"values": {
"description": "An Array of values.",
"items": {
"type": "object"
}
}
},
"required": ["column", "values"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeNotNull.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeNotNull",
"description": "This schema defines the test ColumnValuesToBeNotNull. Test that no values in a column are null. Only explicit nulls count; empty strings don't count as null. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToBeNotNull",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
}
},
"required": ["column"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeUnique.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeUnique",
"description": "This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToBeUnique",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
}
},
"required": ["column"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToMatchRegex.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToMatchRegex",
"description": "This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. ",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToMatchRegex",
"properties": {
"column": {
"description": "Name of the column in a table.",
"type": "string"
},
"regex": {
"description": "The regular expression the column entries should match.",
"type": "string"
}
},
"required": ["column", "regex"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$id": "https://open-metadata.org/schema/tests/table/tableColumnCountToEqual.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "TableColumnCountToEqual",
"description": "This schema defines the test TableColumnCountToEqual. Test the number of columns equal to a value.",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.table.TableColumnCountToEqual",
"properties": {
"value": {
"description": "Expected number of columns to equal to a {value}",
"type": "integer"
}
},
"required": ["value"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"$id": "https://open-metadata.org/schema/tests/tableRowCountToBeBetween.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "TableRowCountToBeBetween",
"description": "This schema defines the test TableRowCountToBeBetween. Test the number of rows to be between two values.",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.table.TableRowCountToBeBetween",
"properties": {
"minValue": {
"description": "Expected number of rows should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows",
"type": "integer"
},
"maxValue": {
"description": "Expected number of rows should be lower than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows",
"type": "integer"
}
},
"anyOf": [{ "required": ["minValue"] }, { "required": ["maxValue"] }]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$id": "https://open-metadata.org/schema/tests/table/tableRowCountToEqual.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "TableRowCountToEqual",
"description": "This schema defines the test TableRowCountToEqual. Test the number of rows equal to a value.",
"type": "object",
"javaType": "org.openmetadata.catalog.entity.tests.table.TableRowCountToEqual",
"properties": {
"value": {
"description": "Expected number of rows {value}",
"type": "integer"
}
},
"required": ["value"]
}