Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #2873: DataQuality test specifications and APIs #2906

Merged
merged 5 commits into from
Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,9 @@ private List<Thread> getThreads(ContainerResponseContext responseContext) {
}

var entityInterface = Entity.getEntityInterface(entity);

if (entityInterface.getChangeDescription() == null) {
return null;
}
List<FieldChange> fieldsUpdated = entityInterface.getChangeDescription().getFieldsUpdated();
List<Thread> threads = new ArrayList<>(getThreads(fieldsUpdated, entity, CHANGE_TYPE.UPDATE));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
import org.openmetadata.catalog.entity.services.DatabaseService;
import org.openmetadata.catalog.exception.CatalogExceptionMessage;
import org.openmetadata.catalog.resources.databases.TableResource;
import org.openmetadata.catalog.tests.ColumnTest;
import org.openmetadata.catalog.tests.TableTest;
import org.openmetadata.catalog.tests.type.TestCaseResult;
import org.openmetadata.catalog.type.ChangeDescription;
import org.openmetadata.catalog.type.Column;
import org.openmetadata.catalog.type.ColumnJoin;
Expand Down Expand Up @@ -110,6 +113,8 @@ public Table setFields(Table table, Fields fields) throws IOException, ParseExce
table.setTableProfile(fields.contains("tableProfile") ? getTableProfile(table) : null);
table.setLocation(fields.contains("location") ? getLocation(table) : null);
table.setTableQueries(fields.contains("tableQueries") ? getQueries(table) : null);
table.setTableTests(fields.contains("tests") ? getTableTests(table) : null);
getColumnTests(fields.contains("tests"), table);
return table;
}

Expand Down Expand Up @@ -245,6 +250,90 @@ public Table addQuery(UUID tableId, SQLQuery query) throws IOException, ParseExc
return table.withTableQueries(getQueries(table));
}

@Transaction
public Table addTableTest(UUID tableId, TableTest tableTest) throws IOException, ParseException {
// Validate the request content
Table table = daoCollection.tableDAO().findEntityById(tableId);
// if ID is not passed we treat it as a new test case being added
List<TableTest> storedTableTests = getTableTests(table);
// we will override any test case name passed by user/client with tableName + testType
// our assumption is there is only one instance of a test type as of now.
tableTest.setName(table.getName() + "." + tableTest.getTableTestCase().getTestType().toString());
Map<String, TableTest> storedMapTableTests = new HashMap<>();
if (storedTableTests != null) {
for (TableTest t : storedTableTests) {
storedMapTableTests.put(t.getName(), t);
}
}
// new test add UUID
if (!storedMapTableTests.containsKey(tableTest.getName())) {
tableTest.setId(UUID.randomUUID());
}

// process test result
if (storedMapTableTests.containsKey(tableTest.getName())
&& tableTest.getResults() != null
&& !tableTest.getResults().isEmpty()) {
TableTest prevTableTest = storedMapTableTests.get(tableTest.getName());
List<TestCaseResult> prevTestCaseResults = prevTableTest.getResults();
List<TestCaseResult> newTestCaseResults = tableTest.getResults();
newTestCaseResults.addAll(prevTestCaseResults);
tableTest.setResults(newTestCaseResults);
}

storedMapTableTests.put(tableTest.getName(), tableTest);
List<TableTest> updatedTests = new ArrayList<>(storedMapTableTests.values());
daoCollection
.entityExtensionDAO()
.insert(tableId.toString(), "table.tableTests", "tableTest", JsonUtils.pojoToJson(updatedTests));
setFields(table, Fields.EMPTY_FIELDS);
return table.withTableTests(getTableTests(table));
}

@Transaction
public Table addColumnTest(UUID tableId, ColumnTest columnTest) throws IOException, ParseException {
// Validate the request content
Table table = daoCollection.tableDAO().findEntityById(tableId);
String columnName = columnTest.getColumnName();
validateColumn(table, columnName);
// we will override any test case name passed by user/client with columnName + testType
// our assumption is there is only one instance of a test type as of now.
columnTest.setName(columnName + "." + columnTest.getTestCase().getTestType().toString());
List<ColumnTest> storedColumnTests = getColumnTests(table, columnName);
Map<String, ColumnTest> storedMapColumnTests = new HashMap<>();
if (storedColumnTests != null) {
for (ColumnTest ct : storedColumnTests) {
storedMapColumnTests.put(ct.getName(), ct);
}
}

// new test, generate UUID
if (!storedMapColumnTests.containsKey(columnTest.getName())) {
columnTest.setId(UUID.randomUUID());
}

// process test result
if (storedMapColumnTests.containsKey(columnTest.getName())
&& columnTest.getResults() != null
&& !columnTest.getResults().isEmpty()) {
ColumnTest prevColumnTest = storedMapColumnTests.get(columnTest.getName());
List<TestCaseResult> prevTestCaseResults = prevColumnTest.getResults();
List<TestCaseResult> newTestCaseResults = columnTest.getResults();
newTestCaseResults.addAll(prevTestCaseResults);
columnTest.setResults(newTestCaseResults);
}

storedMapColumnTests.put(columnTest.getName(), columnTest);
List<ColumnTest> updatedTests = new ArrayList<>(storedMapColumnTests.values());
String extension = "table.column." + columnName + ".tests";
daoCollection
.entityExtensionDAO()
.insert(table.getId().toString(), extension, "columnTest", JsonUtils.pojoToJson(updatedTests));
setFields(table, Fields.EMPTY_FIELDS);
getColumnTests(true, table);
return table;
}

@Transaction
public Table addDataModel(UUID tableId, DataModel dataModel) throws IOException, ParseException {
Table table = daoCollection.tableDAO().findEntityById(tableId);
Expand Down Expand Up @@ -643,6 +732,24 @@ private List<SQLQuery> getQueries(Table table) throws IOException {
return tableQueries;
}

private List<TableTest> getTableTests(Table table) throws IOException {
return JsonUtils.readObjects(
daoCollection.entityExtensionDAO().getExtension(table.getId().toString(), "table.tableTests"), TableTest.class);
}

private List<ColumnTest> getColumnTests(Table table, String columnName) throws IOException {
String extension = "table.column." + columnName + ".tests";
return JsonUtils.readObjects(
daoCollection.entityExtensionDAO().getExtension(table.getId().toString(), extension), ColumnTest.class);
}

private void getColumnTests(boolean setTests, Table table) throws IOException {
List<Column> columns = table.getColumns();
for (Column c : Optional.ofNullable(columns).orElse(Collections.emptyList())) {
c.setColumnTests(setTests ? getColumnTests(table, c.getName()) : null);
}
}

public static class TableEntityInterface implements EntityInterface<Table> {
private final Table entity;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
import org.openmetadata.catalog.resources.Collection;
import org.openmetadata.catalog.security.Authorizer;
import org.openmetadata.catalog.security.SecurityUtil;
import org.openmetadata.catalog.tests.ColumnTest;
import org.openmetadata.catalog.tests.TableTest;
import org.openmetadata.catalog.type.DataModel;
import org.openmetadata.catalog.type.EntityHistory;
import org.openmetadata.catalog.type.Include;
Expand Down Expand Up @@ -106,7 +108,7 @@ public TableList(List<Table> data, String beforeCursor, String afterCursor, int

static final String FIELDS =
"columns,tableConstraints,usageSummary,owner,"
+ "tags,followers,joins,sampleData,viewDefinition,tableProfile,location,tableQueries,dataModel";
+ "tags,followers,joins,sampleData,viewDefinition,tableProfile,location,tableQueries,dataModel,tests";
public static final List<String> FIELD_LIST = Arrays.asList(FIELDS.replace(" ", "").split(","));

@GET
Expand Down Expand Up @@ -502,6 +504,34 @@ public Table addDataModel(
return addHref(uriInfo, table);
}

@PUT
@Path("/{id}/tableTest")
@Operation(summary = "Add table test cases", tags = "tables", description = "Add test cases to the table.")
public Table addTableTest(
@Context UriInfo uriInfo,
@Context SecurityContext securityContext,
@Parameter(description = "Id of the table", schema = @Schema(type = "string")) @PathParam("id") String id,
TableTest tableTest)
throws IOException, ParseException {
SecurityUtil.checkAdminOrBotRole(authorizer, securityContext);
Table table = dao.addTableTest(UUID.fromString(id), tableTest);
return addHref(uriInfo, table);
}

@PUT
@Path("/{id}/columnTest")
@Operation(summary = "Add table test cases", tags = "tables", description = "Add test cases to the table.")
public Table addColumnTest(
@Context UriInfo uriInfo,
@Context SecurityContext securityContext,
@Parameter(description = "Id of the table", schema = @Schema(type = "string")) @PathParam("id") String id,
ColumnTest columnTest)
throws IOException, ParseException {
SecurityUtil.checkAdminOrBotRole(authorizer, securityContext);
Table table = dao.addColumnTest(UUID.fromString(id), columnTest);
return addHref(uriInfo, table);
}

@DELETE
@Path("/{id}/followers/{userId}")
@Operation(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,14 @@
"$ref": "#/definitions/column"
},
"default": null
},
"columnTests": {
"description": "List of column test cases that ran against a table column.",
"type": "array",
"items": {
"$ref": "../../tests/columnTest.json"
},
"default": null
}
},
"required": ["name", "dataType"],
Expand Down Expand Up @@ -588,6 +596,14 @@
},
"default": null
},
"tableTests": {
"description": "List of test cases that ran against a table.",
"type": "array",
"items": {
"$ref": "../../tests/tableTest.json"
},
"default": null
},
"dataModel": {
"description": "This captures information about how the table is modeled. Currently only DBT model is supported.",
"$ref": "#/definitions/dataModel"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
harshach marked this conversation as resolved.
Show resolved Hide resolved
"$id": "https://open-metadata.org/schema/tests/basic.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Basic",
"description": "This schema defines basic types that are used by other test schemas.",
"definitions": {
"testCaseResult": {
"description": "Schema to capture test case result.",
"javaType": "org.openmetadata.catalog.tests.type.TestCaseResult",
"type": "object",
"properties": {
"executionTime": {
"description": "Data one which profile is taken.",
"$ref": "../type/basic.json#/definitions/timestamp"
},
"status": {
"description": "Status of Test Case run.",
"javaType": "org.openmetadata.catalog.tests.type.TestCaseStatus",
"type": "string",
"enum": ["Success", "Failed", "Aborted"],
"javaEnums": [
{
"name": "Success"
},
{
"name": "Failed"
},
{
"name": "Aborted"
}
]
},
"result": {
"description": "Details of test case results.",
"type": "string"
},
"sampleData": {
"description": "sample data to capture rows/columns that didn't match the expressed testcase.",
"type": "string"
}
}
},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we add tests for say reports, dashboards and other data assets, would they be defined here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes thats the idea . One thing with the JsonSchema lets say we define type like above but I want to designate a type in the test definition. i.e call out a value in the enum to be the type. I am not able to find a way to configure that.

{
  "$id": "https://open-metadata.org/schema/tests/column/columnValuesToMatchRegex.json",
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "columnValuesToBeUnique",
  "description": "This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. ",
  "type": "object",
  "javaType": "org.openmetadata.catalog.entity.tests.column.ColumnValuesToMatchRegex",
  "properties": {
    "column": {
      "description": "Name of the column in a table.",
      "type": "string"
    },
    "regex": {
      "description": "The regular expression the column entries should match.",
      "type": "string"
    }
   "type":  {
        "ref":"./basic.json#definitions/testType#ColumnTest"
      }
  },
  "required": ["column", "regex"]
}

"testCaseExecutionFrequency": {
"description": "How often the test case should run.",
"javaType": "org.openmetadata.catalog.tests.type.TestCaseExecutionFrequency",
"type": "string",
"enum": ["Hourly", "Daily", "Weekly"],
"javaEnums": [
{
"name": "Hourly"
},
{
"name": "Daily"
},
{
"name": "Weekly"
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValueLengthsToBeBetween.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValueLengthsToBeBetween",
"description": "This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. ",
"type": "object",
"javaType": "org.openmetadata.catalog.tests.column.ColumnValueLengthsToBeBetween",
"properties": {
"minValue": {
"description": "The {minValue} for the column length. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows",
"type": "integer"
},
"maxValue": {
"description": "The {maxValue} for the column length. if maxValue is not included, minValue is treated as lowerBound and there will eb no maximum number of rows",
"type": "integer"
}
},
"anyOf": [
{
"required": ["minValue"]
},
{
"required": ["maxValue"]
}
],
"additionalProperties": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesMissingCountToBeEqual.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesMissingCount",
"description": "This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. ",
"type": "object",
"javaType": "org.openmetadata.catalog.tests.column.ColumnValuesMissingCountToBeEqual",
"properties": {
"missingCountValue": {
"description": "No.of missing values to be equal to.",
"type": "integer"
},
"missingValueMatch": {
"description": "By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.",
"items": {
"type": "string"
}
}
},
"required": ["missingValue"],
"additionalProperties": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeBetween.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeBetween",
"description": "This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. ",
"type": "object",
"javaType": "org.openmetadata.catalog.tests.column.ColumnValuesToBeBetween",
"properties": {
"minValue": {
"description": "The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows",
"type": "integer"
},
"maxValue": {
"description": "The {maxValue} value for the column entry. if maxValue is not included, minValue is treated as lowerBound and there will eb no maximum number of rows",
"type": "integer"
}
},
"anyOf": [{ "required": ["minValue"] }, { "required": ["maxValue"] }],
"additionalProperties": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeNotInSet.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeNotInSet",
"description": "This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. ",
"type": "object",
"javaType": "org.openmetadata.catalog.tests.column.ColumnValuesToBeNotInSet",
"properties": {
"values": {
"description": "An Array of values.",
"items": {
"type": "object"
}
}
},
"required": ["values"],
"additionalProperties": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"$id": "https://open-metadata.org/schema/tests/column/columnValuesToBeNotNull.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "columnValuesToBeNotNull",
"description": "This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. ",
"type": "object",
"javaType": "org.openmetadata.catalog.tests.column.ColumnValuesToBeNotNull",
"properties": {},
"additionalProperties": false
}