2. Validate data. Demonstrates how to validate a dataset against a schema and produce an error report.

We are going to use the metadata (the table schema) generated in the previous notebook to validate datasets against it.

In [None]:
# Most of the validation functions are in the goodtables package
from goodtables import validate
import pandas as pd 

In [None]:
from tableschema import Schema
# Load the table schema descriptor from its JSON file.
table_schema = Schema("schemas/table_test_data.json")
# Last expression of the cell: displays the schema's `valid` attribute.
table_schema.valid

In [None]:
# Sample CSV files validated below. Going by the file names, the first is
# expected to pass validation and the second to contain errors.
path_noerrors = "schemas/data/test_data_no_errors.csv"
path_errors = "schemas/data/test_data_with_errors.csv"

In [None]:
# Show the raw contents of the error-free CSV, then the same file as a pandas DataFrame.
with open(path_noerrors) as csv_file:
    raw_text = csv_file.read()
print(raw_text)
pd.read_csv(path_noerrors)

In [None]:
# Show the raw contents of the CSV with errors, then the same file as a pandas DataFrame.
with open(path_errors) as csv_file:
    raw_text = csv_file.read()
print(raw_text)

pd.read_csv(path_errors)

In [None]:
# Validate the error-free CSV against the schema descriptor; the result is displayed as the cell output.
validate(path_noerrors, schema=table_schema.descriptor)

In [None]:
# Validate the CSV with errors against the same schema descriptor; the result is displayed as the cell output.
validate(path_errors, schema=table_schema.descriptor)

## Other forms of validation

There are other built-in forms of validation, such as field constraints (`minimum`, `unique`, `enum`) declared in the schema.

In [None]:
import json
# Table schema written inline as JSON text: field "a" is an integer with a
# minimum of 2; field "b" is a string that must be unique and one of "y" / "z".
table_schema_json = """
{"$schema": "https://frictionlessdata.io/schemas/table-schema.json",
    "fields": [
        {
            "name": "a",
            "type": "integer",
            "format": "default",
            "constraints": {"minimum": 2}
        },
        {
            "name": "b",
            "type": "string",
            "format": "default",
            "constraints": {"unique": true, "enum": ["y", "z"]}
        }
    ],
    "missingValues": [
        ""
    ]
}
"""
# Parse the JSON text into a plain dict. NOTE: this rebinds `table_schema`,
# which earlier in the notebook held a tableschema `Schema` object.
table_schema = json.loads(table_schema_json)

In [None]:
# CSV validated in the following cells; print its raw contents for reference.
path_othervalidation = "schemas/data/other_validation.csv"
with open(path_othervalidation) as csv_file:
    raw_text = csv_file.read()
print(raw_text)

In [None]:
# Validate against the inline schema; here `table_schema` is the plain dict
# produced by json.loads, so it is passed directly (no `.descriptor`).
validate(path_othervalidation, schema=table_schema)

## Extending validation with custom validators 

In [None]:
import json
# Table schema written inline as JSON text: field "a" is an integer with a
# minimum of 2; field "b" is a plain string with no constraints declared.
table_schema_json = """
{"$schema": "https://frictionlessdata.io/schemas/table-schema.json",
    "fields": [
        {
            "name": "a",
            "type": "integer",
            "format": "default",
            "constraints": {"minimum": 2}
        },
        {
            "name": "b",
            "type": "string",
            "format": "default"
        }
    ],
    "missingValues": [
        ""
    ]
}
"""
# Parse the JSON text into a plain dict, rebinding `table_schema` again.
table_schema = json.loads(table_schema_json)

In [None]:
from goodtables import check, Error

@check('custom-check', type='custom', context='body')
def custom_check(cells, *args, **kwargs):
    """Report an error for every cell in column "b" whose value is "z".

    Registered under the name 'custom-check' by the `check` decorator, so it
    can be requested through `validate(..., checks=['custom-check', ...])`.

    Args:
        cells: iterable of cell mappings (presumably the cells of one data
            row - confirm against the goodtables docs); each is read for its
            "header" and "value" entries.
        *args, **kwargs: accepted for call compatibility and ignored.

    Returns:
        A list of `Error` objects with code 'custom-error', one per matching
        cell; empty when nothing matches.
    """
    target_header = "b"
    flagged_value = "z"
    # Build the message from the header/value actually being checked so the
    # reported text cannot drift from the condition below.
    message = 'Custom error: value "{}" found in column "{}"'.format(
        flagged_value, target_header
    )

    errors = []
    for cell in cells:
        # `.get` so a cell lacking either key is skipped instead of raising KeyError.
        if cell.get("header") == target_header and cell.get("value") == flagged_value:
            errors.append(Error('custom-error', cell, message=message))
    return errors

# Run the custom check registered as 'custom-check' together with the 'schema' check.
validate(path_othervalidation, schema=table_schema, checks=['custom-check', 'schema'])