# Validate our example JSON against our schemas

# Validate JSON Dialog Events
The following code takes all of the sample dialog events in `sample-json` and validates them against the schema `/schemas/dialogEvent.json`

In [1]:
from jsonschema import validate
import json

with open("../../schemas/dialogEvent-schema.json", 'r') as f:
    schema = json.load(f)

stems=['figure2','figure3','figure4','utterance0','utterance4a','utterance5']

for s in stems:
    print(f'Validating JSON: {s}.json')
    with open(f'../sample-json/{s}.json', 'r') as f:     
        instance = json.load(f)
        validate(instance=instance, schema=schema)     

Validating JSON: figure2.json
Validating JSON: figure3.json
Validating JSON: figure4.json
Validating JSON: utterance0.json
Validating JSON: utterance4a.json
Validating JSON: utterance5.json


## Validate YML Dialog Events
The OVON Dialog Object standard is specified in JSON but the libraries in this repository also support YML equivalents with conversion both ways.    This code validates the YML that the JSON was generated from.  ( See dialog-packet.ipynb)

In [1]:
from jsonschema import validate
import yaml
import json

with open("../../schemas/dialog-event/1.0.1/dialog-event-schema.json", 'r') as f:
    schema = json.load(f)

stems=['figure2','figure3','figure4','utterance0','utterance4a','utterance5']
for s in stems:
    print(f'Validating YAML: {s}.yml')
    with open(f'../sample-yaml/{s}.yml', 'r') as f:     
        instance = yaml.safe_load(f)
        validate(instance=instance, schema=schema)     

Validating YAML: figure2.yml


ValidationError: 'speakerId' is a required property

Failed validating 'required' in schema:
    {'$defs': {'features': {'properties': {'encoding': {'description': 'The '
                                                                       'text '
                                                                       'encoding '
                                                                       'of '
                                                                       'the '
                                                                       'token '
                                                                       'values',
                                                        'type': 'string'},
                                           'lang': {'description': 'The '
                                                                   'language '
                                                                   'of the '
                                                                   'token '
                                                                   'values',
                                                    'type': 'string'},
                                           'mimeType': {'description': 'The '
                                                                       'mimeType '
                                                                       'of '
                                                                       'the '
                                                                       'token '
                                                                       'values',
                                                        'type': 'string'},
                                           'tokenSchema': {'description': 'A '
                                                                          'schema '
                                                                          'restricting '
                                                                          'the '
                                                                          'token '
                                                                          'values',
                                                           'type': 'string'},
                                           'tokens': {'alternates': {'items': {'items': {'$ref': '#/$defs/token',
                                                                                         'type': 'object'},
                                                                               'type': 'array'},
                                                                     'type': 'array'},
                                                      'items': {'$ref': '#/$defs/token'},
                                                      'type': 'array'}},
                            'required': ['mimeType', 'tokens'],
                            'type': 'object'},
               'isoDuration': {'description': 'A string in ISO 8601 '
                                              'duration format.'},
               'isoTime': {'description': 'A string in ISO 8601 absolute '
                                          'format.'},
               'jsonpath': {'description': 'an expression in JSON Path '
                                           'syntax',
                            'type': 'string'},
               'span': {'anyOf': [{'required': ['startTime']},
                                  {'required': ['startOffset']}],
                        'properties': {'endOffset': {'$ref': '#/$defs/isoDuration'},
                                       'endTime': {'$ref': '#/$defs/isoTime'},
                                       'startOffset': {'$ref': '#/$defs/isoDuration'},
                                       'startTime': {'$ref': '#/$defs/isoTime'}}},
               'token': {'anyOf': [{'required': ['value']},
                                   {'required': ['valueUrl']}],
                         'properties': {'confidence': {'type': 'number'},
                                        'links': {'items': {'$ref': '#/$defs/jsonpath'},
                                                  'type': 'array'},
                                        'span': {'$ref': '#/$defs/span'},
                                        'value': {'type': ['number',
                                                           'string',
                                                           'object',
                                                           'array',
                                                           'boolean']},
                                        'valueUrl': {'$ref': '#/$defs/url'}},
                         'type': 'object'},
               'url': {'description': 'Any valid URL', 'type': 'string'}},
     '$id': 'https://openvoicenetwork.org/schema/dialogEvent.json',
     '$schema': 'https://openvoicenetwork.org/schema',
     'description': "A representation of a 'language event’ that is to say "
                    'any information associated with a phrase, utterance '
                    'or part of an utterance.',
     'properties': {'features': {'patternProperties': {'.*': {'$ref': '#/$defs/features'}},
                                 'type': 'object'},
                    'id': {'type': 'string'},
                    'previousId': {'type': 'string'},
                    'span': {'$ref': '#/$defs/span'},
                    'speakerID': {'type': 'string'}},
     'required': ['id', 'speakerId', 'span', 'features'],
     'type': 'object'}

On instance:
    {'features': {'my-audio-feature': {'mimeType': 'audio/wav',
                                       'tokens': [{'value-url': 'http://localhost/xyz1234.wav'}]},
                  'my-text-token-feature': {'encoding': 'UTF-8',
                                            'lang': 'en',
                                            'mimeType': 'text/plain',
                                            'span': {'endOffset': 'PT0.1045',
                                                     'startTime': 'PT0.0210'},
                                            'tokenSchema': '',
                                            'tokens': [{'value': 'what'},
                                                       {'value': 'is'},
                                                       {'value': 'the'},
                                                       {'value': 'weather'},
                                                       {'value': 'forecast'},
                                                       {'value': 'for'},
                                                       {'value': 'tomorrow'}]}},
     'id': 'user-utterance-30',
     'previousId': 'user-utterance-28',
     'span': {'endOffset': 'PT0.1045',
              'startTime': '2022-12-20 15:59:01.246500+00:00'},
     'speakerID': 'b5y09lky5KU5'}