# Validate and prepare YAML policy files

In [1]:
import copy
import pathlib
import json
import os

import networkx
from pykwalify.core import (
    Rule,
    Core as Kwalify,
)
import ruamel.yaml

In [2]:
# Location of the pykwalify schema, and every YAML file under test-policies/
# (as plain strings) to validate against it.
schema_path = 'schema.yml'
test_paths = [str(path) for path in pathlib.Path('test-policies').glob('*.yml')]

In [3]:
def schema_to_template(rule):
    """
    Build an empty template mirroring the structure of a schema rule.

    A rule whose ``type`` is ``'map'`` becomes a dict with one entry per key
    in ``rule.mapping``; a ``'seq'`` rule becomes a list with one entry per
    item in ``rule.sequence``. Children are converted recursively. Any other
    rule type becomes ``None``.
    """
    kind = rule.type
    if kind == 'map':
        blank = {}
        for name, child in rule.mapping.items():
            blank[name] = schema_to_template(child)
        return blank
    if kind == 'seq':
        return list(map(schema_to_template, rule.sequence))
    # Scalars (and anything unrecognised) are left unfilled.
    return None

In [4]:
# Create template
# Load the schema from the same path the validation cell uses (schema_path),
# rather than repeating the file name. `schema` is kept because generated
# records are validated against it further down.
# NOTE(review): newer ruamel.yaml releases deprecate the module-level
# safe_load / round_trip_dump helpers in favour of the YAML() class --
# confirm against the pinned ruamel.yaml version before upgrading.
with open(schema_path) as read_file:
    schema = ruamel.yaml.safe_load(read_file)
# Wrap the raw schema in a pykwalify Rule so it can be walked recursively.
root_rule = Rule(schema)
template = schema_to_template(root_rule)
# Write the blank template in block style (default_flow_style=False).
with open('template.yml', 'w') as write_file:
    ruamel.yaml.round_trip_dump(template, write_file, default_flow_style=False)

In [5]:
# Perform tests
# Validate every file in test_paths against the schema. With
# raise_exception=False pykwalify reports validation errors instead of
# raising, so one invalid test policy does not stop the loop.
for test_path in test_paths:
    kore = Kwalify(
        source_file=test_path,
        schema_files=[schema_path],
        strict_rule_validation=True,
    )
    # The result is not inspected here; `data` is reassigned by later cells.
    data = kore.validate(raise_exception=False)

validation.invalid
 --- All found errors ---
["Value '11' is not of type 'str'. Path: '/policy-id'"]
Errors found but will not raise exception...


In [6]:
# Load the RoMEO policy ontology (node-link JSON) and rebuild it as a
# networkx graph.
with open('../romeo/data/ontology.json') as read_file:
    data = json.load(read_file)
# The file is no longer needed once the JSON has been parsed.
graph = networkx.node_link_graph(data)

In [7]:
# Create stubs for each record
# Every non-journal node of the ontology graph becomes one policy record,
# validated against the schema and written to policies/<node>.yml.

# Skip writing files on Travis.
# Was getting "FileNotFoundError: [Errno 2] No such file or directory". See
# https://travis-ci.com/transpose-publishing/policies-database/builds/72971278#L756
# The environment does not change during the loop, so check it once.
skip_writing = os.environ.get('TRAVIS', 'false') == 'true'

for node, data in graph.nodes(data=True):
    if 'Journal Title' in data:
        # Journal nodes are listed under their policies, not given a record.
        continue
    # The template can contain nested dicts/lists, so each record needs its
    # own deep copy.
    record = copy.deepcopy(template)
    record['policy-id'] = node
    record['publisher'] = data['Publisher']
    record['policy-heading'] = data['Policy Heading']
    # Split every node with a path to this one into journals and policies.
    child_policies, journals = [], []
    for inheritor in networkx.ancestors(graph, node):
        # Use graph.nodes[...]; the graph.node[...] alias was removed in
        # networkx 2.4.
        inheritor_data = graph.nodes[inheritor]
        if 'Journal Title' in inheritor_data:
            journals.append(inheritor)
        else:
            child_policies.append(inheritor)
    record['journals'] = sorted(journals)
    record['child-policies'] = sorted(child_policies)
    record['parent-policies'] = sorted(networkx.descendants(graph, node))
    # validate() is called with its defaults here, unlike the test-policy
    # loop, which passes raise_exception=False.
    kwalify = Kwalify(schema_data=schema, source_data=record)
    validated_record = kwalify.validate()
    if skip_writing:
        continue
    path = pathlib.Path(f'policies/{node}.yml').resolve()
    # Create the output directory if it is missing so open() cannot fail on
    # an absent policies/ folder.
    # NOTE(review): a missing directory may have been the cause of the Travis
    # failure above -- confirm before removing the Travis skip.
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('w') as write_file:
        ruamel.yaml.round_trip_dump(record, write_file, default_flow_style=False)