In [1]:
from csv import DictReader
from AP import AP, PropertyStatement, ShapeInfo
from AP2SHACL import AP2SHACLConverter
import pprint
# Pretty-printer for ad-hoc inspection of intermediate data structures
# (not called by any of the cells visible in this part of the notebook).
pp = pprint.PrettyPrinter(indent=2)
# Language tag passed to the add_label / add_comment / add_note calls below.
lang = "en"

In [2]:
# Path of the CSV file to convert; it is opened and read with csv.DictReader
# in the next cell.
inputFN = "./sampleData/simpleBook.csv"

In [3]:
# Sort the rows of the input CSV into three lists according to the value of
# their "Name" column; rows with any other "Name" are ignored.
gapMetadata = []   # "Page" rows
gapNodeData = []   # "Shape" rows
gapLinkData = []   # "Line" rows
rowsByName = {
    "Page": gapMetadata,
    "Shape": gapNodeData,
    "Line": gapLinkData,
}
# NOTE(review): the file is opened with the platform default encoding; later
# cells split on the non-ASCII characters "«" and "»", so confirm the default
# matches the encoding of the export.
with open(inputFN, "r") as inFile:
    for record in DictReader(inFile):
        bucket = rowsByName.get(record["Name"])
        if bucket is not None:
            bucket.append(record)

In [4]:
# Create the application profile and fill in its metadata and namespace
# declarations from the "Page" row(s) collected from the CSV.
ap = AP()
fields = ["dct:creator", "dct:title", "dct:date"]
for row in gapMetadata:
    if row["Name"] != "Page":
        continue
    # Each metadata field is a CSV column of the same name.
    for field in fields:
        ap.add_metadata(field, row[field])
    # The "prefixes" cell holds one "<prefix> <uri>" pair per line.
    # splitlines() copes with "\n" and "\r\n" alike and yields nothing for an
    # empty cell; split() with no argument tolerates repeated spaces or tabs.
    for ns in row["prefixes"].splitlines():
        parts = ns.split()
        if not parts:
            continue  # blank line between or after declarations
        if len(parts) != 2:
            # Same exception type the bare unpacking used to raise, but with
            # a message that points at the offending line.
            raise ValueError(
                "cannot parse namespace declaration {!r}: "
                "expected '<prefix> <uri>'".format(ns)
            )
        pre, uri = parts
        ap.add_namespace(pre, uri)

# ap.dump()

In [5]:
# Build one ShapeInfo per "Shape" row, plus the property statements that are
# described on the shape itself: an optional rdf:type constraint and the
# literal-valued properties listed line by line in "Text Area 3".

# Maps a row's "Id" to its shape id; used when processing links.
shapeIndex = dict()

# Cardinality marker -> (mandatory, repeatable) for "Text Area 3" lines.
propertyCardinality = {
    "+": (True, True),
    "?": (False, False),
    "*": (False, True),
}

for row in gapNodeData:
    shapeIndex[row["Id"]] = row["shapeid"]

    # --- shape-level information -------------------------------------------
    sh = ShapeInfo()
    sh.set_id(row["shapeid"])
    sh.add_label(lang, row["shapelabel"])
    sh.add_comment(lang, row["comment"])
    sh.add_note(lang, row["note"])
    sh.append_target(row["target"], row["targettype"])
    sh.set_severity(row["severity"])
    if row["open or closed"] == "Closed":
        sh.set_closed("True")
        # NOTE(review): an empty "ignoreproperties" cell still yields one
        # empty-string entry here -- confirm that is what the converter expects.
        for p in row["ignoreproperties"].split('\n'):
            sh.add_ignoreProps(p)
    elif row["open or closed"] == "Open":
        sh.set_closed("False")
    if row["optional or mandatory"] == "Mandatory":
        sh.set_mandatory("True")
    elif row["optional or mandatory"] == "Optional":
        sh.set_mandatory("False")
    ap.add_shapeInfo(row["shapeid"], sh)

    # --- rdf:type constraint -------------------------------------------------
    if row["rdf:type"] != "":
        ps = PropertyStatement()
        ps.add_property("rdf:type")
        ps.add_shape(row["shapeid"])
        ps.add_label(lang, "type")
        ps.add_mandatory(True)
        ps.add_repeatable(False)
        ps.add_valueNodeType("IRI")
        ps.add_valueConstraint(row["rdf:type"])
        ap.add_propertyStatement(ps)

    # --- literal properties --------------------------------------------------
    # Each non-blank line reads: label «property» [datatype [cardinality]]
    for p in row["Text Area 3"].split("\n"):
        if len(p) <= 2:  # avoid blank lines
            continue
        ps = PropertyStatement()
        ps.add_shape(row["shapeid"])
        [label, rest] = p.split(" «")
        [prop, rest] = rest.split("» ")
        rest = rest.split()
        ps.add_property(prop)
        ps.add_label(lang, label)
        ps.add_valueNodeType("Literal")
        if len(rest) > 0:
            ps.add_valueDataType(rest[0])
            if len(rest) > 1:
                # An unrecognised marker leaves the cardinality unset.
                flags = propertyCardinality.get(rest[1])
                if flags is not None:
                    ps.add_mandatory(flags[0])
                    ps.add_repeatable(flags[1])
            else:
                # Datatype given without a marker: exactly one value.
                ps.add_mandatory(True)
                ps.add_repeatable(False)
        # Register the statement only for lines that actually produced one;
        # doing this for blank lines as well would re-add the previous
        # statement (or fail if there is none yet).
        ap.add_propertyStatement(ps)
                


In [6]:
# Turn every "Line" row into a property statement that links two shapes.
# The arrowhead decides the direction: the shape at the arrow end becomes the
# value shape, the shape at the other end owns the property.

# "Text Area 2" cardinality text -> (mandatory, repeatable).
linkCardinality = {
    "1..1": (True, False),
    "1..*": (True, True),
    "0..1": (False, False),
    "0..*": (False, True),
}

for row in gapLinkData:
    if (row['Source Arrow'] == "None") and (row['Destination Arrow'] == "Arrow"):
        source = shapeIndex[row["Line Source"]]
        destination = shapeIndex[row["Line Destination"]]
    elif (row['Source Arrow'] == "Arrow") and (row['Destination Arrow'] == "None"):
        source = shapeIndex[row["Line Destination"]]
        destination = shapeIndex[row["Line Source"]]
    else:
        # Any other arrow combination gives no usable direction. Fail loudly
        # rather than reuse the endpoints of the previous row (or hit a
        # NameError on the first row).
        raise ValueError(
            "cannot determine direction of line {!r}: "
            "Source Arrow={!r}, Destination Arrow={!r}".format(
                row.get("Id"), row['Source Arrow'], row['Destination Arrow']
            )
        )

    # Missing or unrecognised cardinality text means optional and repeatable.
    mandatory, repeatable = linkCardinality.get(row["Text Area 2"], (False, True))

    ps = PropertyStatement()
    ps.add_mandatory(mandatory)
    ps.add_repeatable(repeatable)
    ps.add_shape(source)
    ps.add_property(row["property"])
    ps.add_label(lang, row['Text Area 1'])
    # NOTE(review): the shape cell above passes "IRI" in upper case -- confirm
    # that the AP classes treat the node type case-insensitively.
    ps.add_valueNodeType("iri")
    ps.add_valueShape(destination)
    ap.add_propertyStatement(ps)

In [7]:
# Uncomment to inspect the assembled application profile before conversion.
#ap.dump()
# Wrap the application profile built in the cells above in a converter,
# run the AP -> SHACL conversion, then dump the result (the Turtle shown in
# this cell's output).
ap2shaclConverter = AP2SHACLConverter(ap)
ap2shaclConverter.convert_AP_SHACL()
ap2shaclConverter.dump_shacl()

# SHACL generated by python AP to shacl converter
@base <http://example.org/> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sdo: <https://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<BookShape> a sh:NodeShape ;
    sh:class sdo:Book ;
    sh:closed true ;
    sh:description "Shape for describing books"@en ;
    sh:ignoredProperties [ ] ;
    sh:name "Book"@en ;
    sh:property <bookshapeAuthor>,
        <bookshapeISBN>,
        <bookshapeTitle> ;
    sh:targetClass sdo:Book .

<AuthorShape> a sh:NodeShape ;
    sh:class foaf:Person ;
    sh:closed false ;
    sh:description "Shape for describing authors"@en ;
    sh:name "Author"@en ;
    sh:property <authorshapeFamilyname>,
        <authorshapeGivenname> ;
    sh:targetObjectsOf dct:creator .

<authorshapeFamilyname> a sh:PropertyShape ;
    sh:datatype xsd: