rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment')

In [52]:
import csv, rdflib, re

# Build the parts-catalog graph. Every CSV row becomes a part resource that is
# linked to an assembly (taken from the 'Figure' column) and a system (taken
# from the 'Section' column).
g = rdflib.Graph()
base = rdflib.Namespace('http://Ameneh.org/aircraft.owl#')
g.bind(None, base)

# Read the catalog inside a context manager so the file handle is closed as
# soon as all rows are consumed; newline='' is what the csv module requests
# for file objects it reads from.
with open('pdf-extracted/parts-catalog.csv', newline='') as catalog_file:
    parts_catalog = csv.DictReader(catalog_file)
    for line in parts_catalog:
        # Resource names are the title-cased cell text with every character
        # outside a-z/A-Z removed.
        system, assembly, cls = [
            base[re.sub('[^a-zA-Z]', '', line[key].title())]
            for key in ['Section', 'Figure', 'Type']
        ]
        label = line['Specifics'].strip() + ' ' + line['Type'].lower()
        # The part resource itself is named after the raw 'Part Number' cell.
        s = base['partnr-' + line['Part Number']]
        triples = [
            # The part: typed by its 'Type', part of both assembly and system.
            (s, rdflib.RDF['type'], cls),
            (s, base['partOf'], assembly),
            (s, base['partOf'], system),
            (s, base['partNumber'], rdflib.Literal(line['Part Number'])),
            (s, rdflib.RDFS['label'], rdflib.Literal(label)),

            # The assembly, labelled with the original 'Figure' text.
            (assembly, rdflib.RDF['type'], base['Assembly']),
            (assembly, rdflib.RDFS['label'], rdflib.Literal(line['Figure'])),

            # The system, labelled with the original 'Section' text.
            (system, rdflib.RDF['type'], base['System']),
            (system, rdflib.RDFS['label'], rdflib.Literal(line['Section'])),
        ]
        for t in triples:
            g.add(t)

# Add the part class hierarchy from the tab-separated part-classes file, then
# write the whole graph out as Turtle.
# The file is opened in a context manager so the handle is closed once the
# rows are read; newline='' is what the csv module requests for file objects.
with open('prompt-extracted/part-classes.tsv', newline='') as classes_file:
    lines = csv.DictReader(classes_file, delimiter='\t')
    for line in lines:
        # Resource names: the text before the first '(' is title-cased and
        # stripped of every character outside a-z/A-Z.
        part, cls = [
            base[re.sub('[^a-zA-Z]', '', line[key].split('(')[0].title())]
            for key in ['Part', 'subClassOf']
        ]

        triples = [
            (part, rdflib.RDFS['label'], rdflib.Literal(line['Part'])),
            (part, rdflib.RDFS['subClassOf'], cls),
            (cls, rdflib.RDFS['label'], rdflib.Literal(line['subClassOf'])),
            # Every parent class is itself placed under the generic Part class.
            (cls, rdflib.RDFS['subClassOf'], base['Part']),
        ]
        for t in triples:
            g.add(t)

g.serialize('generated-rdf/parts-catalog.ttl', format='ttl')

<Graph identifier=N03589ea4dc05489c8dcf332b6efbbba1 (<class 'rdflib.graph.Graph'>)>

In [43]:
import csv, rdflib, re

# Build the troubleshooting graph: each CSV row yields a trouble, its probable
# cause (both typed as Problem) and an anonymous remedy that solves the cause.
g = rdflib.Graph()
base = rdflib.Namespace('http://Ameneh.org/aircraft.owl#')
g.bind(None, base)

# Read inside a context manager so the file handle is closed once the rows are
# consumed; newline='' is what the csv module requests for file objects.
with open('pdf-extracted/troubleshooting.csv', newline='') as trouble_file:
    troubleshooting = csv.DictReader(trouble_file)
    for line in troubleshooting:
        # Resource names: the text before the first '(' is title-cased and
        # stripped of every character outside a-z/A-Z.
        trouble, cause = [
            base[re.sub('[^a-zA-Z]', '', line[key].split('(')[0].title())]
            for key in ['TROUBLE', 'PROBABLE CAUSE']
        ]
        # A fresh blank node per row: remedies get no URI of their own.
        remedy = rdflib.BNode()

        triples = [
            (trouble, rdflib.RDF['type'], base['Problem']),
            (trouble, rdflib.RDFS['label'], rdflib.Literal(line['TROUBLE'])),
            (cause, rdflib.RDF['type'], base['Problem']),
            (cause, rdflib.RDFS['label'], rdflib.Literal(line['PROBABLE CAUSE'])),
            (trouble, base['hasCause'], cause),

            (remedy, rdflib.RDFS['label'], rdflib.Literal(line['REMEDY'])),
            (remedy, rdflib.RDF['type'], base['Solution']),
            (remedy, base["solves"], cause),
        ]
        for t in triples:
            g.add(t)

g.serialize('generated-rdf/troubleshooting.ttl', format='ttl')

<Graph identifier=Nbb280eec1d2b4271845e0efb31e17b3f (<class 'rdflib.graph.Graph'>)>

In [53]:
import csv, rdflib, re

# Start the functions graph and load the problem/component/function table:
# each row links a component to one of its functions and that function to the
# problem named in the 'defines' column.
g = rdflib.Graph()
base = rdflib.Namespace('http://Ameneh.org/aircraft.owl#')
g.bind(None, base)

# Read inside a context manager so the file handle is closed once the rows are
# consumed; newline='' is what the csv module requests for file objects.
with open('prompt-extracted/problem-component-function.tsv', newline='') as pcf_file:
    lines = csv.DictReader(pcf_file, delimiter='\t')
    for line in lines:
        # Resource names: the text before the first '(' is title-cased and
        # stripped of every character outside a-z/A-Z.
        problem, component, function = [
            base[re.sub('[^a-zA-Z]', '', line[key].split('(')[0].title())]
            for key in ['defines', 'functionOf', 'Function']
        ]

        triples = [
            (function, rdflib.RDF['type'], base['Function']),
            (function, rdflib.RDFS['label'], rdflib.Literal(line['Function'])),
            (function, base['defines'], problem),

            # The problem only receives a label here; no rdf:type is asserted.
            (problem, rdflib.RDFS['label'], rdflib.Literal(line['defines'])),

            (component, rdflib.RDF['type'], base['Component']),
            (component, rdflib.RDFS['label'], rdflib.Literal(line['functionOf'])),
            (component, base['hasFunction'], function),
        ]
        for t in triples:
            g.add(t)


# Load the component -> function table into the graph.
# The file is opened in a context manager so the handle is closed once the
# rows are read; newline='' is what the csv module requests for file objects.
with open('prompt-extracted/functions.tsv', newline='') as functions_file:
    lines = csv.DictReader(functions_file, delimiter='\t')
    for line in lines:
        # Resource names: the text before the first '(' is title-cased and
        # stripped of every character outside a-z/A-Z.
        component, function = [
            base[re.sub('[^a-zA-Z]', '', line[key].split('(')[0].title())]
            for key in ['Component', 'hasFunction']
        ]

        triples = [
            (function, rdflib.RDFS['label'], rdflib.Literal(line['hasFunction'])),
            (function, rdflib.RDF['type'], base['Function']),
            (component, base['hasFunction'], function),
            # Components are typed but not labelled by this table.
            (component, rdflib.RDF['type'], base['Component']),
        ]
        for t in triples:
            g.add(t)


# Load the sub-function relations: the row's 'Function' is recorded as a
# subFunctionOf the resource named in the 'subFunctionOf' column.
# The file is opened in a context manager so the handle is closed once the
# rows are read; newline='' is what the csv module requests for file objects.
with open('prompt-extracted/subfunction.tsv', newline='') as subfunction_file:
    lines = csv.DictReader(subfunction_file, delimiter='\t')
    for line in lines:
        # Resource names: the text before the first '(' is title-cased and
        # stripped of every character outside a-z/A-Z.
        function, subfunction = [
            base[re.sub('[^a-zA-Z]', '', line[key].split('(')[0].title())]
            for key in ['subFunctionOf', 'Function']
        ]

        # Only the relation itself is asserted; no types or labels are added.
        triples = [
            (subfunction, base['subFunctionOf'], function),
        ]
        for t in triples:
            g.add(t)

# Load the component dependency relations, then write the whole functions
# graph out as Turtle.
# The file is opened in a context manager so the handle is closed once the
# rows are read; newline='' is what the csv module requests for file objects.
with open('prompt-extracted/dependsOn.tsv', newline='') as depends_file:
    lines = csv.DictReader(depends_file, delimiter='\t')
    for line in lines:
        # Resource names: the text before the first '(' is title-cased and
        # stripped of every character outside a-z/A-Z.
        c1, c2 = [
            base[re.sub('[^a-zA-Z]', '', line[key].split('(')[0].title())]
            for key in ['Component', 'dependsOn']
        ]

        # Only the relation itself is asserted; no types or labels are added.
        triples = [
            (c1, base['dependsOn'], c2),
        ]
        for t in triples:
            g.add(t)


g.serialize('generated-rdf/functions.ttl', format='ttl')

<Graph identifier=Nb26630ca02c94c6394d0fd07cb726b5c (<class 'rdflib.graph.Graph'>)>