In [89]:
from rdflib import Graph
from rdflib.namespace import OWL, DC, DCTERMS, RDF, RDFS, SKOS, XSD
from rdflib import URIRef, BNode, Literal
import json
import os
import pandas as pd
import pylode
import re

In [90]:
# Base IRI under which every PTFI term is minted.
NS = "https://foodperiodictable.org/ptfi/terms/"

# Graph that the following cells populate and serialize.
g = Graph()

# The namespace IRI doubles as the identifier of the ontology header.
ptfi = URIRef(NS)

# Header triples: the resource is declared an owl:Ontology and given a
# modification date and an English title.
ontology_header = [
    (RDF.type, OWL.Ontology),
    (DCTERMS.modified, Literal('2022-06-01', datatype=XSD.date)),
    (DCTERMS.title, Literal('PTFI Metadata Terms related to Specimen', lang='en')),
]
for predicate, value in ontology_header:
    g.add((ptfi, predicate, value))

# Display the graph object as the cell result.
g


<Graph identifier=N1bb88316860d4a1d88c706ed9db3981e (<class 'rdflib.graph.Graph'>)>

In [91]:
# Build the RDF description of the application profile: one rdfs:Class per
# module, one rdf:Property per metadata element, a range for each property,
# and an OWL cardinality restriction attaching the property to its module.

# Issue date stamped on every class and property minted below.
ISSUED = Literal('2022-06-01', datatype=XSD.date)

# Spreadsheet 'datatype' keyword -> XSD datatype used as the property range.
DATATYPE_RANGES = {
    'date': XSD.date,
    'float': XSD.float,
    'string': XSD.string,
    'binary/image': XSD.base64Binary,
    'uuid': XSD.string,
    'integer': XSD.integer,
}

# Controlled vocabularies that always map to one fixed external class IRI.
EXTERNAL_VOCAB_RANGES = {
    'unit ontology': URIRef('http://purl.obolibrary.org/obo/UO_0000000'),
    'NCBITaxon': URIRef('http://purl.obolibrary.org/obo/NCBITaxon_1'),
}

# FOODON rows are disambiguated by a keyword in the row's comment; the first
# keyword found wins, so the order of this tuple matters.
FOODON_RANGES = (
    ('food product', URIRef('http://purl.obolibrary.org/obo/FOODON_00002381')),
    ('process', URIRef('http://purl.obolibrary.org/obo/FOODON_00002451')),
)

# Restriction predicates: the qualified form needs a filler (owl:onClass or
# owl:onDataRange); the plain form is used when no filler could be resolved.
QUALIFIED_CARDINALITY = {
    'exact': OWL.qualifiedCardinality,
    'min': OWL.minQualifiedCardinality,
    'max': OWL.maxQualifiedCardinality,
}
PLAIN_CARDINALITY = {
    'exact': OWL.cardinality,
    'min': OWL.minCardinality,
    'max': OWL.maxCardinality,
}


def add_class(graph, class_uri, label):
    """Declare ``class_uri`` as an rdfs:Class with an English label.

    Also records the issue date and links the class to the ontology
    (``ptfi``) through rdfs:isDefinedBy.
    """
    graph.add((class_uri, RDF.type, RDFS.Class))  ## SKOS.Concept
    graph.add((class_uri, RDFS.label, Literal(label, lang='en')))
    graph.add((class_uri, DCTERMS.issued, ISSUED))
    graph.add((class_uri, RDFS.isDefinedBy, ptfi))


def external_range(controlled_vocab, comment):
    """Return the external class IRI for a controlled vocabulary, or None.

    ``comment`` is only consulted for 'FOODON', where it selects between the
    two FOODON classes listed in ``FOODON_RANGES``.
    """
    if controlled_vocab == 'FOODON':
        for keyword, range_uri in FOODON_RANGES:
            if keyword in comment:
                return range_uri
        return None
    return EXTERNAL_VOCAB_RANGES.get(controlled_vocab)


def add_in_house_vocab(graph, controlled_vocab, vocab):
    """Create an in-house vocabulary class plus one subclass per term.

    ``vocab`` is the '|'-separated list of terms from the spreadsheet.
    Terms are stripped of surrounding whitespace and empty entries are
    skipped, so a blank cell no longer turns the bare namespace IRI into a
    vocabulary term. Returns the IRI of the vocabulary class.
    """
    vocab_uri = URIRef(NS + controlled_vocab.replace(' ', '_') + '_vocab')
    add_class(graph, vocab_uri, controlled_vocab)
    for raw_term in vocab.split('|'):
        term = raw_term.strip()
        if not term:
            continue
        term_uri = URIRef(NS + term.replace(' ', '_'))
        add_class(graph, term_uri, term)
        graph.add((term_uri, RDFS.subClassOf, vocab_uri))
    return vocab_uri


def add_cardinality_restriction(graph, class_uri, property_uri, obligation,
                                cardinality, datarange=None, vocab_uri=None):
    """Attach an owl:Restriction on ``property_uri`` to ``class_uri``.

    Cardinality chosen from the spreadsheet columns:
      obligation set,   'unique'     -> exactly 1
      obligation set,   anything else -> at least 1
      obligation unset, 'unique'     -> at most 1
      obligation unset, anything else -> at least 0

    The restriction is qualified with ``datarange`` (owl:onDataRange) or
    ``vocab_uri`` (owl:onClass) when one is available. When neither is, an
    unqualified cardinality is emitted instead of passing None to the graph.

    Each call creates a new blank node, so re-running against the same graph
    adds the restriction again.
    """
    unique = cardinality == 'unique'
    if obligation:
        kind, count = ('exact', 1) if unique else ('min', 1)
    else:
        kind, count = ('max', 1) if unique else ('min', 0)

    # OWL 2 serialises cardinalities as xsd:nonNegativeInteger literals.
    count_literal = Literal(count, datatype=XSD.nonNegativeInteger)

    restriction = BNode()
    graph.add((restriction, RDF.type, OWL.Restriction))
    graph.add((restriction, OWL.onProperty, property_uri))
    if datarange is not None:
        graph.add((restriction, QUALIFIED_CARDINALITY[kind], count_literal))
        graph.add((restriction, OWL.onDataRange, datarange))
    elif vocab_uri is not None:
        graph.add((restriction, QUALIFIED_CARDINALITY[kind], count_literal))
        graph.add((restriction, OWL.onClass, vocab_uri))
    else:
        graph.add((restriction, PLAIN_CARDINALITY[kind], count_literal))
    graph.add((class_uri, RDFS.subClassOf, restriction))


# Read the 'application profile' sheet; the context manager closes the
# workbook handle. Empty cells become '' so the truthiness checks below work.
with pd.ExcelFile('./Sample_Metadata_PTFI_050222.xlsx') as xls:
    df = pd.read_excel(xls, 'application profile').fillna('')


for _, row in df.iterrows():
    element_id = row['metadata element IRI/ID']
    element_name = row['metadata element name']
    element_description = row['definition']
    comment = row['comment']
    example = row['see']
    cardinality = row['occurrence']
    obligation = row['obligation']
    datatype = row['datatype']
    controlled_vocab = row['vocabulary encoding scheme']
    module = row['module']
    vocab = row['"in-house" vocabulary']

    # A row without an identifier would mint the ontology IRI itself as a
    # property, so skip it.
    if not element_id:
        continue

    ## module: create its class the first time it is seen. The check is on
    ## this module's own IRI, so another resource that happens to carry the
    ## same label cannot suppress the module definition.
    moduleUri = URIRef(NS + module.replace(' ', '_'))
    module_label = Literal(module, lang='en')
    if (moduleUri, RDFS.label, module_label) not in g:
        add_class(g, moduleUri, module)
        g.add((moduleUri, DCTERMS.description, module_label))

    ## element: an rdf:Property whose domain is the module class
    elementURI = URIRef(NS + element_id)
    g.add((elementURI, RDFS.label, Literal(element_name, lang='en')))
    g.add((elementURI, RDF.type, RDF.Property))
    g.add((elementURI, DCTERMS.description, Literal(element_description, lang='en')))
    g.add((elementURI, DCTERMS.issued, ISSUED))
    g.add((elementURI, RDFS.isDefinedBy, ptfi))
    g.add((elementURI, RDFS.domain, moduleUri))

    if comment:
        g.add((elementURI, RDFS.comment, Literal(comment, lang='en')))
    if example:
        g.add((elementURI, RDFS.seeAlso, Literal(example, lang='en')))

    ## range: a vocabulary class when a controlled vocabulary is given,
    ## otherwise an XSD datatype
    datarange = None
    vocabUri = None
    if controlled_vocab:
        vocabUri = external_range(controlled_vocab, comment)
        if vocabUri is None and 'vocab' in controlled_vocab:
            vocabUri = add_in_house_vocab(g, controlled_vocab, vocab)
        if vocabUri is not None:
            g.add((elementURI, RDFS.range, vocabUri))
        else:
            # Unrecognised vocabulary: report the element, leave it unranged.
            print(element_name)
    else:
        datarange = DATATYPE_RANGES.get(datatype)
        if datarange is not None:
            g.add((elementURI, RDFS.range, datarange))
        else:
            # Unrecognised datatype: report the element, leave it unranged.
            print(element_name)

    # NOTE(review): any non-empty 'obligation' cell counts as mandatory,
    # including text such as 'optional' - confirm the column is left blank
    # for optional elements.
    add_cardinality_restriction(
        g, moduleUri, elementURI, obligation, cardinality,
        datarange=datarange, vocab_uri=vocabUri,
    )
            
            

In [92]:
# Print the graph
#print(g.serialize(format="turtle"))

In [95]:
# Persist the graph. The documentation cell reads
# "PTFI_application_json-ld.json", which no other cell writes, so the JSON-LD
# copy is produced here to keep a fresh Restart & Run All self-contained.
g.serialize(destination="PTFI_application_json-ld.json", format="json-ld")
g.serialize(destination="PTFI_application.ttl", format="turtle")


<Graph identifier=N1bb88316860d4a1d88c706ed9db3981e (<class 'rdflib.graph.Graph'>)>

In [96]:
### documentation
# Render the ontology stored in "PTFI_application_json-ld.json" to HTML with
# pyLODE's "ontdoc" profile and save it as documentation.html.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'str' object has no attribute 'decode'". That is presumably
# raised inside pylode (a pylode/rdflib version mismatch?) - confirm the
# installed versions; the change below does not address it.
html = pylode.MakeDocco(
    input_data_file="PTFI_application_json-ld.json",
    outputformat="html",
    profile="ontdoc"
).document()

# The context manager closes the file even if the write fails, and the
# explicit encoding keeps non-ASCII characters intact on every platform.
with open('documentation.html', 'w', encoding='utf-8') as f:
    f.write(html)

AttributeError: 'str' object has no attribute 'decode'