In [1]:
import json
from pyld import jsonld
from rdflib import URIRef, BNode, Literal, Namespace
import uuid
from datetime import date

# Location of the xocs JSON export and the PII of the article to convert.
# `path` and `pii` are reused at the end of the notebook to build the output file name.
path = '/home/jovyan/tim/ScienceDirect/labs/xocs/'
pii = 'S0002822307022067'
fn = path + pii + '.json'

# The `with` statement closes the file on exit, so no explicit close() is needed.
with open(fn) as json_data:
    d = json.load(json_data)

# Sanity check: show the PII stored inside the loaded document.
print(d['PII'])

S0002822307022067


In [2]:
# JSON-LD context used when compacting the generated document.
# It references the anno.jsonld context via "@import" and then declares
# the prefix -> namespace IRI mappings listed below (insertion order is kept).
_ANNO_CONTEXT_URL = "http://www.w3.org/ns/anno.jsonld"

_PREFIX_IRIS = [
    ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
    ("xsd", "http://www.w3.org/2001/XMLSchema#"),
    ("edm", "https://data.elsevier.com/schema/edm/"),
    ("oa", "http://www.w3.org/ns/oa#"),
    ("skos", "http://www.w3.org/2004/02/skos/core#"),
    ("scibite", "http://scibite.com/ontology/scibite#"),
    ("tr", "http://scibite.com/ontology/termiteresult#"),
    ("ts", "https://data.elsevier.com/lifescience/termite/schema/"),
    ("idtype", "https://data.elsevier.com/e/identifier/"),
    ("ef", "https://data.elsevier.com/lifescience/ef/"),
    ("efs", "https://data.elsevier.com/lifescience/schema/efs/"),
]

context = {"@context": {"@import": _ANNO_CONTEXT_URL}}
context["@context"].update(_PREFIX_IRIS)

In [3]:

# rdflib namespaces used to build the property and type IRIs below.
# Several of them (tr, skos, oa, edm) are also used by the entity cell.
edm = Namespace("https://data.elsevier.com/schema/edm/")
ef = Namespace("https://data.elsevier.com/lifescience/ef/")
idtype = Namespace("https://data.elsevier.com/e/identifier/")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
tr = Namespace("http://scibite.com/ontology/termiteresult#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
oa = Namespace("http://www.w3.org/ns/oa#")

# Date of this run, used as the modification date of the document.
today = str(date.today())

# Starting items: the article node itself, identified by its PII.
doc = {'@id': d['PII'],
       '@type': "Article"}

# Authors: one node per author, each with a freshly generated identifier.
# The key must be the JSON-LD keyword '@id': `doc` carries no inline context
# that could alias a plain 'id' key, so 'id' would not be read as the node id.
# NOTE(review): the UUID is appended directly to ".../ef/person" with no
# separator -- confirm that this is the intended IRI shape.
authors = [
    {
        '@id': ef.person + str(uuid.uuid4()),
        edm.familyName: au['surname'],
        edm.firstName: au['forename'],
    }
    for au in d['Authors']
]
doc[edm.hasAuthor] = authors

# Identifiers: typed values for DOI, PII, ISSN and CID.
identifiers = [
    {'@type': idtype.DOI, '@value': d['DOI']},
    {'@type': idtype.PII, '@value': d['PII']},
    {'@type': edm.issn, '@value': d['ISSN']},
    {'@type': ef.cid, '@value': d['CID']},
]
doc[edm.identifiers] = identifiers

# Title & Abstract
doc[edm.title] = d['Title']
doc[edm.abstract] = d['Abstract']

# Dates: publication date from the input, modification date is today.
doc[edm.publishedDate] = {'@type': xsd.date, '@value': d['PublicationDate']}
doc[edm.modifiedDate] = {'@type': xsd.date, '@value': today}

# Journal
doc[edm.includedIn] = {'@type': edm.journal, '@value': d['JournalTitle']}


In [9]:
"""Entities"""
# Build one annotation entry per (entity, location) pair and attach the
# resulting list to the document as its '@graph'.

# IRI prefix shared by every annotation, body, target and selector id.
termite_base = "http://scibite.com/ontology/termiteresult#"

ents = []
for e in d['Entities']:
    # Diagnostic only: report entities whose number of Locations differs from
    # their number of XPaths. A missing or malformed field is skipped here on
    # purpose; only those specific errors are swallowed, not every exception.
    try:
        diff = len(e['Locations']) - len(e['XPaths'])
        if diff != 0:
            print(e['PreferredName'] + " " + str(diff))
    except (KeyError, TypeError):
        pass

    # IRI of the concept this entity refers to (used for body and mentions).
    concept_id = termite_base + e['EntityType'] + "_" + e['ID']
    # Per-entity id stem; it already ends with "_" so the location index can
    # be appended directly.
    my_base_id = d['PII'] + "_xml_" + e['EntityType'] + "_" + e['ID'] + "_"

    # We have to do the whole monty for each location: every entry has an id,
    # a type ('Annotation'), a body, a target and edm:mentions.
    for loc_counter, loc in enumerate(e['Locations']):
        # Target and selector ids are derived from the annotation id so that
        # all three share the same "<stem><index>" form. Previously they got
        # an extra "_" after the stem, which produced a double underscore.
        annotation_id = termite_base + my_base_id + str(loc_counter)

        # ID + TYPE
        entry = {'@id': annotation_id, '@type': 'Annotation'}

        # BODY: the concept, with its entity type as the broader concept.
        broader = {
            '@id': "https://data.elsevier.com/lifescience/termite/taxonomy/" + e['EntityType'],
            '@type': skos.Concept,
            skos.prefLabel: e['EntityType'],
        }
        body = {
            '@id': concept_id,
            '@type': skos.Concept,
            skos.prefLabel: e['PreferredName'],
            skos.broader: broader,
        }
        entry[tr.body] = body

        # TARGET / selector: sentence and character offsets of this location.
        selector = {
            '@id': annotation_id + "_selector",
            '@type': oa.TextPositionSelector,
            tr.sentence: loc['Sentence'],
            oa.start: loc['Start'],
            oa.end: loc['End'],
        }
        # NB there is not a one-to-one concordance of Locations to XPaths,
        # so the XPath selector is still a placeholder value.
        selector[oa.XPathSelector] = {oa.XPathSelector: "http:/example.com/TODO"}

        # TARGET / source: the XML rendition of the article.
        target = {
            '@id': annotation_id + "_target",
            '@type': oa.Target,
            oa.selector: selector,
            tr.source: {'@id': termite_base + d['PII'] + "_xml", '@type': edm.Work},
        }
        entry[oa.target] = target

        # MENTIONS: points at the same concept IRI as the body.
        entry[edm.mentions] = {'@id': concept_id}

        # TODO HigherTerms -- sample of the input structure still to be mapped:
        #   "HigherTerms": [
        #       {
        #           "TreePathCode": "CHEMBL_ATC/Thing/A/A07/A07A/A07AA/A07AA04",
        #           "TreePath": "CHEMBL_ATC/Thing/ALIMENTARY TRACT AND METABOLISM/ANTIDIARRHEALS, INTESTINAL ANTIINFLAMMATORY\\/ANTIINFECTIVE AGENTS/INTESTINAL ANTIINFECTIVES/Antibiotics/streptomycin"
        #       },
        #       {
        #           "TreePathCode": "CHEMBL_ATC/Thing/J/J01/J01G/J01GA/J01GA01",
        #           "TreePath": "CHEMBL_ATC/Thing/ANTIINFECTIVES FOR SYSTEMIC USE/ANTIBACTERIALS FOR SYSTEMIC USE/AMINOGLYCOSIDE ANTIBACTERIALS/Streptomycins/streptomycin"
        #       },
        #       {
        #           "TreePathCode": "CHEMBL_ATC/Thing/A/A07/A07A/A07AA/A07AA54",
        #           "TreePath": "CHEMBL_ATC/Thing/ALIMENTARY TRACT AND METABOLISM/ANTIDIARRHEALS, INTESTINAL ANTIINFLAMMATORY\\/ANTIINFECTIVE AGENTS/INTESTINAL ANTIINFECTIVES/Antibiotics/streptomycin, combinations"
        #       }
        #   ],
        ents.append(entry)

doc['@graph'] = ents

Neoplastic Cell Transformation 117
vitamin 9
Vegetables 1
Carcinoma 110
Hypertension 1
lung 1


In [None]:
# Compact the assembled document against the context and store the result
# next to the input file as "<pii>.schema.json".
compacted = jsonld.compact(doc, context)
outfile = path + pii + ".schema.json"
with open(outfile, 'w') as sink:
    payload = json.dumps(compacted)
    sink.write(payload)
# For a human-readable view, print json.dumps(compacted, indent=2) instead.