In [1]:
import rdflib 
from rdflib import Graph

# Tests

In [2]:
d = """
@prefix ocs: <https://w3id.org/ocs/ont/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

ocs:C1 a skos:Concept ;
    skos:closeMatch <http://dbpedia.org/resource/Computer_science> ;
    skos:prefLabel "Computer science" ;
    skos:related ocs:C8,
        ocs:C10,
        ocs:C14,
        ocs:C19,
        ocs:C28,
        ocs:C197 .


"""

graph = Graph()
graph.parse(data=d)

print(len(graph))

9


In [3]:
graph.items(1)

<generator object Graph.items at 0x000001F507FAD850>

In [4]:
enumerate(graph)

<enumerate at 0x1f509d3e5c0>

In [5]:
for index, (sub, pred, obj) in enumerate(graph):
    print(index)
    print(sub)
    print(pred)
    print(obj)

0
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#related
https://w3id.org/ocs/ont/C14
1
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#related
https://w3id.org/ocs/ont/C10
2
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#related
https://w3id.org/ocs/ont/C197
3
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#closeMatch
http://dbpedia.org/resource/Computer_science
4
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#related
https://w3id.org/ocs/ont/C28
5
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#related
https://w3id.org/ocs/ont/C19
6
https://w3id.org/ocs/ont/C1
http://www.w3.org/1999/02/22-rdf-syntax-ns#type
http://www.w3.org/2004/02/skos/core#Concept
7
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#related
https://w3id.org/ocs/ont/C8
8
https://w3id.org/ocs/ont/C1
http://www.w3.org/2004/02/skos/core#prefLabel
Computer science


# Tests for main code

In [28]:
# finding all file

import glob

dirs = glob.glob('../../../OpenCS/ontology/core/*')
files = []
for i in dirs:
    files += glob.glob(i + '/*.ttl')

In [30]:
# importing all graphs

graphs = []

for f in files:
    graph = Graph()
    graph.parse(f, format='ttl')
    graphs.append(graph)

In [31]:
# finding unique relations

preds = []

for g in graphs:
    for index, (sub, pred, obj) in enumerate(g):
        preds.append(pred)
        
preds = list(set(preds))

In [32]:
preds

[rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#related'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
 rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#broader'),
 rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#closeMatch'),
 rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel')]

In [33]:
# finding all name - label pairs
labels_dict = {}

for g in graphs:
    for index, (sub, pred, obj) in enumerate(g):
         if pred == rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'):
                labels_dict.update({sub.split('/')[-1] : str(obj)})

In [11]:
# finding types
for g in graphs:
    for index, (sub, pred, obj) in enumerate(g):
         if pred == rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'):
                print(index)
                print(sub)
                print(pred)
                print(obj)

In [34]:
# define template
template = """
{
    "mappings": {
        "properties": {
            "name": {
                "type": "text"
            },
            "label": {
                "type": "text"
            },
            "type": {
                "type": "text"
            },
            "closeMatch": {
                "type": "text"
            },
            "related": {
                "type": "text"
            },
            "broader": {
                "type": "text"
            }
        }
    }
}
"""


In [35]:
import elasticsearch as es
from elasticsearch import Elasticsearch
from datetime import datetime

In [36]:
# function to connect to

def connect_elasticsearch():
    _es = None
    _es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    if _es.ping():
        print('Yay Connected')
    else:
        print('Awww it could not connect!')
    return _es

In [37]:
# function to add row to index

def store_record(es_object, index, data):
    is_stored = True

    try:
        outcome = es_object.index(index=index, doc_type='_doc', body=json.dumps(data))
        print(outcome)
    except Exception as ex:
        print('Error in indexing data')
        print(str(ex))
        is_stored = False
    finally:
        return is_stored

In [18]:
import requests 
import json

query = json.loads(template)
response = requests.put('http://localhost:9200/ontology',
                       json=query)
response.json()

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'ontology'}

In [38]:
for g in graphs:

    name = None
    labels = []
    closeMatch = []
    related = []
    broader = []
    type = None

    for index, (sub, pred, obj) in enumerate(g):
        name = sub.split('/')[-1]
        if pred == rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'):
            type = obj.split('#')[-1]
        if pred == rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#broader'):
            broader.append(obj.split('/')[-1])
        if pred == rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#closeMatch'):
            closeMatch.append(str(obj))
        if pred == rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#related'):
            related.append(obj.split('/')[-1])
        if pred == rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'):
            labels.append(str(obj))

    for i in range(len(related)):
        related[i] = labels_dict[related[i]]

    for i in range(len(broader)):
        broader[i] = labels_dict[broader[i]]

    row = {
        'name' : name,
        'label' : labels,
        'type' : type,
        'closeMatch': closeMatch,
        'related' : related,
        'broader' : broader
    }

    data = row
    es = connect_elasticsearch()
    r = store_record(es, 'ontology', data)
    print(r)

Yay Connected
{'_index': 'ontology', '_type': '_doc', '_id': 'im8zL4UB0sfRnO4yHXRc', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
True
Yay Connected
{'_index': 'ontology', '_type': '_doc', '_id': 'i28zL4UB0sfRnO4yHXSz', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
True
Yay Connected
{'_index': 'ontology', '_type': '_doc', '_id': 'jG8zL4UB0sfRnO4yHnQY', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
True
Yay Connected
{'_index': 'ontology', '_type': '_doc', '_id': 'jW8zL4UB0sfRnO4yHnRQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}
True
Yay Connected
{'_index': 'ontology', '_type': '_doc', '_id': 'jm8zL4UB0sfRnO4yHnSC', '_version': 1, 'result': 'created', '_shards': {'total'