# Storing graphs on the online database storage server

In [1]:
from rdflib import Graph
from rdflib import URIRef, Graph, BNode, Literal
from rdflib import Namespace
from rdflib.namespace import RDF, RDFS, OWL, DC, FOAF, XSD, SKOS
from odt.skosnavigate import SKOSNavigate
from uuid import uuid4
from pymongo import MongoClient
import pandas as pd
import numpy as np 

# Namespace objects for the vocabularies referenced in this notebook.
# Project vocabularies hosted under quaat.com:
ODT = Namespace('http://www.quaat.com/ontologies#')
# W3C DCAT namespace and the purl.org Dublin Core terms namespace:
DCAT = Namespace('http://www.w3.org/ns/dcat#')
DCT = Namespace('http://purl.org/dc/terms/')
# NOTE(review): ODTX and QEX use different path layouts than ODT
# ("ontology/ODTX#", "extended_skos#") — confirm these IRIs are intentional.
ODTX = Namespace('http://www.quaat.com/ontology/ODTX#')
QEX = Namespace('http://www.quaat.com/extended_skos#')
# W3C vCard namespace:
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
# W3C WordNet 2.0 schema namespace, and the wordnet.dk instance/schema namespaces:
WN20SCHEMA = Namespace('http://www.w3.org/2006/03/wn/wn20/schema/')
DN = Namespace('http://www.wordnet.dk/owl/instance/2009/03/instances/')
DN_SCHEMA = Namespace('http://www.wordnet.dk/owl/instance/2009/03/schema/')


In [None]:
# Parse the JSON-LD document 'satellittdata.json' into a fresh graph and
# display its serialization.
graph = Graph()
ODT = Namespace('http://www.quaat.com/ontologies#')
graph.bind('odt', ODT)
# Decode explicitly as UTF-8 rather than relying on the platform's default
# encoding, which can mis-read non-ASCII characters (e.g. on Windows).
with open('satellittdata.json', encoding='utf-8') as file:
    data = file.read()
# The file handle is no longer needed once the text is in memory.
graph.parse(data=data, format='json-ld')

graph.serialize()

In [16]:
# Convert the ontology file to JSON-LD and store it as a document with
# label "test" in the `ontologies` collection of the `ontodb` database.
graph = Graph()
graph.parse('otd-ontology-v5.owl', format='xml')
jld = graph.serialize(format='json-ld')
# NOTE(review): credentials are embedded in the source; consider reading
# them from the environment or a secrets store instead.
uri = 'mongodb://{0}:{1}@ds119969.mlab.com:19969/ontodb'.format('nims', '******')
# Use the client as a context manager so the connection is closed as soon
# as the insert has completed.
with MongoClient(uri) as client:
    db = client.ontodb
    onts = db.ontologies
    ont = {"label": "test", "ontology": jld}
    ontId = onts.insert_one(ont).inserted_id
# Print the generated document id so it can be reused for later lookups.
print(str(ontId))

5b2ab51401d5412566cf4e94


In [6]:
# Convert the harvested dataset descriptions ('all_output.rdf') to JSON-LD
# and store them as a document with label "datasets" in the `ontologies`
# collection of the `ontodb` database.
graph = Graph()
graph.parse('all_output.rdf', format='xml')
jld = graph.serialize(format='json-ld')
# NOTE(review): credentials are embedded in the source; consider reading
# them from the environment or a secrets store instead.
uri = 'mongodb://{0}:{1}@ds119969.mlab.com:19969/ontodb'.format('nims', '******')
# Use the client as a context manager so the connection is closed as soon
# as the insert has completed.
with MongoClient(uri) as client:
    db = client.ontodb
    onts = db.ontologies
    ont = {"label": "datasets", "datasets": jld}
    ontId = onts.insert_one(ont).inserted_id
# Print the generated document id so it can be reused for later lookups.
print(str(ontId))

5b2968c501d5412566cf4e86


In [23]:
# Round-trip the JSON-LD file 'tagged-v0.8.json' through a graph and store
# the result as a document with label "tagged" (payload key "similarity")
# in the `ontologies` collection of the `ontodb` database.
graph = Graph()
graph.parse('tagged-v0.8.json', format='json-ld')
jld = graph.serialize(format='json-ld')
# NOTE(review): credentials are embedded in the source; consider reading
# them from the environment or a secrets store instead.
uri = 'mongodb://{0}:{1}@ds119969.mlab.com:19969/ontodb'.format('nims', '******')
# Use the client as a context manager so the connection is closed as soon
# as the insert has completed.
with MongoClient(uri) as client:
    db = client.ontodb
    onts = db.ontologies
    ont = {"label": "tagged", "similarity": jld}
    ontId = onts.insert_one(ont).inserted_id
# Print the generated document id so it can be reused for later lookups.
print(str(ontId))

5b2ada4d01d5412566cf4ea1


In [19]:
# Store automatic tagging scheme: convert 'autotag-no-v5.0.rdf' to JSON-LD
# and insert it as a document with label "autotagged" (payload key
# "autotag") in the `ontologies` collection of the `ontodb` database.
graph = Graph()
graph.parse('autotag-no-v5.0.rdf', format='xml')
jld = graph.serialize(format='json-ld')
# NOTE(review): credentials are embedded in the source; consider reading
# them from the environment or a secrets store instead.
uri = 'mongodb://{0}:{1}@ds119969.mlab.com:19969/ontodb'.format('nims', '******')
# Use the client as a context manager so the connection is closed as soon
# as the insert has completed.
with MongoClient(uri) as client:
    db = client.ontodb
    onts = db.ontologies
    ont = {"label": "autotagged", "autotag": jld}
    ontId = onts.insert_one(ont).inserted_id
# Print the generated document id so it can be reused for later lookups.
print(str(ontId))

5b2acdfe01d5412566cf4e99


In [None]:
from odt.database import load_autotagged
from rdflib import Namespace
from rdflib.namespace import RDF, RDFS, OWL, DC, FOAF, XSD, SKOS
ODT = Namespace('http://www.quaat.com/ontologies#')
DCAT = Namespace('http://www.w3.org/ns/dcat#')
DCT = Namespace('http://purl.org/dc/terms/')

# Load the graph for the most recently stored document id (`ontId`) via
# load_autotagged, register the prefixes below, and display the serialization.
g = load_autotagged(uri, str(ontId))
for prefix, namespace in (('odt', ODT), ('dcat', DCAT), ('dct', DCT)):
    g.bind(prefix, namespace)
g.serialize()

In [None]:
from bson.objectid import ObjectId

# Fetch the document identified by `ontId` and rebuild a graph from its
# JSON-LD payload.
# NOTE(review): only the document inserted with label "test" carries an
# "ontology" key; `ontId` must refer to that document or the lookup below
# raises KeyError.
with MongoClient(uri) as client:
    db = client.ontodb
    onts = db.ontologies
    doc = onts.find_one({'_id': ObjectId(ontId)})
# Graph.serialize() returns bytes in older rdflib releases and str in newer
# ones, so the stored payload may come back as either type; only bytes need
# decoding.
raw = doc['ontology']
d = raw.decode("utf-8") if isinstance(raw, bytes) else raw
graph = Graph()
graph.parse(data=d, format='json-ld')

In [None]:
import requests

# Download the RDF description of every dataset named by the package_list
# endpoint, merge them into one graph, and write it to 'all_output.rdf'.
REQUEST_TIMEOUT = 30  # seconds; prevents a stalled server from hanging the cell

package_list = requests.get('http://78.91.98.234:5000/api/3/action/package_list',
                            timeout=REQUEST_TIMEOUT)
# Fail on an HTTP error instead of evaluating status_code and discarding it.
package_list.raise_for_status()
graph = Graph()
for res in package_list.json()['result']:
    dataset = 'http://78.91.98.234:5000/dataset/{0}.rdf'.format(res)
    data = requests.get(dataset, timeout=REQUEST_TIMEOUT)
    # Do not feed an HTTP error page to the RDF parser.
    data.raise_for_status()
    # State the format explicitly: the default used by Graph.parse when no
    # format is given differs between rdflib releases.
    graph.parse(data=data.text, format='xml')
graph.serialize(destination='all_output.rdf', format='xml')

# Dataset analysis
This section is copied from "Extract concepts with preferred and alternative labels".


In [18]:
# Fetch the ontology from the database and set up the dictionaries used for
# the label lookups: `wn` (WordNet via nltk) and `ordvev` (OrdVev).
from odt.database import load_ontology
from odt.ordvev import OrdVev
from nltk.corpus import wordnet as wn

# Document id printed when the ontology was stored, and the connection string.
ontology_uuid = '5b2ab51401d5412566cf4e94'
uri = 'mongodb://{0}:{1}@ds119969.mlab.com:19969/ontodb'.format('nims', '******')

graph = load_ontology(uri, ontology_uuid)

ordvev = OrdVev()

In [11]:
# Tallies for the label-coverage analysis; all start at zero.
# Total number of SKOS concepts visited.
num_concepts = 0
# Concepts with a preferred label found in the dictionary ('nb' / 'en').
num_no_preflabel_in_dict = num_en_preflabel_in_dict = 0
# Concepts with an alternative label found in the dictionary ('nb' / 'en').
num_no_altlabel_in_dict = num_en_altlabel_in_dict = 0
# Concepts with at least one label (preferred or alternative) found.
num_en_covered = num_nb_covered = 0

def synset_exists(tok):
    """Return True when ``wn.synsets(tok)`` yields at least one synset."""
    return len(wn.synsets(tok)) > 0

def first(xs):
    """Return the first element of ``xs``, or None when ``xs`` is falsy (e.g. empty)."""
    return xs[0] if xs else None

# Labels for which no dictionary entry was found, kept separately for
# preferred/alternative labels in English ('en') and Norwegian ('nb').
undefined_en_vocab, undefined_en_alt_vocab = [], []
undefined_vocab, undefined_alt_vocab = [], []

def _has_dictionary_entry(labels, language, lookup, misses):
    """Scan ``labels`` in ``language`` until one is found by ``lookup``.

    Labels tagged with another language are skipped. Each label that
    ``lookup`` does not find is appended to ``misses``; scanning stops at the
    first label that is found.

    Returns True if a label was found, False otherwise.
    """
    for label in labels:
        if label.language != language:
            continue
        if lookup(label):
            return True
        misses.append(label)
    return False


def _ordvev_lookup(token):
    """Materialize ``ordvev.synsets(token)`` so an empty result is falsy."""
    return list(ordvev.synsets(token))


# NOTE(review): labels are passed to wn.synsets exactly as stored. Multiword
# WordNet lemmas are presumably keyed with underscores ("bus_stop"), which
# would explain why labels containing spaces are reported as undefined —
# confirm before changing, since it alters the reported coverage figures.
for concept in graph.subjects(RDF.type, SKOS.Concept):
    num_concepts += 1

    # English: preferred labels first, then alternative labels.
    en_pref_found = _has_dictionary_entry(
        graph.objects(concept, SKOS.prefLabel), 'en', wn.synsets, undefined_en_vocab)
    en_alt_found = _has_dictionary_entry(
        graph.objects(concept, SKOS.altLabel), 'en', wn.synsets, undefined_en_alt_vocab)

    # Norwegian ('nb'): same scan against OrdVev.
    nb_pref_found = _has_dictionary_entry(
        graph.objects(concept, SKOS.prefLabel), 'nb', _ordvev_lookup, undefined_vocab)
    nb_alt_found = _has_dictionary_entry(
        graph.objects(concept, SKOS.altLabel), 'nb', _ordvev_lookup, undefined_alt_vocab)

    # Each tally grows by at most one per concept.
    num_en_preflabel_in_dict += int(en_pref_found)
    num_en_altlabel_in_dict += int(en_alt_found)
    num_no_preflabel_in_dict += int(nb_pref_found)
    num_no_altlabel_in_dict += int(nb_alt_found)

    # A concept is covered in a language when either label kind was found.
    en_covered = en_pref_found or en_alt_found
    nb_covered = nb_pref_found or nb_alt_found
    num_en_covered += int(en_covered)
    num_nb_covered += int(nb_covered)

        
def _percent(count, total):
    """Return ``count`` as a percentage of ``total``, or 0.0 when ``total`` is zero.

    The zero guard keeps the report from raising ZeroDivisionError when the
    graph contains no concepts.
    """
    return 100 * count / total if total else 0.0


# Summary: each tally with its share of the total number of concepts.
print('num concepts: ', num_concepts)
for description, count in (
    ('num english preflabels in dictionary', num_en_preflabel_in_dict),
    ('num english altlabels in dictionary', num_en_altlabel_in_dict),
    ('num norwegian preflabels in dictionary', num_no_preflabel_in_dict),
    ('num norwegian altlabels in dictionary', num_no_altlabel_in_dict),
    ('num english concepts covered', num_en_covered),
    ('num norwegian concepts covered', num_nb_covered),
):
    print('{0}: {1} ({2}%)'.format(description, count, _percent(count, num_concepts)))

# Detail: every label that had no dictionary entry, grouped by language and
# label kind, in the order the labels were encountered.
for heading, tokens in (
    ("\n=== Undefined pref 'en' tokens", undefined_en_vocab),
    ("\n=== Undefined alternative 'en' tokens", undefined_en_alt_vocab),
    ("\n=== Undefined pref 'nb' tokens", undefined_vocab),
    ("\n=== Undefined alternative 'nb' tokens", undefined_alt_vocab),
):
    print(heading)
    for voc in tokens:
        print(str(voc))

num concepts:  102
num english preflabels in dictionary: 67 (65.68627450980392%)
num english altlabels in dictionary: 4 (3.9215686274509802%)
num norwegian preflabels in dictionary: 72 (70.58823529411765%)
num norwegian altlabels in dictionary: 9 (8.823529411764707%)
num english concepts covered: 71 (69.6078431372549%)
num norwegian concepts covered: 75 (73.52941176470588%)

=== Undefined pref 'en' tokens
travel plan
personal transport
topographic point
public transport
traffic condition
point of interest
geographical information
weather forecast
street sign
service area
real time
bus stop
emission information
charging station
traffic information
environment information
route plan
traffic flow
railway junction
hydrometeorological information
bus terminal
air quality
traffic queue
park and ride
weather condition
meteorological information
traffic circle
transport mode
API description
transfer node
transport network condition
tracking information
travel information
t junction
tool booth


landevei - fylkesvei
meteorologisk informasjon - meteoriologi
transportnettilstand
topografisk punkt
trafikkforhold - trafikkproblem
persolig transport
trafikkø
serviceområde
trafikkinformasjon
offentlig transport - kollektivtransport
bomstasjon - veiavgift(?)
entitet
trafikkskilt - veiskilt
rundkjøring
ruteplan
ladestasjon
3-veis kryss - gatekryss
trafikkflyt - trafikkstrøm
overføringspunkt
