In [1]:
import rdflib

In [2]:
g = rdflib.Graph()
result = g.parse("http://www.w3.org/People/Berners-Lee/card")

print("graph has %s statements." % len(g))
# prints graph has 79 statements.

graph has 88 statements.


In [3]:
for subj, pred, obj in g:
   if (subj, pred, obj) not in g:
       raise Exception("It better be!")

s = g.serialize(format='n3')

In [4]:
from rdflib import Graph, Literal, BNode, Namespace, RDF, URIRef
from rdflib.namespace import DC, FOAF

g = Graph()

# Create an identifier to use as the subject for Donna.
donna = BNode()

# Add triples using store's add method.
g.add( (donna, RDF.type, FOAF.Person) )
g.add( (donna, FOAF.nick, Literal("donna", lang="foo")) )
g.add( (donna, FOAF.name, Literal("Donna Fales")) )
g.add( (donna, FOAF.mbox, URIRef("mailto:donna@example.org")) )

# Iterate over triples in store and print them out.
print("--- printing raw triples ---")
for s, p, o in g:
    print((s, p, o))

# For each foaf:Person in the store print out its mbox property.
print("--- printing mboxes ---")
for person in g.subjects(RDF.type, FOAF.Person):
    for mbox in g.objects(person, FOAF.mbox):
        print(mbox)

# Bind a few prefix, namespace pairs for more readable output
g.bind("dc", DC)
g.bind("foaf", FOAF)

print( g.serialize(format='n3') )

--- printing raw triples ---
(rdflib.term.BNode('Nbbf3186bd7494e298b4c145b17e110c3'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/nick'), rdflib.term.Literal('donna', lang='foo'))
(rdflib.term.BNode('Nbbf3186bd7494e298b4c145b17e110c3'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Donna Fales'))
(rdflib.term.BNode('Nbbf3186bd7494e298b4c145b17e110c3'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:donna@example.org'))
(rdflib.term.BNode('Nbbf3186bd7494e298b4c145b17e110c3'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/Person'))
--- printing mboxes ---
mailto:donna@example.org
b'@prefix dc: <http://purl.org/dc/elements/1.1/> .\n@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix xml: <http://www.w3.org/XML/1998/namespace> .\n@prefix

In [5]:
# from https://github.com/RDFLib/rdflib
g=rdflib.Graph()
g.load('http://dbpedia.org/resource/Semantic_Web')

In [6]:
for s,p,o in g:
    print(s,p,o)

http://dbpedia.org/resource/Semantic_Web http://www.w3.org/2000/01/rdf-schema#comment A Web semântica é uma extensão da World Wide Web atual, que permitirá aos computadores e humanos trabalharem em cooperação. A Web semântica interliga significados de palavras e, neste âmbito, tem como finalidade conseguir atribuir um significado (sentido) aos conteúdos publicados na Internet de modo que seja perceptível tanto pelo humano como pelo computador. A ideia da Web Semântica surgiu em 2001, quando Tim Berners-Lee, James Hendler e Ora Lassila publicaram um artigo na revista Scientific American, intitulado: “Web Semântica: um novo formato de conteúdo para a Web que tem significado para computadores vai iniciar uma revolução de novas possibilidades.”
http://dbpedia.org/resource/Semantic_Web http://www.w3.org/2002/07/owl#sameAs http://yago-knowledge.org/resource/Semantic_Web
http://dbpedia.org/resource/Semantic_Web http://www.w3.org/2000/01/rdf-schema#comment Con il termine web semantico, termine

---
## Getting data from the Semantic Web
[semanticweb.org](http://semanticweb.org/wiki/Getting_data_from_the_Semantic_Web.html)

We shall parse some RDF/XML from DBpedia on a number of people. The way you parse RDF with rdflib is you create a Graph, which is a sort of empty holder for data. Imagine this as a big container for data, and you can throw in to the container as much data as you like, then just filter out the bits you want.

First we should import the Graph class from the rdflib package and create a Graph instance.

In [1]:
from rdflib import Graph, URIRef

In [2]:
g = Graph()

In [3]:
[method_name for method_name in dir(g)
 if callable(getattr(g, method_name))]

['_Graph__get_identifier',
 '_Graph__get_store',
 '__add__',
 '__and__',
 '__class__',
 '__cmp__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__init__',
 '__isub__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__xor__',
 '_get_namespace_manager',
 '_process_skolem_tuples',
 '_set_namespace_manager',
 'absolutize',
 'add',
 'addN',
 'all_nodes',
 'bind',
 'close',
 'collection',
 'comment',
 'commit',
 'compute_qname',
 'connected',
 'de_skolemize',
 'destroy',
 'isomorphic',
 'items',
 'label',
 'load',
 'md5_term_hash',
 'n3',
 'namespaces',
 'objects',
 'open',
 'parse',
 'predicate_objects',
 'predicates',
 'preferredLabel',
 'qname',
 'query',
 'remove',
 'resource',
 'rollback',
 'seq',
 'serialize

The 'g' variable now has an empty graph.

Now we should load some data from the web. The graph object has a method called 'parse' which allows you to give it a file name from your local system or an HTTP URI, as well as an optional format, and it will try to load data from that source. We'll load in data about Elvis Presley.

In [4]:
g.parse("http://dbpedia.org/resource/Elvis_Presley")

<Graph identifier=N3e307f08b1e94f63a378d640955ee582 (<class 'rdflib.graph.Graph'>)>

In [8]:
len(g)

1605

RDF as a data format is basically a graph that is built up of 'triple' statements. These are made up of subjects, predicates and objects, like simple sentences in a natural language like English. The graph having 1605 statements is a bit like it having 1605 individual sentences, but not necessarily about the same thing. Those sentences are of the form:

- Elvis Presley is a rock-and-roll singer.
- Elvis Presley was born in the United States.
- Elvis Presley was born on the 8 January 1935.

An RDF graph doesn't all have to be on the same topic. It could freely have 'sentences' about Elvis Presley, Bondi Beach, Barack Obama, the Moon, Camembert, your pet cat, a news article on the trial of a Nazi war criminal, triangles, some particualr species of whale, a television programme, and anything else that is a "thing".

RDF has sentences like this translated into a machine-readable structure. The subjects – 'objects' in an object-oriented sense – are URIs, as are the predicates (like 'is a', 'was born in' etc.) and the 'objects' of the sentence are either URIs of other resources or they are 'literals', blobs of data.

We're going to retrieve some literals: RDF literals are basically strings. Other datatypes exist but are implemented as a type restriction on a string. So, for instance, integers or floats or dates are just strings with a little tag on them saying "by the way, this is an integer (or a float or a date or whatever)". If you know about XML, the datatypes used in RDF literals come from XML Schema (don't worry: RDF doesn't worry about the rest of the stuff in the XML Schema spec!).

So let's retrieve the birth and death dates from the graph. The first thing we need to know are the URIs of the properties. On DBpedia, the URIs used for this are:

http://dbpedia.org/ontology/birthDate  
http://dbpedia.org/ontology/deathDate  


To retrieve the birth date, we use a method called "subject_objects" on the graph object, which takes a URIRef (an object that wraps a URI) as an argument and then returns all the statements that match that as a Python generator

In [27]:
for stmt in g.subject_objects(URIRef("http://dbpedia.org/ontology/birthDate")):
     print (stmt)

(rdflib.term.URIRef('http://dbpedia.org/resource/Elvis_Presley'), rdflib.term.Literal('1935-01-08', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#date')))
(rdflib.term.URIRef('http://dbpedia.org/resource/Elvis_Presley'), rdflib.term.Literal('1935-01-01', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#date')))


**(Solo muestra lo de Elvis por que nuestro grafo solo comió datos de Elvis, hay que intentar otro 'sujeto')**

This is a Python tuple object. You can access the data inside it as you would a tuple, and you can call str() on the URIRef and Literal objects to return the string representation

In [9]:
for stmt in g.subject_objects(URIRef("http://dbpedia.org/ontology/birthDate")):
    print ("the person represented by", str(stmt[0]), "was born on", str(stmt[1]))

the person represented by http://dbpedia.org/resource/Elvis_Presley was born on 1935-01-01
the person represented by http://dbpedia.org/resource/Elvis_Presley was born on 1935-01-08


In [10]:
for stmt in g.subject_objects(URIRef("http://dbpedia.org/ontology/spouse")):
    print ("the person represented by", str(stmt[0]), "was married to", str(stmt[1]))

the person represented by http://dbpedia.org/resource/Elvis_Presley was married to http://dbpedia.org/resource/Priscilla_Presley
the person represented by http://dbpedia.org/resource/Priscilla_Presley was married to http://dbpedia.org/resource/Elvis_Presley


**Las ontologias como que están predefinidas en la dbpedia (en este sentido como que son los predicados guardados)**

In [11]:
g.parse("http://dbpedia.org/resource/Tim_Berners-Lee")
g.parse("http://dbpedia.org/resource/Albert_Einstein")
g.parse("http://dbpedia.org/resource/Margaret_Thatcher")

<Graph identifier=N3e307f08b1e94f63a378d640955ee582 (<class 'rdflib.graph.Graph'>)>

In [12]:
#We can now run our birth date call on the lot of them:
for stmt in g.subject_objects(URIRef("http://dbpedia.org/ontology/birthDate")):
     print ("the person represented by", str(stmt[0]), "was born on", str(stmt[1]))
#We repeat the representation but now with the graph updated

the person represented by http://dbpedia.org/resource/Elvis_Presley was born on 1935-01-08
the person represented by http://dbpedia.org/resource/Tim_Berners-Lee was born on 1955-06-08
the person represented by http://dbpedia.org/resource/Albert_Einstein was born on 1879-03-14
the person represented by http://dbpedia.org/resource/Albert_Einstein was born on 1879-01-01
the person represented by http://dbpedia.org/resource/Elvis_Presley was born on 1935-01-01
the person represented by http://dbpedia.org/resource/Tim_Berners-Lee was born on 1955-01-01
the person represented by http://dbpedia.org/resource/Margaret_Thatcher was born on 1925-10-13


---
## Querying the Semantic Web with SPARQL

In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON

In [9]:
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
#SPARQLWrapper("http://bio2rdf.org/sparql")

In [10]:
dir(sparql)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_cleanComments',
 '_createRequest',
 '_defaultGraph',
 '_defaultReturnFormat',
 '_getAcceptHeader',
 '_getRequestEncodedParameters',
 '_parseQueryType',
 '_query',
 'addCustomParameter',
 'addDefaultGraph',
 'addExtraURITag',
 'addNamedGraph',
 'addParameter',
 'agent',
 'clearParameter',
 'comments_pattern',
 'endpoint',
 'http_auth',
 'isSparqlQueryRequest',
 'isSparqlUpdateRequest',
 'method',
 'parameters',
 'passwd',
 'pattern',
 'query',
 'queryAndConvert',
 'queryString',
 'queryType',
 'requestMethod',
 'resetQuery',
 'returnFormat',
 'setCredentials',
 'setHTTPAuth',
 'setMethod',
 'setQuery',
 'setRequestMethod',
 'setReturnFormat',

In [11]:
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label
    WHERE { 
      <http://dbpedia.org/resource/Mexico> rdfs:label ?label .
    }
""")

In [12]:
sparql.setReturnFormat(JSON)

In [13]:
results = sparql.query().convert()
# the query result method convert in a format that we set in the sentence above, this case a JSON dictianary

In [14]:
type(sparql.query().convert())

dict

In [15]:
results

{'head': {'link': [], 'vars': ['label']},
 'results': {'bindings': [{'label': {'type': 'literal',
     'value': 'Mexico',
     'xml:lang': 'en'}},
   {'label': {'type': 'literal', 'value': 'المكسيك', 'xml:lang': 'ar'}},
   {'label': {'type': 'literal', 'value': 'Mexiko', 'xml:lang': 'de'}},
   {'label': {'type': 'literal', 'value': 'México', 'xml:lang': 'es'}},
   {'label': {'type': 'literal', 'value': 'Mexique', 'xml:lang': 'fr'}},
   {'label': {'type': 'literal', 'value': 'Messico', 'xml:lang': 'it'}},
   {'label': {'type': 'literal', 'value': 'メキシコ', 'xml:lang': 'ja'}},
   {'label': {'type': 'literal', 'value': 'Mexico (land)', 'xml:lang': 'nl'}},
   {'label': {'type': 'literal', 'value': 'Meksyk', 'xml:lang': 'pl'}},
   {'label': {'type': 'literal', 'value': 'México', 'xml:lang': 'pt'}},
   {'label': {'type': 'literal', 'value': 'Мексика', 'xml:lang': 'ru'}},
   {'label': {'type': 'literal', 'value': '墨西哥', 'xml:lang': 'zh'}}],
  'distinct': False,
  'ordered': True}}

In [16]:
for result in results["results"]["bindings"]:
    print (result["label"]["value"], 'in', result["label"]["xml:lang"])

Mexico in en
المكسيك in ar
Mexiko in de
México in es
Mexique in fr
Messico in it
メキシコ in ja
Mexico (land) in nl
Meksyk in pl
México in pt
Мексика in ru
墨西哥 in zh


---
## Bio2Rdf
[Bio2Rdf-Dataset-Summary](https://github.com/bio2rdf/bio2rdf-scripts/wiki/Bio2RDF-Dataset-Summary-Statistics)

We use a combination of the VOID vocabulary with our own custom vocabulary (described here). Each dataset is associated with its statistics using void:subset. Each statistics is typed using our custom vocabulary and has additional structure to describe its contents. We use YASGUI to execute the SPARQL queries [e.g. # triples for clinicaltrials]

In [2]:
sparql = SPARQLWrapper("http://node000002.cluster.ids.unimaas.nl/repositories/bio2rdf")

In [3]:
type(sparql)

SPARQLWrapper.Wrapper.SPARQLWrapper

In [8]:
dir(sparql.setCredentials)

['__call__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__func__',
 '__ge__',
 '__get__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__self__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

In [9]:
sparql.setCredentials("test", "test")

In [None]:
PREFIX data_vocab: <http://bio2rdf.org/dataset_vocabulary:>
  SELECT *
  FROM <http://bio2rdf.org/bio2rdf-ctd-statistics>
  WHERE {
   ?endpoint a data_vocab:Endpoint.
   ?endpoint <http://bio2rdf.org/dataset_vocabulary:has_predicate_unique_subject_unique_literal_count> ?anObj.
   ?anObj <http://bio2rdf.org/dataset_vocabulary:has_predicate> ?p.
   ?anObj <http://bio2rdf.org/dataset_vocabulary:has_subject_count> ?sc.
   ?anObj <http://bio2rdf.org/dataset_vocabulary:has_literal_count> ?lc.
  }

In [10]:
sparql.setQuery("""
    SELECT *
    WHERE { ?s ?p ?o  .
    } limit 2
""")

In [11]:
sparql.setReturnFormat(JSON)

In [12]:
results = sparql.query().convert()
results

{'head': {'vars': ['s', 'p', 'o']},
 'results': {'bindings': [{'o': {'type': 'uri',
     'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property'},
    'p': {'type': 'uri',
     'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'},
    's': {'type': 'uri',
     'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}},
   {'o': {'type': 'uri',
     'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property'},
    'p': {'type': 'uri',
     'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'},
    's': {'type': 'uri',
     'value': 'http://www.w3.org/2000/01/rdf-schema#subPropertyOf'}}]}}

In [43]:
sparql1 = SPARQLWrapper("http://bio2rdf.org/sparql")

In [46]:
#type counts
sparql1 = SPARQLWrapper("http://bio2rdf.org/sparql")
sparql1.setQuery("""
 PREFIX void: <http://rdfs.org/ns/void#>
 PREFIX ds: <http://bio2rdf.org/bio2rdf.dataset_vocabulary:>
    SELECT *
     { [] void:subset [ 
           a ds:Dataset-Type-Count; 
           void:class ?type; 
           void:entities ?count; 
           void:distinctEntities ?distinctCount;
       ]
     }
""")
sparql1.setReturnFormat(JSON)
results1 = sparql1.query().convert()
results1

{'head': {'link': [], 'vars': ['type', 'count', 'distinctCount']},
 'results': {'bindings': [], 'distinct': False, 'ordered': True}}

In [39]:
sparql.setReturnFormat(JSON)

In [40]:
results = sparql.query().convert()
# the query result method convert in a format that we set in the sentence above, this case a JSON dictianary

In [41]:
results

{'head': {'link': [], 'vars': ['label']},
 'results': {'bindings': [], 'distinct': False, 'ordered': True}}

In [30]:
for result in results["results"]["bindings"]:
    print (result["label"]["value"], 'in', result["label"]["xml:lang"])

Mexico in en
المكسيك in ar
Mexiko in de
México in es
Mexique in fr
Messico in it
メキシコ in ja
Mexico (land) in nl
Meksyk in pl
México in pt
Мексика in ru
墨西哥 in zh
