In [1]:
# import Graph object from rdflib package
from rdflib import Graph

In [2]:
# parse the file supplied with this lesson
g = Graph().parse(source="pg30291.rdf")

In [3]:
# take the length of the file (number of statements)
len(g)

139

In [4]:
# you might already have data in a Python object...
f = open("pg30291.rdf")
text = f.read()
f.close()
text[0:40]

'<?xml version="1.0" encoding="utf-8"?>\n<'

In [5]:
# ...in which case, use parse method to parse data into Graph object
h = Graph().parse(data=text)
len(h)

139

In [7]:
# use simple query interface, RDF, to perform pattern matching on triples
from rdflib.namespace import RDF
for s, p, o in g.triples((None,RDF.type, None)):
    print("%s is of type %s" % (s,o))

http://www.gutenberg.org/files/30291/30291.txt is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.rdf is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.kindle.noimages is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.epub.noimages is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/files/30291/30291-h/30291-h.htm is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.kindle.images is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/files/30291/30291.zip is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.epub.images is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ is of type http://web.resource.org/cc/Work
http://www.gutenberg.org/files/30291/30291-8.txt is of type http://www.gutenberg.org/2009/pgterms/

In [11]:
# SPARQL equivalent of above pattern match
q = """
	PREFIX pgterms: <http://www.gutenberg.org/2009/pgterms/>

	SELECT ?s ?o WHERE {
		?s rdf:type ?o .
	}
	"""

In [13]:
# execute query method of the graph object
for s, o in g.query(q):
	print("%s is of type %s" % (s, o))

http://www.gutenberg.org/files/30291/30291.txt is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.rdf is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.kindle.noimages is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.epub.noimages is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/files/30291/30291-h/30291-h.htm is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.kindle.images is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/files/30291/30291.zip is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ebooks/30291.epub.images is of type http://www.gutenberg.org/2009/pgterms/file
http://www.gutenberg.org/ is of type http://web.resource.org/cc/Work
http://www.gutenberg.org/files/30291/30291-8.txt is of type http://www.gutenberg.org/2009/pgterms/

In [14]:
# get the book's author, as well as the book id
q = """
	PREFIX pgterms: <http://www.gutenberg.org/2009/pgterms/>

	SELECT DISTINCT ?book ?author WHERE {
		?book rdf:type pgterms:ebook .
		?book dcterms:creator ?agent .
		?agent pgterms:name ?author .
	}
	"""
for book, author in g.query(q):
	print("%s wrote %s" % (author, book))

Kelly, Eleanor Mercein wrote http://www.gutenberg.org/ebooks/30291


In [15]:
# serialize existing graph in turtle format (you may find it easier to read)
print(g.serialize(format="turtle"))


b'@prefix cc: <http://web.resource.org/cc/> .\n@prefix dcam: <http://purl.org/dc/dcam/> .\n@prefix dcterms: <http://purl.org/dc/terms/> .\n@prefix pgterms: <http://www.gutenberg.org/2009/pgterms/> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix xml: <http://www.w3.org/XML/1998/namespace> .\n@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n<http://www.gutenberg.org/> a cc:Work ;\n    cc:license <http://www.gnu.org/licenses/gpl.html> ;\n    rdfs:comment """Archives containing the RDF files for *all* our books can be downloaded at\n            http://www.gutenberg.org/wiki/Gutenberg:Feeds#The_Complete_Project_Gutenberg_Catalog""" .\n\n<http://www.gutenberg.org/2009/agents/34497> a pgterms:agent ;\n    pgterms:alias "Kelly, Mrs. (Eleanor)",\n        "Mercein, Eleanor",\n        "Mercien, Eleanor Kelly" ;\n    pgterms:birthdate 1880 ;\n    pgterms:deathdate 1968 ;\n    pgterms:name "Kelly, Eleanor Mercein