# Playing with RDFLib

In [1]:
# Install rdflib (uncomment on first run; %pip installs into the active kernel's environment)
# %pip install rdflib

In [2]:
# Import essential modules
import rdflib
from rdflib import Graph, Literal, BNode, Namespace, RDF, URIRef
from rdflib.namespace import FOAF, RDFS, OWL, XSD

# Your First RDF Graph

In [3]:
# Start from an empty in-memory graph
g = Graph()

# Resources and literal that describe a single song
song = URIRef("http://example.org/music#BohemianRhapsody")
artist = URIRef("http://example.org/music#Queen")
title = Literal("Bohemian Rhapsody")

# Facts about the song: its class, its title, and who performs it
song_facts = [
    (song, RDF.type, URIRef("http://example.org/music#Song")),
    (song, URIRef("http://example.org/music#title"), title),
    (song, URIRef("http://example.org/music#performedBy"), artist),
]
for fact in song_facts:
    g.add(fact)

# Iterating a Graph yields its (subject, predicate, object) triples
for subj, pred, obj in g:
    print(f"{subj} {pred} {obj}")

http://example.org/music#BohemianRhapsody http://example.org/music#title Bohemian Rhapsody
http://example.org/music#BohemianRhapsody http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://example.org/music#Song
http://example.org/music#BohemianRhapsody http://example.org/music#performedBy http://example.org/music#Queen


In [4]:
# Explore the Graph API: list every attribute and method name available on the instance
dir(g)

['_Graph__identifier',
 '_Graph__namespace_manager',
 '_Graph__store',
 '__abstractmethods__',
 '__add__',
 '__and__',
 '__annotations__',
 '__class__',
 '__cmp__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__init__',
 '__init_subclass__',
 '__isub__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__weakref__',
 '__xor__',
 '_abc_impl',
 '_bind_namespaces',
 '_process_skolem_tuples',
 'absolutize',
 'add',
 'addN',
 'all_nodes',
 'base',
 'bind',
 'cbd',
 'close',
 'collection',
 'commit',
 'compute_qname',
 'connected',
 'context_aware',
 'de_skolemize',
 'default_union',
 'destroy',
 'formula_aware',
 'identifier',
 'isomorphic',
 '

# Namespaces

In [5]:
# Define music namespace from the ontology
music = Namespace("http://example.org/music#")
g.bind("music", music)  # Bind prefix for cleaner serialization

# Use the namespace for cleaner code: music.Song stands for the URI
# http://example.org/music#Song, replacing the hand-written URIRef(...) calls
g.add((music.BohemianRhapsody, RDF.type, music.Song))  # same triple as the one added via URIRef earlier
g.add((music.BohemianRhapsody, music.duration, Literal(355, datatype=XSD.int)))  # presumably seconds — TODO confirm against the ontology
g.add((music.Queen, RDF.type, music.Artist))
g.add((music.Queen, music.name, Literal("Queen")))

<Graph identifier=N78dc08ff94eb428ab17dfa9cec59d543 (<class 'rdflib.graph.Graph'>)>

In [6]:
# The listing contains str methods (count, format, index, ...), i.e. a Namespace behaves like a string.
# Terms whose names collide with those methods need item access, e.g. music["count"] rather than music.count.
dir(music)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'removeprefix',
 'removesuffix',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit

# Loading External RDF/OWL Data

In [7]:
# Load the music ontology
g = Graph()  # rebinds g: the hand-built graph from the cells above is no longer referenced by this name
# format="xml" selects the RDF/XML parser; the path is relative to the current working directory
g.parse("./rdf_output/music_ontology.rdf", format="xml")

<Graph identifier=Nbbf3ec7a75a04118bb6aa4f88b56afb7 (<class 'rdflib.graph.Graph'>)>

In [8]:
# Check what classes are defined
# The rdf: and owl: prefixes are not declared inside the query text;
# they are supplied through initNs when the query is executed.
classes_query = """
SELECT DISTINCT ?class WHERE {
    ?class rdf:type owl:Class .
}
"""

In [9]:
# Run the class-listing query; initNs binds the prefixes used in its text
class_prefixes = {"rdf": RDF, "owl": OWL}
results = g.query(classes_query, initNs=class_prefixes)
print("Classes in the ontology:")
for class_row in results:
    print(class_row)

Classes in the ontology:
(rdflib.term.URIRef('http://example.org/music#Album'),)
(rdflib.term.URIRef('http://example.org/music#Genre'),)
(rdflib.term.URIRef('http://example.org/music#Award'),)
(rdflib.term.URIRef('http://example.org/music#Single'),)
(rdflib.term.URIRef('http://example.org/music#CollaborativeSong'),)
(rdflib.term.URIRef('http://example.org/music#Artist'),)
(rdflib.term.URIRef('http://example.org/music#Song'),)
(rdflib.term.URIRef('http://example.org/music#SuccessfulLabel'),)
(rdflib.term.URIRef('http://example.org/music#ExtendedPlay'),)
(rdflib.term.URIRef('http://example.org/music#EstablishedArtist'),)
(rdflib.term.URIRef('http://example.org/music#RecordLabel'),)


In [10]:
# len() of a Graph is the number of triples it holds
print(f"Total statements in ontology: {len(g)}")

Total statements in ontology: 17031


In [11]:
# Uncomment to inspect the attributes available on a query result object
# dir(results)

# Basic SPARQL Queries

In [12]:
# Same namespace URI as declared earlier; passed as the music: prefix in the initNs of the queries below
music = Namespace("http://example.org/music#")

## Query 1: Find all songs and their performers

In [13]:
# Songs paired with the resources that perform them
query1 = """
SELECT ?song ?performer WHERE {
    ?song rdf:type music:Song .
    ?song music:performedBy ?performer .
}
"""
# SPARQL works by pattern matching against RDF triples
# ?song and ?performer are variables that get bound to actual URIs
# The period (.) separates triple patterns that must all match

In [14]:
# Execute Query 1 with the music: and rdf: prefixes bound
results = g.query(query1, initNs={"music": music, "rdf": RDF})

# Uncomment to print every row instead of only the row count
# for row in results:
#     print(f"  {row}")

print(len(results))  # number of (song, performer) solutions

298


## Query 2: Find artists signed to record labels

In [15]:
# Artists together with the record label they are signed to
query2 = """
SELECT ?artist ?label WHERE {
    ?artist rdf:type music:Artist .
    ?artist music:signedTo ?label .
    ?label rdf:type music:RecordLabel .
}
"""
# This demonstrates joining across multiple classes
# All three patterns must match for the same ?artist and ?label

In [16]:
# Query 2 returns no rows on this dataset.
# NOTE(review): the author attributes this to broken data in the RDF file; the cause is
# not verified here (e.g. whether any music:signedTo triples exist) — confirm.
results = g.query(query2, initNs={"music": music, "rdf": RDF})

for row in results:
    print(f"  {row}")

print(len(results))

0


## Query 3: Count songs by genre

In [17]:
# Number of songs per genre, most populated genre first
query3 = """
SELECT ?genre (COUNT(?song) as ?count) WHERE {
    ?song rdf:type music:Song .
    ?song music:hasGenre ?genre .
    ?genre rdf:type music:Genre .
}
GROUP BY ?genre
ORDER BY DESC(?count)
"""
# GROUP BY aggregates results, COUNT tallies matches
# ORDER BY sorts results, DESC means descending order

In [18]:
# Execute Query 3 and show one line per genre, followed by the number of genres returned
results = g.query(query3, initNs={"music": music, "rdf": RDF})

for genre_row in results:
    print(f"  {genre_row}")

print(len(results))

  (rdflib.term.URIRef('http://example.org/music/genre_genre_0'), rdflib.term.Literal('39', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
  (rdflib.term.URIRef('http://example.org/music/genre_genre_4'), rdflib.term.Literal('39', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
  (rdflib.term.URIRef('http://example.org/music/genre_genre_9'), rdflib.term.Literal('37', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
  (rdflib.term.URIRef('http://example.org/music/genre_genre_13'), rdflib.term.Literal('33', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
  (rdflib.term.URIRef('http://example.org/music/genre_genre_6'), rdflib.term.Literal('32', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
  (rdflib.term.URIRef('http://example.org/music/genre_genre_12'), rdflib.term.Literal('28', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
  (rdfli

## Query 4: Find collaborative songs (multiple performers)

In [19]:
# Songs that have more than one music:performedBy value
query4 = """
SELECT ?song (COUNT(?performer) as ?performer_count) WHERE {
    ?song rdf:type music:Song .
    ?song music:performedBy ?performer .
}
GROUP BY ?song
HAVING (COUNT(?performer) > 1)
"""
# HAVING filters grouped results (like WHERE but for aggregated data)
# This finds songs with more than one performer

In [20]:
# Query 4 returns no rows on this dataset.
# NOTE(review): the author suspects broken data; an empty result would also occur if
# every song simply has a single performer — confirm against the RDF file.
results = g.query(query4, initNs={"music": music, "rdf": RDF})

for row in results:
    print(f"  {row}")

print(len(results))

0


## Query 5: Find albums and their total track count

In [21]:
# Number of songs linked to each album through music:featuredOn
# NOTE(review): this query returned 0 rows; the restriction cells later in this notebook
# use music:features instead — confirm which property name the ontology actually defines.
query5 = """
SELECT ?album (COUNT(?song) as ?track_count) WHERE {
    ?album rdf:type music:Album .
    ?song music:featuredOn ?album .
}
GROUP BY ?album
"""
# This counts how many songs appear on each album
# Demonstrates inverse relationships (songs featured on albums)

In [22]:
# Execute Query 5, print each (album, track_count) row, then the number of rows
results = g.query(query5, initNs={"music": music, "rdf": RDF})

for row in results:
    print(f"  {row}")

print(len(results))

0


# Creating Core Music Classes and Properties

In [23]:
# Create a new graph for our ontology extension
# Rebinding g means the loaded ontology is no longer reachable through this name;
# everything from here on is added to an initially empty graph.
g = Graph()
music = Namespace("http://example.org/music#")
g.bind("music", music)  # register the music: prefix for serialization

## Define core classes

In [24]:
# music:Playlist — new OWL class with a human-readable rdfs:label
g.add((music.Playlist, RDF.type, OWL.Class))
g.add((music.Playlist, RDFS.label, Literal("Playlist")))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [25]:
# music:Concert — new OWL class with a human-readable rdfs:label
g.add((music.Concert, RDF.type, OWL.Class))
g.add((music.Concert, RDFS.label, Literal("Concert")))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [26]:
# music:MusicVideo — new OWL class with a human-readable rdfs:label
g.add((music.MusicVideo, RDF.type, OWL.Class))
g.add((music.MusicVideo, RDFS.label, Literal("Music Video")))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

## Define some properties for Playlist

In [27]:
# music:containsSong — object property from a Playlist (domain) to a Song (range)
g.add((music.containsSong, RDF.type, OWL.ObjectProperty))
g.add((music.containsSong, RDFS.domain, music.Playlist))
g.add((music.containsSong, RDFS.range, music.Song))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [28]:
# music:playlistName — datatype property from a Playlist to an xsd:string value
g.add((music.playlistName, RDF.type, OWL.DatatypeProperty))
g.add((music.playlistName, RDFS.domain, music.Playlist))
g.add((music.playlistName, RDFS.range, XSD.string))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [29]:
# music:createdBy — object property from a Playlist to a foaf:Person (reuses the FOAF vocabulary)
g.add((music.createdBy, RDF.type, OWL.ObjectProperty))
g.add((music.createdBy, RDFS.domain, music.Playlist))
g.add((music.createdBy, RDFS.range, FOAF.Person))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [30]:
# Define properties for Concert
# music:performsAt — object property from an Artist (domain) to a Concert (range)
g.add((music.performsAt, RDF.type, OWL.ObjectProperty))
g.add((music.performsAt, RDFS.domain, music.Artist))
g.add((music.performsAt, RDFS.range, music.Concert))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [31]:
# music:concertDate — datatype property from a Concert to an xsd:date value
g.add((music.concertDate, RDF.type, OWL.DatatypeProperty))
g.add((music.concertDate, RDFS.domain, music.Concert))
g.add((music.concertDate, RDFS.range, XSD.date))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [32]:
# music:venue — datatype property from a Concert to an xsd:string value (venue stored as plain text)
g.add((music.venue, RDF.type, OWL.DatatypeProperty))
g.add((music.venue, RDFS.domain, music.Concert))
g.add((music.venue, RDFS.range, XSD.string))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

## Define some properties for MusicVideo

In [33]:
# music:videoFor — object property from a MusicVideo (domain) to a Song (range)
g.add((music.videoFor, RDF.type, OWL.ObjectProperty))
g.add((music.videoFor, RDFS.domain, music.MusicVideo))
g.add((music.videoFor, RDFS.range, music.Song))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [34]:
# music:director — datatype property from a MusicVideo to an xsd:string value (director stored as plain text)
g.add((music.director, RDF.type, OWL.DatatypeProperty))
g.add((music.director, RDFS.domain, music.MusicVideo))
g.add((music.director, RDFS.range, XSD.string))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [35]:
# music:viewCount — datatype property from a MusicVideo to an xsd:int value
# xsd:int is a 32-bit signed integer, so values above 2,147,483,647 are out of range
g.add((music.viewCount, RDF.type, OWL.DatatypeProperty))
g.add((music.viewCount, RDFS.domain, music.MusicVideo))
g.add((music.viewCount, RDFS.range, XSD.int))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

# Working with Individuals

In [36]:
# Create specific instances
# These URIs live under http://example.org/<kind>/..., not in the music# namespace used for the schema terms
my_playlist = URIRef("http://example.org/playlist/roadtrip2024")
queen_concert = URIRef("http://example.org/concert/wembley1986")
bohemian_video = URIRef("http://example.org/video/bohemian_rhapsody")

In [37]:
# Add instance data for playlist
g.add((my_playlist, RDF.type, music.Playlist))
g.add((my_playlist, music.playlistName, Literal("Road Trip 2024")))
# music.BohemianRhapsody is only referenced here; this graph holds no other statements about it
g.add((my_playlist, music.containsSong, music.BohemianRhapsody))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [38]:
# Add instance data for concert
g.add((queen_concert, RDF.type, music.Concert))
g.add((queen_concert, music.concertDate, Literal("1986-07-12", datatype=XSD.date)))  # typed literal matching the declared xsd:date range
g.add((queen_concert, music.venue, Literal("Wembley Stadium")))
g.add((music.Queen, music.performsAt, queen_concert))  # links the artist to the concert (domain Artist, range Concert)

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [39]:
# Add instance data for music video
g.add((bohemian_video, RDF.type, music.MusicVideo))
g.add((bohemian_video, music.videoFor, music.BohemianRhapsody))
g.add((bohemian_video, music.director, Literal("Bruce Gowers")))
# 1,500,000,000 still fits xsd:int (max 2,147,483,647); larger counts would need xsd:long or xsd:integer
g.add((bohemian_video, music.viewCount, Literal(1500000000, datatype=XSD.int)))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

# Possible Restrictions for Music Domain

In [40]:
# Create restriction: albums that contain rock songs
# A blank node is used because the restriction is an anonymous class expression with no URI of its own
rock_album_restriction = BNode()

In [41]:
# Existential restriction: "has at least one music:features value that is a music:RockSong"
# NOTE(review): music:features and music:RockSong are not declared anywhere in this graph, and
# Query 5 above uses music:featuredOn — confirm the intended property and class names.
g.add((rock_album_restriction, RDF.type, OWL.Restriction))
g.add((rock_album_restriction, OWL.onProperty, music.features))
g.add((rock_album_restriction, OWL.someValuesFrom, music.RockSong))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [42]:
# Define RockAlbum as albums with rock content
# Two superclasses: the named class music:Album and the anonymous restriction defined above.
# rdfs:subClassOf states a necessary condition only (every RockAlbum features some RockSong).
g.add((music.RockAlbum, RDF.type, OWL.Class))
g.add((music.RockAlbum, RDFS.subClassOf, music.Album))
g.add((music.RockAlbum, RDFS.subClassOf, rock_album_restriction))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [43]:
# Create cardinality restriction: singles have exactly one song
single_restriction = BNode()
g.add((single_restriction, RDF.type, OWL.Restriction))
g.add((single_restriction, OWL.onProperty, music.features))
# owl:cardinality 1 means exactly one value; its literal is typed xsd:nonNegativeInteger
g.add((single_restriction, OWL.cardinality, Literal(1, datatype=XSD.nonNegativeInteger)))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [44]:
# Attach the cardinality restriction to music:Single.
# music:Single is not typed as owl:Class in this graph; it appears among the classes of the ontology loaded earlier.
g.add((music.Single, RDFS.subClassOf, single_restriction))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [45]:
# Declare the award relationship and constrain it with RDFS domain/range axioms.
# The explicit owl:ObjectProperty typing matches how every other property in this
# ontology extension is introduced; its range, music:Award, is a class.
g.add((music.hasWonAward, RDF.type, OWL.ObjectProperty))
g.add((music.hasWonAward, RDFS.domain, music.Artist))
g.add((music.hasWonAward, RDFS.range, music.Award))

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

# Basic Inference with Music Data

In [46]:
# rdflib has limited built-in OWL reasoning
# We can implement simple RDFS inference manually

def apply_rdfs_inference(graph):
    """Return a copy of ``graph`` extended with basic RDFS subclass entailments.

    Two rules are materialised:

    * ``rdfs:subClassOf`` transitivity: if ``A subClassOf B`` and
      ``B subClassOf C`` then ``A subClassOf C``.
    * Type propagation: if ``x rdf:type A`` and ``A subClassOf B`` then
      ``x rdf:type B``.

    Args:
        graph: The graph to reason over. It is only read, never modified.

    Returns:
        A new ``Graph`` containing every triple of ``graph`` plus the
        inferred ones.
    """
    inferred = Graph()
    inferred += graph  # Copy original so the caller's graph stays untouched

    # Subclass transitivity, repeated until a full pass adds nothing new.
    # Every loop runs over a list() snapshot: triples are never added to the
    # graph while one of its own generators is still being consumed, so the
    # result does not depend on how the store handles mutation mid-iteration.
    changed = True
    while changed:
        changed = False
        for mid_cls, _, super_cls in list(inferred.triples((None, RDFS.subClassOf, None))):
            for sub_cls, _, _ in list(inferred.triples((None, RDFS.subClassOf, mid_cls))):
                triple = (sub_cls, RDFS.subClassOf, super_cls)
                if triple not in inferred:
                    inferred.add(triple)
                    changed = True

    # Type propagation. The subclass closure above is already complete, so each
    # class lists all of its (transitive) superclasses directly and one pass
    # over the snapshot of rdf:type triples is sufficient.
    for individual, _, cls in list(inferred.triples((None, RDF.type, None))):
        for _, _, super_cls in list(inferred.triples((cls, RDFS.subClassOf, None))):
            inferred.add((individual, RDF.type, super_cls))  # no-op if already present

    return inferred

In [47]:
# Apply inference
# g itself is left unchanged; g_inferred is a separate graph holding the original plus the inferred triples
g_inferred = apply_rdfs_inference(g)

In [48]:
# Query with inference
# rdfs:subClassOf* is a zero-or-more property path, so ?type matches music:Album
# itself as well as each of its subclasses. With a plain rdfs:subClassOf,
# individuals typed directly as music:Album would be missed.
query = """
SELECT ?individual ?type WHERE {
    ?individual rdf:type ?type .
    ?type rdfs:subClassOf* music:Album .
}
"""

In [49]:
# Evaluate the query against the inferred graph and list every match
inference_prefixes = {"music": music, "rdf": RDF, "rdfs": RDFS}
results = g_inferred.query(query, initNs=inference_prefixes)
print("Individuals that are albums (including subtypes):")
for match in results:
    print(f"  {match.individual} is a {match.type}")

Individuals that are albums (including subtypes):


# Serializing Music Ontology Data

In [50]:
# Serialize in different formats for different use cases
# Each call writes a file relative to the current working directory, overwriting any existing file of that name
g.serialize(destination="toy_music.ttl", format="turtle")
g.serialize(destination="toy_music.owl", format="xml")  # format="xml" produces RDF/XML
g.serialize(destination="toy_music.jsonld", format="json-ld")

<Graph identifier=Nb8cdabc6a805462bacd6c9670d52ccc0 (<class 'rdflib.graph.Graph'>)>

In [51]:
# Print turtle format (most human-readable)
# Without a destination, serialize() hands back the document instead of writing a file
print("Turtle serialization:")
turtle_text = g.serialize(format="turtle")
print(turtle_text)

Turtle serialization:
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix music: <http://example.org/music#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

music:Queen music:performsAt <http://example.org/concert/wembley1986> .

music:RockAlbum a owl:Class ;
    rdfs:subClassOf [ a owl:Restriction ;
            owl:onProperty music:features ;
            owl:someValuesFrom music:RockSong ],
        music:Album .

music:Single rdfs:subClassOf [ a owl:Restriction ;
            owl:cardinality "1"^^xsd:nonNegativeInteger ;
            owl:onProperty music:features ] .

music:concertDate a owl:DatatypeProperty ;
    rdfs:domain music:Concert ;
    rdfs:range xsd:date .

music:containsSong a owl:ObjectProperty ;
    rdfs:domain music:Playlist ;
    rdfs:range music:Song .

music:createdBy a owl:ObjectProperty ;
    rdfs:domain music:Playlist ;
    rdfs:range foaf:Person .

music:d

In [52]:
# Save only the new classes we created
# Separate, initially empty graph that will receive just the class descriptions
new_classes_graph = Graph()
new_classes_graph.bind("music", music)  # keep the music: prefix in the exported file

In [53]:
# Copy every owl:Class from the music namespace into the export graph.
# Graph.cbd() (Concise Bounded Description) returns the triples whose subject is the
# class and then follows blank-node objects recursively. The anonymous owl:Restriction
# attached to music:RockAlbum is therefore exported with its owl:onProperty and
# owl:someValuesFrom triples, rather than as an empty, dangling blank node.
for cls in list(g.subjects(RDF.type, OWL.Class)):
    if str(cls).startswith(str(music)):
        new_classes_graph += g.cbd(cls)

In [54]:
# Write the class-only graph as Turtle, relative to the current working directory
new_classes_graph.serialize(destination="toy_new_music_classes.ttl", format="turtle")

<Graph identifier=Nb8980dd43a4c42d184aa3fb9b20a271a (<class 'rdflib.graph.Graph'>)>

Different serialization formats serve different purposes:
* Turtle is human-readable, which makes it convenient for debugging.
* RDF/XML integrates with OWL tools.
* JSON-LD works with web applications.
* Separating the new classes into their own file helps with modular ontology development.

.