# UE02 - RDF and RDF Schema

Before you start with this notebook, complete the eight tasks in the `1. RDF` sheet of `SemAI.jar`. You will then make use of your solutions in this notebook. 

## Task 0: Preparation

Preparation (Installs and Imports). 




In [None]:
!pip install -q rdflib 
!pip install -q pydot
!pip install -q owlrl

!pip install networkx pyvis

import rdflib
from rdflib import Graph, Literal, RDF, URIRef, BNode, Namespace, Dataset
import networkx as nx
from pyvis.network import Network
import requests
from IPython.display import display, HTML, Image
import os
import pydot
import owlrl
from rdflib.namespace import FOAF , XSD , RDFS

## Task 1:  Improve interactive RDF graph visualization (1 pt)

Improve function `visualize_graph_pyvis` (from `V01_rdf.ipynb`) as follows:
- add an optional `base` parameter
- abbreviate the labels of nodes and edges in the same way as in `visualize_graph`. 
- make sure that blank node IDs are not shown in the visualization. 

Optional features: 
- use different graphical forms for literals and URIs
- (add further improvements as you like)

Test the function with `rdf_str` and with your solution to task `0. Intro` in the `1. RDF` sheet in `SemAI.jar`.

In [None]:
def visualize_graph(g,base=None):
    """Draw an RDF graph as a static PNG image using pydot and display it.

    Args:
        g: The rdflib graph to draw.
        base: Optional string that is removed from the labels of URI nodes.

    URI nodes are labelled with their quoted N3 form; labels longer than 25
    characters are cut down to their first and last 12 characters joined by
    "...". Literal nodes are drawn as boxes, blank nodes without a label.
    Edges are labelled with the quoted N3 form of the predicate.
    """
    nsm = g.namespace_manager

    def quoted(term):
        # N3 form of the term wrapped in double quotes.
        return '"' + term.n3(nsm) + '"'

    def dot_id(term):
        # Literals use their bare N3 form as node name, everything else the quoted form.
        return term.n3(nsm) if isinstance(term, Literal) else quoted(term)

    def make_node(term):
        # Build the pydot node for a term, or None for unsupported term kinds.
        if isinstance(term, URIRef):
            caption = quoted(term)
            if base:
                caption = caption.replace(base, "")
            if len(caption) > 25:
                caption = caption[:12] + "..." + caption[-12:]
            return pydot.Node(dot_id(term), label=caption)
        if isinstance(term, Literal):
            return pydot.Node(dot_id(term), label=term.n3(nsm), shape="box")
        if isinstance(term, BNode):
            return pydot.Node(dot_id(term), label="")
        return None

    dot = pydot.Dot('my_graph', graph_type='digraph', layout='sfdp', splines='curved')

    # Nodes first (all subjects, then all objects), edges afterwards.
    for term in [*g.subjects(None, None), *g.objects(None, None)]:
        node = make_node(term)
        if node is not None:
            dot.add_node(node)

    for s, p, o in g:
        dot.add_edge(pydot.Edge(dot_id(s), dot_id(o), label=quoted(p)))

    png_bytes = dot.create_png()
    display(Image(png_bytes))

def visualize_graph_pyvis(g:Graph, base:str=None):
    """Show an RDF graph as an interactive PyVis network in the notebook.

    Every distinct RDF term that occurs as subject or object becomes one node;
    all triples sharing the same subject and object are drawn as one directed
    edge whose label lists their predicates.

    Args:
        g: The rdflib graph to visualize.
        base: Optional string (typically the base IRI) that is removed from
            the displayed node and edge labels.

    Labels are the N3 form of a term (using the prefixes bound in ``g``),
    shortened to the first and last 12 characters when longer than 25
    characters. Blank nodes are drawn without a label, literals as squares
    and all other nodes as dots. The network is written to ``graph.html``
    and that file is then displayed inline.
    """

    def display_label(term):
        # Abbreviated text that is shown for a term.
        text = term.n3(g.namespace_manager)
        if base:
            text = text.replace(base, "")
        # A horizontal ellipsis (U+2026) marks the cut; it is a single
        # character and reads cleaner than three dots.
        if len(text) > 25:
            text = text[:12] + '\u2026' + text[-12:]
        return text

    def node_key(term):
        # Node identity is the full N3 form without prefix abbreviation, so
        # two different terms never share a node, even if their abbreviated
        # labels are equal or (for blank nodes) empty.
        return str(term.n3())

    node_labels = {}      # node key -> label shown in the visualization
    literal_keys = set()  # node keys that stand for literals
    edge_labels = {}      # (subject key, object key) -> predicate labels

    for s, p, o in g:
        s_key, o_key = node_key(s), node_key(o)

        # Only subject and object can be blank nodes; they get an empty label
        # so that the blank node ID is never shown.
        node_labels[s_key] = "" if isinstance(s, BNode) else display_label(s)
        node_labels[o_key] = "" if isinstance(o, BNode) else display_label(o)

        # Only the object can be a literal.
        if isinstance(o, Literal):
            literal_keys.add(o_key)

        edge_labels.setdefault((s_key, o_key), []).append(display_label(p))

    # Create the NetworkX graph. A DiGraph holds at most one edge per
    # (subject, object) pair, so parallel predicates are merged into one label
    # instead of silently overwriting each other.
    nx_graph = nx.DiGraph()
    for (s_key, o_key), labels in edge_labels.items():
        nx_graph.add_edge(s_key, o_key, label=", ".join(sorted(labels)))

    # Create a PyVis network graph
    # set directed to true - otherwise an edge is lost in the visualization of 3.Reification
    pyvis_graph = Network(notebook=True, directed=True, cdn_resources='in_line',bgcolor="#EEEEEE")
    pyvis_graph.from_nx(nx_graph)

    # Customize the node appearance
    for node in pyvis_graph.nodes:
        key = node["id"]
        # from_nx uses the node id as label; replace it with the display label.
        node["label"] = node_labels[key]
        # Use a different graphical form for literals
        node["shape"] = "square" if key in literal_keys else "dot"
        node["size"] = 10
        node["font"] = {"size": 10}

    # Customize the edge appearance
    for edge in pyvis_graph.edges:
        edge["width"] = 0.5
        edge["font"] = {"size": 8, "align": "middle"}
        edge["arrows"] = "to"

    # Define the HTML file name
    html_file = 'graph.html'

    # Show the graph in the notebook
    pyvis_graph.show(html_file)

    # Check if the file exists
    if os.path.isfile(html_file):
        # Read the content of the HTML file
        with open(html_file, 'r') as file:
            html_content = file.read()
        # Display the HTML content in the notebook
        display(HTML(html_content))
    else:
        print(f"File not found: {html_file}")

In [None]:
# Example graph from the lecture: Bob, his interest in the Mona Lisa and
# related resources, written in Turtle with a BASE IRI and several prefixes.
rdf_str = """BASE   <http://example.org/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX schema: <http://schema.org/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX wd: <http://www.wikidata.org/entity/>
 
<bob#me>
   a foaf:Person ;
   foaf:knows <alice#me> ;
   schema:birthDate "1990-07-04"^^xsd:date ;
   foaf:topic_interest wd:Q12418 .
   
wd:Q12418
  dcterms:title "Mona Lisa" ;
  dcterms:creator <http://dbpedia.org/resource/Leonardo_da_Vinci> .

<http://data.europeana.eu/item/04802/243FA8618938F4117025F17A8B813C5F9AA4D619>
  dcterms:subject wd:Q12418 .
"""

g_rdf_str = Graph()
# State the format explicitly (as the other cells do) instead of relying on
# the parser's default for string input.
g_rdf_str.parse(data=rdf_str, format="turtle")
print(g_rdf_str.serialize(format="turtle", base="http://example.org/"))
# Test both visualizations with the base IRI removed from the labels.
visualize_graph(g_rdf_str, base="http://example.org/")
visualize_graph_pyvis(g_rdf_str, base="http://example.org/")


In [None]:
# Solution to task "0. Intro": John is a foaf:Person and knows Mary.
t0 = """@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix ex:   <http://www.ex.org/> .

ex:John a foaf:Person.
ex:John foaf:knows ex:Mary.
"""

# Parse the Turtle string, print it back as Turtle and test the PyVis
# visualization without a base.
g_t0 = Graph()
g_t0.parse(data=t0, format="text/turtle")
print(g_t0.serialize(format="turtle", base=None))
visualize_graph_pyvis(g_t0)

## Task 2:  Print RDF graph as HTML table (1 pt)

Implement a function `rdf2htmltable(g)` that 
- takes as parameter an rdflib.Graph 
- generates and displays an HTML table representing that graph with
  - one line per RDF statement 
  - three columns (subject, predicate, object) 
  - URIs should be shown in abbreviated form and be represented as links (`href=<full URI>`)

Test the function with `rdf_str` and with your solution to task `0. Intro` in the `1. RDF` sheet in `SemAI.jar`.

In [None]:
def rdf2htmltable(g:Graph):
  """Display an RDF graph as an HTML table with one row per statement.

  The table has the columns Subject, Predicate and Object. URIs are shown in
  abbreviated form (using the prefixes bound in ``g``) and link to the full
  URI. Literals and blank nodes are not URIs and are shown as plain text.

  Args:
    g: The rdflib graph to display.
  """
  from html import escape  # local import: only this function needs it

  def cell(term):
    # HTML content of one table cell for the given RDF term.
    text = term.n3(g.namespace_manager)
    if isinstance(term, URIRef):
      # N3 wraps URIs without a bound prefix in <...>; drop that wrapper.
      shown = text.replace("<","").replace(">","")
      return '<a href="{0}">{1}</a>'.format(escape(str(term)), escape(shown))
    # Escape so that characters like < or & in a literal cannot break the table.
    return escape(text)

  table_rows = [
    '<tr><td>{0}</td><td>{1}</td><td>{2}</td></tr>'.format(cell(s), cell(p), cell(o))
    for s, p, o in g
  ]
  table_header =  '<tr><th>Subject</th><th>Predicate</th><th>Object</th></tr>'
  table_html = '<table>{0}{1}</table>'.format(table_header, ''.join(table_rows))
  display(HTML(table_html))



In [None]:
# Test the HTML table with the graph parsed from rdf_str.
rdf2htmltable(g_rdf_str)

In [None]:
# Test the HTML table with the graph parsed from t0.
rdf2htmltable(g_t0)

## Task 3: A function for parsing and displaying an RDF graph (1 pt)

A function `parse_display_rdf(str)` that takes as parameter a string which represents an RDF graph in Turtle notation and 
- produces an rdflib.Graph from that string
- prints the graph in Turtle notation
- prints the graph in RDF/XML
- visualizes it using `visualize_graph` (to be taken from `V01_rdf.ipynb`)
- visualizes it using (your improved version of) `visualize_graph_pyvis`
- outputs it using `rdf2htmltable` (only if you have implemented this function)

Test the function with `rdf_str` and with your solution to task `0. Intro` in the `1. RDF` sheet in `SemAI.jar`.

In [None]:
def parse_display_rdf(s:str, base:str=None):
  """Parse a Turtle string and show the resulting graph in every available form.

  The graph is printed in Turtle and in RDF/XML, drawn with
  ``visualize_graph`` and ``visualize_graph_pyvis``, and finally output as
  an HTML table with ``rdf2htmltable``.

  Args:
    s: An RDF graph in Turtle notation.
    base: Optional base IRI. It is passed to the Turtle serialization and to
      both visualizations; with the default ``None`` nothing is abbreviated
      relative to a base.
  """
  g = Graph()
  g.parse(data=s, format="text/turtle")
  print("Turtle:\n"+g.serialize(format="turtle", base=base))
  print("RDF/XML:\n"+g.serialize(format="xml"))
  visualize_graph(g, base)
  visualize_graph_pyvis(g, base)
  rdf2htmltable(g)




In [None]:
# Test parse_display_rdf with the Turtle string t0.
parse_display_rdf(t0)

In [None]:
# Test parse_display_rdf with the Turtle string rdf_str.
parse_display_rdf(rdf_str)

# Tasks 4-10 (1 point each)

For each of the remaining 7 tasks in the "1. RDF" sheet in `SemAI.jar` do the following: 
- add a text cell in this notebook 
  - with the description of the task from `SemAI.jar` 
  - with number and title (e.g., **1. Simple Data Graph**)  from `SemAI.jar` as header 
- add a code cell where you apply `parse_display_rdf(str)` on your solution

# 1. Simple Data Graph

Create an RDF graph in Turtle notation. Use the FOAF vocabulary to state the following.

* Mary and Jim are persons.
* Mary knows Jim.
* Mary is 27 years old.

The URIs for the two persons should be http://www.ex.org/person#Mary and http://www.ex.org/person#Jim. The age of Mary should be represented as an integer.

In [None]:
# Turtle solution: :Mary and :Jim are foaf:Person, :Mary knows :Jim and has
# the age 27 typed as xsd:integer.
solution="""@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix :     <http://www.ex.org/person#> .

:Mary 
a foaf:Person;
foaf:age "27"^^xsd:integer;
foaf:knows :Jim.

:Jim rdf:type foaf:Person.
"""
parse_display_rdf(solution)

# 2. Simple Schema

Create a vocabulary using RDFS in Turtle. Specify

* Classes *Company*, *Employee*, and *Person*
* Property *worksFor* between *Employee* and *Company*
* Property *salary* of *Employee* with Integer as data type
* Class *Employee* is a subclass of *Person*

Use XSD for data types. The URIs of classes and properties are in namespace <http://www.ex.org/vocabulary#>, for example, <http://www.ex.org/vocabulary#Company>

In [None]:
# Turtle solution: three RDFS classes (:Employee is a subclass of :Person)
# and the properties :worksFor (Employee -> Company) and :salary
# (Employee -> xsd:integer), each with domain and range.
solution="""@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
@prefix :     <http://www.ex.org/vocabulary#> .

:Company a rdfs:Class.
:Employee a rdfs:Class.
:Person a rdfs:Class.
:Employee rdfs:subClassOf :Person.

:worksFor rdf:type rdf:Property.
:worksFor rdfs:domain :Employee.
:worksFor rdfs:range :Company.

:salary rdf:type rdf:Property.
:salary rdfs:domain :Employee.
:salary rdfs:range xsd:integer.
"""
parse_display_rdf(solution)

# 3. Reification

Create an RDF graph in Turtle notation. Use the RDF vocabulary where applicable and the example namespace (ex:) for all other resources (ex:Mary, ex:John, ex:says, ex:loves). Hint: the lecture slides contain a similar reification example.

* Mary says that John loves her.

In [None]:
# Turtle solution: ex:Statement is an rdf:Statement with subject ex:John,
# predicate ex:loves and object ex:Mary; ex:Mary ex:says that statement.
solution="""@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix ex:   <http://www.ex.org/> .

ex:Statement a rdf:Statement.
ex:Mary ex:says ex:Statement.

ex:Statement rdf:subject ex:John.
ex:Statement rdf:object ex:Mary.
ex:Statement rdf:predicate ex:loves.
"""
parse_display_rdf(solution)

# 4. Blank Node

Create an RDF graph in Turtle notation. Use the RDF and FOAF vocabularies where applicable and the example namespace (ex:) for the other resources.

* John knows a person, who knows Mary.
* Use a blank node to represent that anonymous person.

In [None]:
# Turtle solution: ex:John knows an anonymous foaf:Person (written with the
# [...] blank node syntax) who in turn knows ex:Mary.
solution = """@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix ex:   <http://www.ex.org/> .

ex:John foaf:knows [
    foaf:knows ex:Mary;
    a foaf:Person
] .
"""
parse_display_rdf(solution)

# 5. Multiple Classification

Create an RDF graph in Turtle notation. Use the RDF vocabulary where applicable and the example namespace (ex:) for the other resources.

* John is an instance of SoccerPlayer and of Student.

In [None]:
# Turtle solution: ex:John has two rdf:type statements, one for
# ex:SoccerPlayer and one for ex:Student.
solution="""@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix ex:   <http://www.ex.org/> .

ex:John rdf:type ex:SoccerPlayer.
ex:John rdf:type ex:Student.
"""
parse_display_rdf(solution)

# 6. Metamodeling
Create an RDF graph in Turtle notation. Use the RDF and RDF Schema vocabularies where applicable and the example namespace (ex:) for the other resources.

* *Dog* and *Cat* are instances of *Species* and subclasses of *Animal*.
* *Lassie* is an instance of *Dog*.

In [None]:
# Turtle solution: ex:Dog and ex:Cat are each an instance of ex:Species and a
# subclass of ex:Animal; ex:Lassie is an instance of ex:Dog.
solution="""@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix ex:   <http://www.ex.org/> .

ex:Dog a ex:Species.
ex:Dog rdfs:subClassOf ex:Animal.
ex:Cat a ex:Species.
ex:Cat rdfs:subClassOf ex:Animal.
ex:Lassie a ex:Dog.
"""
parse_display_rdf(solution)

# 7. Properties

Create an RDF graph in Turtle notation. Use the RDF and RDF Schema vocabularies where applicable and the example namespace (ex:) for the other resources, e.g., ex:childOf, ex:descendantOf.

* Everyone who is a *child* of someone, is also a *descendant* of that someone.

In [None]:
# Turtle solution: ex:childOf is declared a sub-property of ex:descendantOf.
solution="""@prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix ex:   <http://www.ex.org/> .

ex:childOf rdfs:subPropertyOf ex:descendantOf.
"""
parse_display_rdf(solution)