# Run this notebook in a virtualenv

```
virtualenv _notebook
./_notebook/bin/pip install -r requirements.txt
./_notebook/bin/jupyter notebook
```


In [1]:
import sys
from rdflib import Graph
from rdflib.namespace import Namespace, RDF, XSD, SKOS, RDFS
from anytree import Node, RenderTree, ContRoundStyle
from anytree.exporter import DotExporter

from SPARQLWrapper import SPARQLWrapper, JSON

from pprint import pprint

In [6]:
# Running count of nodes created so far; generateNode() increments it and
# prints that many dots as a progress indicator.
num = 0

In [7]:
def buildIPTCTree(ref):
    """Build a three-level anytree hierarchy for the concept scheme ``ref``.

    Reads the module-level graph ``g``: the scheme's ``skos:hasTopConcept``
    objects form the first level, and ``skos:narrower`` is followed for two
    further levels. Each concept becomes a node created by ``generateNode``.

    Args:
        ref: Identifier of the SKOS concept scheme in ``g``.

    Returns:
        The synthetic root ``Node`` named "IPTC"; the top concepts are its
        children.
    """
    iptc = Node("IPTC", wikidata=None, label='IPTC')
    for top_ref in g.objects(ref, SKOS.hasTopConcept):
        # generateNode() performs the Wikidata lookup itself, so no separate
        # (and previously discarded) lookup is issued for the top concept.
        top = generateNode(top_ref, iptc)
        for sub_ref in g.objects(top_ref, SKOS.narrower):
            sub = generateNode(sub_ref, top)
            for subsub_ref in g.objects(sub_ref, SKOS.narrower):
                # Third-level concepts without any preferred label are skipped.
                if g.preferredLabel(subsub_ref):
                    generateNode(subsub_ref, sub)
    # Finish the progress-dot line written by generateNode().
    sys.stdout.flush()
    print("")
    return iptc

def generateNode(ref, parent):
    """Create the tree node for concept ``ref`` and attach it to ``parent``.

    Increments the module-level counter ``num`` and redraws the progress line
    (one dot per node created so far), looks up the matching Wikidata binding
    via ``findWikiData`` and reads the concept's preferred label from the
    module-level graph ``g``.

    Args:
        ref: Identifier of the concept in ``g``; also used as the node name.
        parent: The anytree ``Node`` the new node is attached to.

    Returns:
        The new ``Node`` with ``label`` (text) and ``wikidata`` (the binding
        returned by ``findWikiData``, or ``None``) attributes.
    """
    global num
    num = num + 1
    sys.stdout.write("\r%s" % (num*'.'))
    wikidata = findWikiData(ref)

    labels = g.preferredLabel(ref)
    # A concept without any preferred label used to raise a bare IndexError;
    # fall back to the concept URI so the node can still be built and shown.
    label_source = labels[0][1] if labels else ref
    # u"%s" yields plain text on Python 2 (same result as unicode(...)) and
    # on Python 3, where the ``unicode`` builtin does not exist.
    label = u"%s" % label_source

    return Node(ref, label=label, wikidata=wikidata, parent=parent)

In [8]:
def findWikiData(ref):
    """Query Wikidata for the item whose P5429 value matches ``ref``.

    The IPTC code is ``ref`` with the ``http://cv.iptc.org/newscodes/``
    prefix removed (e.g. ``mediatopic/20000735``). One HTTP request is sent
    to the public Wikidata SPARQL endpoint per call.

    Args:
        ref: Concept URI (a string or string subclass).

    Returns:
        The first result binding of the JSON response (a dict holding the
        selected ``item`` and ``itemLabel`` variables), or ``None`` when the
        query returns no bindings.
    """
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

    # get wikidata ref from URI
    iptc_code = ref.replace('http://cv.iptc.org/newscodes/', '')

    # The code comes from a remotely fetched graph and is embedded in a
    # double-quoted SPARQL literal: escape backslash and double quote so such
    # characters cannot terminate the literal. Ordinary codes are unchanged.
    iptc_literal = iptc_code.replace('\\', '\\\\').replace('"', '\\"')

    # Alternative (unused) form that would match several codes in one query:
    # ?item wdt:P5429 ?iptc .
    # FILTER (?iptc IN ("mediatopic/20000735", "productgenre/entertainment" ) )
    # WikiData Property
    sparql.setQuery("""
            SELECT ?item ?itemLabel
            WHERE
            {
               ?item wdt:P5429 "%s" .
               SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
            }
            """ % (iptc_literal))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    bindings = results['results']['bindings']
    if bindings:
        # Only the first match is used, even if several items share the code.
        return bindings[0]
    return None

In [9]:
# Module-level RDF graph read by buildIPTCTree() and generateNode().
g = Graph()
# Fetch and parse the IPTC media topic vocabulary over HTTP; the URL requests
# RDF/XML with lang=de. As the last expression of the cell, the value returned
# by parse() is shown as the cell output.
g.parse("http://cv.iptc.org/newscodes/mediatopic/?format=rdfxml&lang=de")

<Graph identifier=Nfa9268222c204faa893dee50e088c785 (<class 'rdflib.graph.Graph'>)>

In [10]:
# Build the tree: top concepts plus two levels of narrower concepts
# (three levels in total), rooted at the graph's first concept scheme.
ref = next(g.subjects(RDF.type, SKOS.ConceptScheme))
iptc = buildIPTCTree(ref)

# Print every node as "<tree prefix><label> (<Wikidata id>)".
for pre, fill, node in RenderTree(iptc, style=ContRoundStyle()):
    wd = node.wikidata
    # A falsy value (e.g. None when no Wikidata item matched) is printed as
    # is; otherwise the entity URI is shortened to its bare id.
    wd = wd and wd['item']['value'].replace('http://www.wikidata.org/entity/', '')
    print("%s%s (%s)" % (pre, node.label, wd))

...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
IPTC (None)
├── Erziehung, Ausbildung (Q8434)
│   ├── Religionserziehung (Q1412471)
│   ├── Eltern (None)
│   ├── Schule (Q3914)
│   │   ├── Hochschule (Q136822)
│   │   ├── Gymnasium (Q9826)
│   │   ├── Haupt-und Realschule (Q149566)
│   │   ├── Grundschule (Q9842)
│   │   ├── Vorschule/Kindergarten (Q1076052)
│   │   ╰── Weiterbildung (Q765192)
│   ├── Lehren und Lernen (None)
│   │   ├── Prüfung (None)
│   │   ├── Schüler (Q48282)
│ 