In [1]:
import warnings
from pathlib import Path

import pandas as pd
from rdflib import Graph, Literal, Namespace, RDF, URIRef
from rdflib.namespace import SKOS, DCTERMS

# Locations of the curriculum inputs/outputs, relative to the working directory.
# NOTE(review): 'Lerhplan' looks like a misspelling of 'Lehrplan' -- confirm
# against the actual file name on disk before changing it.
curriculum_xlsx, curriculum_txt, curriculum_csv = (
    Path(location)
    for location in (
        './data/Lerhplan_all.xlsx',
        './data/curriculum_all_txt',
        './data/curriculum_all_csv',
    )
)

# create Node Class
class Node:
    """One entry of the curriculum tree.

    All attributes are optional keyword arguments; anything not supplied is
    stored as ``None``. A node created without arguments can therefore serve
    as an anonymous root that only holds ``children``.

    Keyword Args:
        _id: identifier of the entry (stored as ``id``).
        prefLabel: preferred label of the entry.
        description: free-text description of the entry.
        educationalLevel: educational level of the entry.
        educationalContext: educational context of the entry.
        notation: notation code of the entry.
        level: depth of the entry in the hierarchy.
    """

    def __init__(self, **kwargs):
        self.id = kwargs.get('_id')
        self.prefLabel = kwargs.get('prefLabel')
        self.description = kwargs.get('description')
        self.educationalLevel = kwargs.get('educationalLevel')
        self.educationalContext = kwargs.get('educationalContext')
        self.notation = kwargs.get('notation')
        self.level = kwargs.get('level')

        # every instance gets its own list of child nodes
        self.children = []

    def __repr__(self):
        # __repr__ must return a str. prefLabel is None for an anonymous root
        # (and may be a non-string such as NaN), which used to raise TypeError.
        return str(self.prefLabel)

In [2]:
# load the 'CF Item' worksheet of the curriculum workbook into a DataFrame
df = pd.read_excel(io=curriculum_xlsx, sheet_name='CF Item', index_col=None)

In [3]:
# break each humanCodingScheme code apart on '_' and store the pieces
# as separate columns
coding_scheme_parts = df['humanCodingScheme'].str.split('_', expand=True)
df[['notation', 'educationalContext', 'educationalLevel', 'smartLevel']] = coding_scheme_parts

In [4]:
# everything before the first '-' of fullStatement becomes prefLabel,
# everything after it becomes description
statement_parts = df['fullStatement'].str.split(pat='-', n=1, expand=True)
df[['prefLabel', 'description']] = statement_parts

In [5]:
# split humanCodingScheme in notation, educationalContext, educationalLevel
# and smartLevel (n=3 keeps everything after the third '_' together).
# The fourth target used to be the bare name `_`: in IPython that is the
# previous cell output, outside IPython it is undefined, so the remainder
# ended up in an arbitrarily named column. It is named explicitly now.
df[['notation', 'educationalContext', 'educationalLevel', 'smartLevel']] = (
    df['humanCodingScheme'].str.split('_', n=3, expand=True)
)

In [6]:
# add a level attribute as the first column
# DataFrame.insert refuses to add a column that already exists, which happens
# whenever this cell is re-run. Check for the column explicitly instead of
# swallowing every exception with a bare `except`.
if 'level' not in df.columns:
    df.insert(loc=0, column='level', value=0)


In [7]:
def _count_dots(code):
    """Return how many '.' characters ``code`` contains."""
    return code.count('.')


# the hierarchy depth of a row is the number of dots in its smartLevel
df[['level']] = df['smartLevel'].apply(_count_dots)

In [8]:
# one dict per DataFrame row, keyed by column name
data = df.to_dict(orient='records')

In [19]:
def build_tree(records):
    """Assemble the curriculum hierarchy from flat records.

    Records are processed in order. A record with ``level`` k is attached
    below the most recently added node at depth k - 1 (depth 0 nodes hang
    directly off the returned root).

    A record whose parent chain does not exist yet is skipped with a warning.
    Previously the first such record silently ended the whole loop, dropping
    every record that followed it.

    Args:
        records: iterable of dicts with the keys 'identifier', 'prefLabel',
            'notation', 'description' and 'level'; level 0 records are also
            read for 'educationalLevel' and 'educationalContext'.

    Returns:
        Node: an anonymous root whose ``children`` are the level 0 nodes.
    """
    tree_root = Node()

    for position, record in enumerate(records):
        depth = record['level']

        # walk down the right-most branch to find the parent of this record
        parent = tree_root
        try:
            for _step in range(depth):
                parent = parent.children[-1]
        except IndexError:
            warnings.warn(
                'Skipping record %d (identifier=%r): no parent node exists '
                'for level %r' % (position, record.get('identifier'), depth),
                stacklevel=2,
            )
            continue

        attributes = dict(
            _id=record['identifier'],
            prefLabel=record['prefLabel'],
            notation=record['notation'],
            description=record['description'],
            level=depth,
        )
        # only top-level nodes carry educationalLevel / educationalContext
        if depth == 0:
            attributes.update(
                educationalLevel=record['educationalLevel'],
                educationalContext=record['educationalContext'],
            )
        parent.children.append(Node(**attributes))

    return tree_root


root = build_tree(data)

In [39]:
# spot check: educationalContext of the first top-level node
root.children[0].educationalContext

'grundschule'

In [16]:
# For every node below the top level, remove educationalContext and
# educationalLevel: these properties are inherited and can be inferred
# from the top-level ancestor.
def delete_from_children(root):
    """Remove inherited attributes from all descendants below the top level.

    Every descendant of ``root`` whose ``level`` is not 0 loses its
    ``educationalContext`` and ``educationalLevel`` attributes. Nodes with
    ``level == 0`` keep them. The function recurses through ``children`` and
    modifies the nodes in place.

    Each attribute is removed independently, so a node that is already missing
    one of them (e.g. after a previous run) still loses the other. Only the
    expected ``AttributeError`` is ignored.

    Args:
        root: object with a ``children`` list; its own attributes are not
            touched.
    """
    for child in root.children:
        if child.level != 0:
            for inherited in ('educationalContext', 'educationalLevel'):
                try:
                    delattr(child, inherited)
                except AttributeError:
                    # already removed -- nothing to do
                    pass
        delete_from_children(child)

In [None]:
def print_tree(root, depth=0):
    """Print the repr of every descendant of ``root``, one per line.

    Each nesting level is indented by two further spaces; ``root`` itself is
    not printed.
    """
    indent = '  ' * depth
    for node in root.children:
        print(f'{indent}{node!r}')
        print_tree(node, depth + 1)

In [18]:
# print(print_tree(root))

In [None]:
# TODO add relations?

# NOTE(review): 'curriculumd_bayern' may be a typo for 'curriculum_bayern';
# it is part of every generated URI, so confirm before changing it.
name_systematik = 'curriculumd_bayern'
# Path.cwd() takes no arguments, and 'data' / '...' divides two plain strings
# (TypeError). Build the path by joining onto the working directory instead.
filename = Path.cwd() / 'data' / 'curriculum_bayern.ttl'
g = Graph()
n = Namespace("https://example-perma-id/" + name_systematik + "/")

# the concept scheme itself is identified by the bare namespace URI
category = URIRef(n)

title = Literal(name_systematik, lang="de")
description = Literal(name_systematik, lang="de")
creator = Literal("<https://creator.com>")

# Add triples using store's add method.
g.add((category, RDF.type, SKOS.ConceptScheme))
g.add((category, DCTERMS.title, title))
g.add((category, DCTERMS.description, description))
g.add((category, DCTERMS.creator, creator))

# define relevant predicates
# (plain URI strings; the triples in this cell use the SKOS namespace object)

narrower = 'http://www.w3.org/2004/02/skos/core#narrower'
broader = 'http://www.w3.org/2004/02/skos/core#broader'
topConceptOf = 'http://www.w3.org/2004/02/skos/core#topConceptOf'
note = 'http://www.w3.org/2004/02/skos/core#note'


def add_items(root):
    """Add every descendant of ``root`` to the graph ``g`` as a SKOS concept.

    For each node below ``root`` this adds its type, prefLabel and scheme
    membership, an optional note, and narrower/broader links to its direct
    children, then recurses into the node. ``root`` itself is not added.

    Uses the module-level ``g`` (graph), ``n`` (namespace) and ``category``
    (concept scheme).

    Args:
        root: object with a ``children`` list of nodes exposing ``id``,
            ``prefLabel``, ``children`` and optionally ``note`` or
            ``description``.
    """
    for item in root.children:
        # Index the namespace to mint the term: this is rdflib's way to get a
        # URIRef inside a namespace, whereas concatenating with `+` is not
        # guaranteed to yield an rdflib term.
        node = n[str(item.id)]

        g.add((node, RDF.type, SKOS.Concept))
        g.add((node, SKOS.prefLabel, Literal(item.prefLabel, lang="de")))
        g.add((node, SKOS.inScheme, category))

        # Node has no `note` attribute (reading it raised AttributeError);
        # its free text lives in `description`. A `note` attribute is still
        # honoured when present. Missing or empty text produces no triple.
        note_text = getattr(item, 'note', getattr(item, 'description', None))
        if isinstance(note_text, str) and note_text.strip():
            g.add((node, SKOS.note, Literal(note_text, lang="de")))

        for child in item.children:
            child_node = n[str(child.id)]
            g.add((node, SKOS.narrower, child_node))
            g.add((child_node, SKOS.broader, node))

        add_items(item)
            
add_items(root)

# the direct children of the root are the top concepts of the scheme
for child in root.children:
    # index the namespace so the subject/object is a proper rdflib URIRef
    node = n[str(child.id)]
    g.add((category, SKOS.hasTopConcept, node))
    g.add((node, SKOS.topConceptOf, category))

# Bind a few prefix, namespace pairs for more readable output
g.bind("dct", DCTERMS)
g.bind("skos", SKOS)

# Depending on the rdflib version, serialize() returns bytes or str;
# only decode when bytes come back.
output = g.serialize(format='turtle', base=n)
if isinstance(output, bytes):
    output = output.decode("utf-8")

# `filename` may be a str or a Path; append '.ttl' only when it is missing,
# so the suffix is never doubled.
target = Path(filename)
if target.suffix != '.ttl':
    target = target.with_name(target.name + '.ttl')
target.parent.mkdir(parents=True, exist_ok=True)

# explicit encoding: the labels are German text and must not depend on the
# platform default; the `with` block closes the file
with open(target, 'w', encoding='utf-8') as f:
    f.write(output)

In [None]:
# preview the first 50 rows of the prepared DataFrame
df.head(n=50)