# Build nodes and edges from a SPARQL query

This notebook queries a SPARQL endpoint and builds the `nodes.js` and `edges.js` files that are used as input for a vis.js network visualization.

In [4]:
from pathlib import Path
import os
import json
from rdflib import Graph, Namespace
import re
from SPARQLWrapper import SPARQLWrapper, TURTLE
import sys

# schema.org namespace, bound as SDO; used below for the sdo:name and sdo:hasPart lookups
SDO = Namespace("http://schema.org/")


def write_file(filename, data):
    """Write the string ``data`` to ``filename``, replacing any existing file.

    Args:
        filename: Path (``str`` or ``pathlib.Path``) of the file to write.
        data: Text to write.

    The file is always encoded as UTF-8 so that non-ASCII text is written the
    same way on every platform instead of depending on the locale default.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(data)

def openJsonFile(json_file):
    """Read the file at ``json_file`` and return its parsed JSON content."""
    with open(json_file) as handle:
        return json.load(handle)
      

def prettifyFileList(file_list):
    """Return the entries of ``file_list`` as ``"<index>: <entry>"`` labels."""
    return [
        str(position) + ": " + entry
        for position, entry in enumerate(file_list)
    ]


def listFiles(mypath):
    """Return the names (not full paths) of the ``.json`` entries in ``mypath``."""
    return [
        entry
        for entry in os.listdir(mypath)
        if entry.endswith(".json")
    ]


def DiffList(l1, l2):
    """Return the distinct items of ``l1`` that do not occur in ``l2``.

    The result is built from a set, so duplicates are dropped and the order
    of the returned list is not defined.
    """
    only_in_first = set(l1).difference(set(l2))
    return list(only_in_first)


def getNodeId(node):
    """Return the last two ``/``-separated segments of the node, joined by ``/``.

    Args:
        node: Object exposing ``toPython()`` that returns a string
            (presumably an rdflib URI reference; verify against callers).

    Returns:
        str: e.g. ``"curr/abc"`` for ``"http://example.org/curr/abc"``.

    NOTE(review): a value ending in ``/`` yields a trailing empty segment
    (``".../abc/"`` -> ``"abc/"``). The previous version computed an unused
    ``end`` offset for that case; it never affected the result, so it was
    removed. Confirm whether trailing slashes should be stripped.
    """
    return "/".join(node.toPython().split("/")[-2:])


def getNodeName(graph, node):
    """Look up the ``SDO.name`` value of ``node`` in ``graph`` and return its ``.value``."""
    name_term = graph.value(node, SDO.name)
    return name_term.value


def getPrimaryNodes(nodes, edges):
    """Return the ids of nodes that are not the target of any edge.

    Args:
        nodes: Iterable of dicts with an ``"id"`` key.
        edges: Iterable of dicts with a ``"to"`` key.

    Returns:
        list: Distinct node ids that never appear as an edge's ``"to"``,
        in order of first appearance in ``nodes``. The order is deterministic,
        so repeated runs produce the same output.
    """
    targets = {edge["to"] for edge in edges}
    # dict.fromkeys drops duplicate ids while keeping first-appearance order.
    distinct_ids = dict.fromkeys(node["id"] for node in nodes)
    return [node_id for node_id in distinct_ids if node_id not in targets]


def addNode(nodes, _id, label, group):
    """Append a node dict (``id``, ``label``, ``group``) to ``nodes`` and return that list."""
    nodes.append(dict(id=_id, label=label, group=group))
    return nodes


def addRelation(fromNodeId, toNodesArray, edges):
    """Append one ``from``/``to`` edge per entry of ``toNodesArray`` to ``edges`` and return it."""
    edges.extend(
        {"from": fromNodeId, "to": target}
        for target in toNodesArray
    )
    return edges

def removeWhitespace(string):
    """Return ``string`` with every run of whitespace characters removed."""
    whitespace_runs = re.compile(r"\s+")
    return whitespace_runs.sub("", string)


def createNodesAndEdgesFromGraph(sparql_result, discipline, educationalLevel):
    """Convert a graph result into node and edge dicts for the network view.

    Args:
        sparql_result: Graph object supporting ``len()``, ``triples()`` and
            ``value()`` (the notebook passes the converted SPARQL result).
        discipline: Discipline name; used in the group/core-node label.
        educationalLevel: Educational level; used in the group/core-node label.

    Returns:
        tuple: ``(nodes, edges)``. ``nodes`` holds one dict per subject that
        has an ``SDO.name`` plus one synthetic core node labelled
        ``"<discipline> <educationalLevel>"``. ``edges`` holds one dict per
        distinct ``SDO.hasPart`` relation plus an edge from the core node to
        every node that is not the target of any edge.
    """
    # Serves as the group name and as the id and label of the core node.
    course_code = discipline + " " + educationalLevel

    print(len(sparql_result))

    # One node per subject id. A set keeps the duplicate check O(1) instead of
    # rescanning the whole node list for every triple.
    nodes = []
    seen_node_ids = set()
    for subject, _, _ in sparql_result.triples((None, SDO.name, None)):
        node_id = subject.toPython()
        if node_id in seen_node_ids:
            continue
        seen_node_ids.add(node_id)
        nodes.append({
            "id": node_id,
            "label": getNodeName(sparql_result, subject),
            "group": course_code
        })

    # One edge per distinct (from, to) pair.
    # Assumes toPython() returns hashable values (strings for URIs).
    edges = []
    seen_edges = set()
    for subject, _, part in sparql_result.triples((None, SDO.hasPart, None)):
        edge_key = (subject.toPython(), part.toPython())
        if edge_key in seen_edges:
            continue
        seen_edges.add(edge_key)
        edges.append({
            "from": edge_key[0],
            "to": edge_key[1]
        })

    print(f"length of nodes: {len(nodes)}")
    print(f"length of edges: {len(edges)}")

    # Nodes without an incoming edge get attached to the core node. This must
    # be computed before the core node itself is added.
    primary_nodes = getPrimaryNodes(nodes, edges)

    nodes = addNode(nodes, course_code, course_code, course_code)
    edges = addRelation(course_code, primary_nodes, edges)

    return nodes, edges

In [15]:
def point_on_circle(i, total=None, radius=400, center=(0, 0)):
    """Return the ``(x, y)`` coordinates of slot ``i`` on a circle.

    Args:
        i: Slot index. With ``total`` given, slot ``i`` sits at the angle
            ``2 * pi * i / total`` (radians), so ``total`` slots are evenly
            spaced around the circle.
        total: Number of slots on the circle. When omitted, the legacy angle
            ``pi / i`` is used for backward compatibility; that spacing is
            not even and raises ``ZeroDivisionError`` for ``i == 0``.
        radius: Circle radius (default 400).
        center: ``(x, y)`` of the circle centre (default origin).

    Returns:
        tuple: ``(x, y)`` as floats.
    """
    from math import cos, sin, pi

    if total is None:
        angle = pi / i
    else:
        angle = 2 * pi * i / total
    x = center[0] + (radius * cos(angle))
    y = center[1] + (radius * sin(angle))

    return x, y


def placeNodesInCircle(nodes):
    """Assign every node the circle position of its group.

    All nodes sharing a ``"group"`` value receive the same ``"x"``/``"y"``.
    Groups are placed evenly around the circle in order of first appearance,
    so the layout is the same on every run.

    Args:
        nodes: List of node dicts with a ``"group"`` key; modified in place.

    Returns:
        list: The same ``nodes`` list.
    """
    # Distinct groups in first-appearance order (set iteration order is not
    # stable between runs for strings).
    ordered_groups = list(dict.fromkeys(node["group"] for node in nodes))
    groups = set(ordered_groups)
    print(f"Found {groups} groups.")

    total = len(ordered_groups)
    positions = {
        group: point_on_circle(index, total)
        for index, group in enumerate(ordered_groups)
    }

    # Single pass over the nodes instead of one pass per group.
    for node in nodes:
        node["x"], node["y"] = positions[node["group"]]
    return nodes


def _escapeSparqlLiteral(text):
    """Escape ``text`` so it can be embedded in a quoted SPARQL string literal."""
    return (
        str(text)
        .replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def querySparql(discipline, educationalLevel,
                endpoint_url="http://localhost:3030/ds/sparql",
                educationalContext="mittelschule"):
    """Run the curriculum CONSTRUCT query and return the converted result.

    Args:
        discipline: Text searched in the name of the ``sdo:about`` resource.
        educationalLevel: Text searched in the name of the
            ``sdo:educationalLevel`` resource.
        endpoint_url: SPARQL endpoint to query (defaults to the local
            endpoint used so far).
        educationalContext: Text searched in the name of the
            ``oeh:educationalContext`` resource (defaults to the previously
            hard-coded ``"mittelschule"``).

    Returns:
        Whatever ``SPARQLWrapper(...).query().convert()`` yields for this
        CONSTRUCT query (the notebook output shows an rdflib graph).

    The search terms are escaped before being placed into the quoted literals
    so that quotes or backslashes in the input cannot break out of the
    literal and alter the query.
    """
    query = """
PREFIX text: <http://jena.apache.org/text#>
PREFIX sdo: <http://schema.org/>
PREFIX curr: <http://w3id.org/openeduhub/curricula/curriculum_bayern/>
PREFIX oeh: <http://w3id.org/openeduhub/vocabs/>


CONSTRUCT {{
    ?nodes sdo:name ?nodeName ;
        sdo:courseCode ?courseCode ;
        sdo:hasPart ?parts .
}}

WHERE
{{
  GRAPH <http://w3id.org/openeduhub/curricula/curriculum_bayern/#> {{

  ?nodes sdo:about ?o .
  ?o text:query(sdo:name '{discipline}'@de) .

  ?nodes sdo:educationalLevel ?bNEduLevel .
  ?bNEduLevel text:query(sdo:name "{educationalLevel}") .

  ?nodes oeh:educationalContext ?bNEducontext .
  ?bNEducontext text:query(sdo:name "{educationalContext}") .

  ?nodes sdo:name ?nodeName .
  ?nodes sdo:courseCode ?courseCode .
  OPTIONAL {{ ?nodes sdo:hasPart ?parts . }}
  }}
 }}
""".format(
        discipline=_escapeSparqlLiteral(discipline),
        educationalLevel=_escapeSparqlLiteral(educationalLevel),
        educationalContext=_escapeSparqlLiteral(educationalContext),
    )

    # Identify the client by Python version, e.g. "Python/3.11".
    user_agent = "Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    return sparql.query().convert()


def buildNodesAndEdgesFromSparql():
    """Prompt for queries, fetch their graphs and write ``nodes.js``/``edges.js``.

    The input is split on ``;`` into queries; each query must be a
    ``discipline,educationalLevel`` pair. For every pair the SPARQL endpoint
    is queried and the result is converted to nodes and edges. The combined
    nodes are positioned by group and both lists are written as JS modules to
    ``<cwd>/data/nodes.js`` and ``<cwd>/data/edges.js``.

    Raises:
        ValueError: If a query does not contain exactly one comma.
    """
    user_input = input("Enter discipline and educationalLevel comma seperated.")

    nodes_all = []
    edges_all = []

    queries = user_input.split(";")
    for i, query in enumerate(queries):
        discipline, educationalLevel = query.split(",")

        discipline = removeWhitespace(discipline)
        educationalLevel = removeWhitespace(educationalLevel)

        message = (
            "Sending to sparql with: \n"
            f"\t discipline: {discipline} \n"
            f"\t educational Level: {educationalLevel}"
        )

        print(message)
        sparql_result = querySparql(discipline, educationalLevel)
        print(sparql_result)

        nodes, edges = createNodesAndEdgesFromGraph(sparql_result, discipline, educationalLevel)
        nodes_all += nodes
        edges_all += edges

        if i+1 < len(queries):
            print("got result back, sending next query...")

    # Position the nodes of ALL queries. Passing only the last query's nodes
    # would leave every other group without coordinates.
    nodes_all = placeNodesInCircle(nodes_all)

    nodes_js = "const Nodes = " + str(nodes_all) + "\n export default Nodes"
    edges_js = "const Edges = " + str(edges_all) + "\n export default Edges"

    # Make sure the output directory exists before writing into it.
    data_dir = Path.cwd() / "data"
    data_dir.mkdir(parents=True, exist_ok=True)

    filename_nodes = data_dir / ("nodes" + ".js")
    filename_edges = data_dir / ("edges" + ".js")

    write_file(filename_nodes, nodes_js)
    write_file(filename_edges, edges_js)

    print("Done!")


In [16]:
# Prompts for "discipline, educationalLevel" (multiple queries separated by ";"),
# queries the SPARQL endpoint and writes data/nodes.js and data/edges.js.
buildNodesAndEdgesFromSparql()

Enter discipline and educationalLevel comma seperated. englisch, 5


Sending to sparql with: 
	 discipline: englisch 
	 educational Level: 5
[a rdflib:ConjunctiveGraph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']]
0
length of nodes: 0
length of edges: 0
Found {'englisch 5'} groups.
Done!
