In [55]:
import pandas as pd
import numpy as np
from urllib.parse import quote
from rdflib import Graph, Namespace, URIRef, Literal, RDF
from rdflib.namespace import RDFS, OWL, XSD

In [56]:
# Read each CSV file in the data folder into its own DataFrame.
# Names and paths are paired by position, in the same order.
(
    students,
    courses,
    courses_instances,
    courses_plannings,
    programme_courses,
    programmes,
    registrations,
    senior_teachers,
    teaching_assistants,
    teacher_hours,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/Students.csv',
        'data/Courses.csv',
        'data/Course_Instances.csv',
        'data/Course_plannings.csv',
        'data/Programme_Courses.csv',
        'data/Programmes.csv',
        'data/Registrations.csv',
        'data/Senior_Teachers.csv',
        'data/Teaching_Assistants.csv',
        'data/Teacher_Hours.csv',
    )
)

In [57]:
# Parse the ontology (Turtle syntax) into a graph of its own
ontology_graph = Graph()
ontology_graph.parse("ontology.ttl", format="turtle")

# Separate, initially empty graph that collects the triples built from the data
rdf_graph = Graph()

# Namespaces: the project vocabulary first, then the standard W3C vocabularies
namespace = Namespace("http://example.org/ontology#")
rdf, rdfs, owl, xsd = (
    Namespace(base_iri)
    for base_iri in (
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "http://www.w3.org/2000/01/rdf-schema#",
        "http://www.w3.org/2002/07/owl#",
        "http://www.w3.org/2001/XMLSchema#",
    )
)

In [58]:
# Convert every CSV row into RDF: one typed resource per row, plus one triple
# per mapped column. All URIs are minted by `entity_uri`, so a link to a
# resource and that resource's own subject URI can never drift apart.


def entity_uri(kind, *keys):
    """Build the URI of a `kind` resource from one or more key values.

    The keys are stringified, joined with "_" and percent-encoded as a single
    path segment (`safe=''`, so "/" is escaped too), giving
    `<namespace><kind>/<encoded keys>`.
    """
    local_id = "_".join(str(key) for key in keys)
    return URIRef(namespace + kind + "/" + quote(local_id, safe=''))


def add_rows(frame, kind, key_columns, literals, links=()):
    """Add one `namespace[kind]` resource per row of `frame` to `rdf_graph`.

    Parameters
    ----------
    frame : DataFrame whose rows are converted.
    kind : local name of the RDF class; also used as the URI path segment.
    key_columns : names of the columns whose values identify the resource.
    literals : (predicate, column) pairs; each adds `Literal(row[column])`.
        Cell values are passed through unchanged; missing values are not
        filtered out here.
    links : (predicate, target_kind, column) triples; each adds a reference to
        the `target_kind` resource keyed by `row[column]`.
    """
    rdf_class = namespace[kind]
    # iterrows is kept so each cell reaches Literal() exactly as it did before.
    for _, row in frame.iterrows():
        subject = entity_uri(kind, *(row[column] for column in key_columns))
        rdf_graph.add((subject, RDF.type, rdf_class))
        for predicate, column in literals:
            rdf_graph.add((subject, predicate, Literal(row[column])))
        for predicate, target_kind, column in links:
            rdf_graph.add((subject, predicate, entity_uri(target_kind, row[column])))


# Students. The programme reference is now percent-encoded the same way as the
# Programme subjects below; it used to be built from the raw code, which yields
# a dangling URI whenever the code contains a character that quote() escapes.
add_rows(
    students, "Student", ['studentId'],
    literals=[
        (namespace.hasName, 'studentName'),
        (namespace.hasYear, 'year'),
        (namespace.hasGraduated, 'graduated'),
        (namespace.worksAsTA, 'workAsTA'),
    ],
    links=[(namespace.hasProgramme, "Programme", 'programmeCode')],
)

# Courses
add_rows(
    courses, "Course", ['courseCode'],
    literals=[
        (namespace.hasCourseName, 'courseName'),
        (namespace.hasCredits, 'credits'),
        (namespace.hasLevel, 'level'),
        (namespace.hasDepartmentName, 'departmentName'),
        (namespace.hasDivisionName, 'divisionName'),
        (namespace.hasProgrammeName, 'programmeName'),
    ],
)

# Course instances
# NOTE(review): this is the only place that uses `hasTeacherId`; every other
# table uses `hasTeacherID`. Left unchanged - confirm against ontology.ttl.
add_rows(
    courses_instances, "CourseInstance", ['instanceId'],
    literals=[
        (namespace.hasCourseCode, 'courseCode'),
        (namespace.hasStudyPeriod, 'studyPeriod'),
        (namespace.hasAcademicYear, 'courseInstanceAcademicYear'),
        (namespace.hasTeacherId, 'teacherId'),
    ],
)

# Course plannings (keyed by the instance they plan)
add_rows(
    courses_plannings, "CoursePlanning", ['instanceId'],
    literals=[
        (namespace.hasInstanceID, 'instanceId'),
        (namespace.hasNumStudents, 'planningNumStudents'),
        (namespace.hasSeniorPlannedHours, 'seniorPlannedHours'),
        (namespace.hasAssistantPlannedHours, 'assistantPlannedHours'),
    ],
)

# Programme/course associations (composite key)
add_rows(
    programme_courses, "ProgrammeCourse", ['programmeCode', 'courseCode'],
    literals=[
        (namespace.hasProgrammeCode, 'programmeCode'),
        (namespace.hasStudyYear, 'studyYear'),
        (namespace.hasAcademicYear, 'programmeCourseAcademicYear'),
        (namespace.hasCourseCode, 'courseCode'),
        (namespace.hasCourseType, 'courseType'),
    ],
)

# Programmes
add_rows(
    programmes, "Programme", ['programmeCode'],
    literals=[
        (namespace.hasProgrammeName, 'programmeName'),
        (namespace.hasDepartmentName, 'departmentName'),
        (namespace.hasTeacherID, 'teacherId'),
    ],
)

# Registrations (composite key)
add_rows(
    registrations, "Registration", ['instanceId', 'studentId'],
    literals=[
        (namespace.hasInstanceID, 'instanceId'),
        (namespace.hasStudentID, 'studentId'),
        (namespace.hasStatus, 'status'),
        (namespace.hasGrade, 'grade'),
    ],
)

# Teacher hours (composite key)
add_rows(
    teacher_hours, "TeacherHours", ['instanceId', 'teacherId'],
    literals=[
        (namespace.hasCourseCode, 'courseCode'),
        (namespace.hasStudyPeriod, 'studyPeriod'),
        (namespace.hasAcademicYear, 'courseInstanceAcademicYear'),
        (namespace.hasTeacherID, 'teacherId'),
        (namespace.hasAssignedHours, 'assignedHours'),
        (namespace.hasReportedHours, 'reportedHours'),
    ],
)

# Teaching assistants and senior teachers share the same column layout
teacher_literals = [
    (namespace.hasTeacherName, 'teacherName'),
    (namespace.hasTeacherID, 'teacherId'),
    (namespace.hasDepartmentName, 'departmentName'),
    (namespace.hasDivisionName, 'divisionName'),
]
add_rows(teaching_assistants, "TeachingAssistant", ['teacherId'], literals=teacher_literals)
add_rows(senior_teachers, "SeniorTeacher", ['teacherId'], literals=teacher_literals)

# Merge the ontology graph into the data graph
rdf_graph += ontology_graph

# Save the combined graph as RDF/XML
rdf_graph.serialize(destination='output.rdf', format='xml')

<Graph identifier=Ndf1eb8aad3044374ace3f3693e93e555 (<class 'rdflib.graph.Graph'>)>