In [1]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal, RDF
from rdflib.namespace import RDFS, OWL, XSD

In [2]:
# Read every source CSV from the data folder into its own DataFrame.
# The names on the left are listed in the same order as the paths on the right.
(
    students,
    courses,
    courses_instances,
    courses_plannings,
    programme_courses,
    programmes,
    registrations,
    senior_teachers,
    teaching_assistants,
    teacher_hours,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/Students.csv',
        'data/Courses.csv',
        'data/Course_Instances.csv',
        'data/Course_plannings.csv',
        'data/Programme_Courses.csv',
        'data/Programmes.csv',
        'data/Registrations.csv',
        'data/Senior_Teachers.csv',
        'data/Teaching_Assistants.csv',
        'data/Teacher_Hours.csv',
    )
)

In [28]:
# Preview the first five course instances to check the loaded columns.
courses_instances.head(n=5)

Unnamed: 0,courseCode,studyPeriod,courseInstanceAcademicYear,instanceId,teacherId
0,1204,1.0,2013-2014,I-1,19610918-0027
1,1082,3.0,2013-2014,I-2,19620522-0023
2,1190,4.0,2013-2014,I-3,19560705-0025
3,1009,3.0,2013-2014,I-4,19600529-0021
4,1264,4.0,2013-2014,I-5,19570828-0008


In [3]:
# Read and parse the ontology file (Turtle syntax) into its own graph.
# "ontology.ttl" is a relative path, so it is looked up in the working directory.
ontology_graph = Graph()
ontology_graph.parse("ontology.ttl", format="turtle")

# Create a new, initially empty RDF graph for the triples generated from the data
rdf_graph = Graph()

# Define the namespaces
# `namespace` is the base IRI used by the later cells to build class, property
# and resource URIs.
namespace = Namespace("http://assignment3.org/ontology#")
# IRIs of the standard RDF / RDFS / OWL / XSD vocabularies. rdflib's own
# RDF, RDFS, OWL and XSD objects (imported at the top) cover the same IRIs.
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

In [21]:
# Spot check: look up the course instance whose id is 'I-1'.
is_first_instance = courses_instances['instanceId'].eq('I-1')
filtered_df = courses_instances.loc[is_first_instance]
filtered_df


Unnamed: 0,courseCode,studyPeriod,courseInstanceAcademicYear,instanceId,teacherId
0,1204,1.0,2013-2014,I-1,19610918-0027


In [24]:
# Build the division lookup table: departments D1..D10, each owning eight
# divisions named "<department>-1" .. "<department>-8" (e.g. D1-1 .. D1-8).
division_data = [
    {'divisionName': f'D{i}-{j}', 'departmentName': f'D{i}'}
    for i in range(1, 11)
    for j in range(1, 9)
]

# One row per division, with columns divisionName and departmentName.
division = pd.DataFrame(division_data)

# Persist the table as "Division.csv" in the data folder, without the index column.
division.to_csv('data/Division.csv', index=False)

In [26]:
# Reload the division table from the path it is saved to ('data/Division.csv');
# reading a bare 'Division.csv' only works if a stray copy happens to sit in the
# working directory. Then show the divisions of department D1 as a sanity check.
divisions = pd.read_csv('data/Division.csv')
divisions[divisions['departmentName'] == 'D1']

Unnamed: 0,divisionName,departmentName
0,D1-1,D1
1,D1-2,D1
2,D1-3,D1
3,D1-4,D1
4,D1-5,D1
5,D1-6,D1
6,D1-7,D1
7,D1-8,D1


In [31]:
# Convert every loaded DataFrame into RDF triples in `rdf_graph`, then write the
# graph to disk as Turtle. All tables follow the same pattern (one resource per
# row, typed with a class, plus a few properties), so the pattern lives in the
# helpers below and each table only declares its columns.


def _resource_uri(kind, key):
    """Return the URI `<namespace><kind>/<key>` identifying one resource."""
    return URIRef(f'{namespace}{kind}/{key}')


def _add_literal(subject, predicate_name, value):
    """Add a data-property triple for `value`, unless the CSV cell was empty.

    pandas reads an empty cell as NaN; wrapping that in a Literal would store a
    meaningless "nan" value, so an empty cell produces no triple at all.
    """
    if pd.notna(value):
        rdf_graph.add((subject, namespace[predicate_name], Literal(value)))


def _add_link(subject, predicate_name, target_kind, target_key):
    """Add an object-property triple to another resource, unless its key is empty."""
    if pd.notna(target_key):
        rdf_graph.add((subject, namespace[predicate_name], _resource_uri(target_kind, target_key)))


def _add_entities(frame, kind, key_columns, literals, links=()):
    """Add one resource of class `kind` to `rdf_graph` for every row of `frame`.

    frame       -- DataFrame holding the rows to convert
    kind        -- class name; also used as the path segment of the resource URI
    key_columns -- columns whose values, joined with '_', identify the resource
    literals    -- (predicate name, column) pairs emitted as data properties
    links       -- (predicate name, target kind, column) triples emitted as
                   object properties pointing at `<namespace><target kind>/<value>`
    """
    # iterrows() is kept on purpose: it hands over plain Python values, which
    # is what decides the datatype rdflib assigns to each Literal.
    for _, row in frame.iterrows():
        key = '_'.join(str(row[column]) for column in key_columns)
        subject = _resource_uri(kind, key)
        rdf_graph.add((subject, RDF.type, namespace[kind]))
        for predicate_name, column in literals:
            _add_literal(subject, predicate_name, row[column])
        for predicate_name, target_kind, column in links:
            _add_link(subject, predicate_name, target_kind, row[column])


# Students need one conditional property, so they are handled explicitly.
for _, row in students.iterrows():
    student_uri = _resource_uri('Student', row['studentId'])
    rdf_graph.add((student_uri, RDF.type, namespace['Student']))
    _add_literal(student_uri, 'hasName', row['studentName'])
    _add_link(student_uri, 'hasProgramme', 'Programme', row['programmeCode'])
    _add_literal(student_uri, 'hasYear', row['year'])
    _add_literal(student_uri, 'hasGraduated', row['graduated'])
    _add_literal(student_uri, 'isTA', row['workAsTA'])

    # if the student works as a TA, add the teacher ID (as teaching assistant)
    # NOTE(review): 'WorkAsTA' is capitalised unlike the other predicates; kept
    # as-is so existing queries keep working -- confirm against the ontology.
    if row['workAsTA']:
        _add_literal(student_uri, 'WorkAsTA', row['teacherId'])

# Courses
_add_entities(
    courses, 'Course', ['courseCode'],
    literals=[
        ('hasCourseCode', 'courseCode'),
        ('hasCourseName', 'courseName'),
        ('hasCredits', 'credits'),
        ('hasLevel', 'level'),
    ],
    links=[
        ('courseBelongsTo', 'Division', 'divisionName'),
        # NOTE(review): Programme resources are keyed by programmeCode in the
        # other tables; confirm this column yields URIs that match them.
        ('courseOwnedBy', 'Programme', 'programmeName'),
    ],
)

# Course instances
# NOTE(review): 'hasTeacherId' here vs 'hasTeacherID' on programmes/teachers;
# kept as-is -- confirm which spelling the ontology declares.
_add_entities(
    courses_instances, 'CourseInstance', ['instanceId'],
    literals=[
        ('hasCourseCode', 'courseCode'),
        ('hasStudyPeriod', 'studyPeriod'),
        ('hasAcademicYear', 'courseInstanceAcademicYear'),
        ('hasTeacherId', 'teacherId'),
    ],
)

# Course plannings
_add_entities(
    courses_plannings, 'CoursePlanning', ['instanceId'],
    literals=[
        ('hasInstanceID', 'instanceId'),
        ('hasNumStudents', 'planningNumStudents'),
        ('hasSeniorPlannedHours', 'seniorPlannedHours'),
        ('hasAssistantPlannedHours', 'assistantPlannedHours'),
    ],
)

# Programme courses (identified by programme code + course code)
_add_entities(
    programme_courses, 'ProgrammeCourse', ['programmeCode', 'courseCode'],
    literals=[
        ('hasProgrammeCode', 'programmeCode'),
        ('hasStudyYear', 'studyYear'),
        ('hasAcademicYear', 'programmeCourseAcademicYear'),
        ('hasCourseCode', 'courseCode'),
        ('hasCourseType', 'courseType'),
    ],
)

# Programmes
_add_entities(
    programmes, 'Programme', ['programmeCode'],
    literals=[
        ('hasProgrammeName', 'programmeName'),
        ('hasDepartmentName', 'departmentName'),
        ('hasTeacherID', 'teacherId'),
    ],
)

# Registrations (identified by instance id + student id)
_add_entities(
    registrations, 'Registration', ['instanceId', 'studentId'],
    literals=[
        ('hasInstanceID', 'instanceId'),
        ('hasStudentID', 'studentId'),
        ('hasStatus', 'status'),
        ('hasGrade', 'grade'),
    ],
)

# Teacher hours (identified by instance id + teacher id)
_add_entities(
    teacher_hours, 'TeacherHours', ['instanceId', 'teacherId'],
    literals=[
        ('hasCourseCode', 'courseCode'),
        ('hasAssignedHours', 'assignedHours'),
        ('hasReportedHours', 'reportedHours'),
    ],
)

# Teaching assistants and senior teachers share the same set of properties.
teacher_literals = [
    ('hasTeacherName', 'teacherName'),
    ('hasTeacherID', 'teacherId'),
    ('hasDepartmentName', 'departmentName'),
    ('hasDivisionName', 'divisionName'),
]
_add_entities(teaching_assistants, 'TeachingAssistant', ['teacherId'], literals=teacher_literals)
_add_entities(senior_teachers, 'SeniorTeacher', ['teacherId'], literals=teacher_literals)

# Merge the ontology graph and the RDF graph
# rdf_graph += ontology_graph

# Save the RDF graph to a file; create the output folder first so the cell
# also works on a fresh checkout where it does not exist yet.
output_path = 'output/output.ttl'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
rdf_graph.serialize(destination=output_path, format='turtle')

<Graph identifier=N3bae4dd84ae64ac49ddd26a459edae0e (<class 'rdflib.graph.Graph'>)>