In [11]:
import pandas as pd
import numpy as np
from urllib.parse import quote
from rdflib import Graph, Namespace, URIRef, Literal, RDF
from rdflib.namespace import RDFS, OWL, XSD

In [12]:
# Load all the csv in the data older
students = pd.read_csv('data/Students.csv')
courses = pd.read_csv('data/Courses.csv')
courses_instances = pd.read_csv('data/Course_Instances.csv')
courses_plannings = pd.read_csv('data/Course_plannings.csv')
programme_courses = pd.read_csv('data/Programme_Courses.csv')
programmes = pd.read_csv('data/Programmes.csv')
registrations = pd.read_csv('data/Registrations.csv')
senior_teachers = pd.read_csv('data/Senior_Teachers.csv')
teaching_assistants = pd.read_csv('data/Teaching_Assistants.csv')
teacher_hours = pd.read_csv('data/Teacher_Hours.csv')

In [13]:
courses_instances.head()

Unnamed: 0,courseCode,studyPeriod,courseInstanceAcademicYear,instanceId,teacherId
0,1204,1.0,2013-2014,I-1,19610918-0027
1,1082,3.0,2013-2014,I-2,19620522-0023
2,1190,4.0,2013-2014,I-3,19560705-0025
3,1009,3.0,2013-2014,I-4,19600529-0021
4,1264,4.0,2013-2014,I-5,19570828-0008


In [14]:
# read and parse the ontology file (.ttl)
# Load the RDFS schema
ontology_graph = Graph()
ontology_graph.parse("ontology.ttl", format="turtle")

# Create a new RDF graph for the triples from the data
rdf_graph = Graph()

# Define the namespaces
namespace = Namespace("http://assignment3.org/ontology#")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

In [15]:
filtered_df = courses_instances[courses_instances['instanceId'] == 'I-1']
filtered_df


Unnamed: 0,courseCode,studyPeriod,courseInstanceAcademicYear,instanceId,teacherId
0,1204,1.0,2013-2014,I-1,19610918-0027


In [16]:
# generate a dataframe called "division" where we have the divisionName and the departmentName as columns.
# the departmentName has the values from D1 to D10, and for each departmentName, we have 8 divisionName values
# (e.g., D1 has D1-1, D1-2, ..., D1-8)
division_data = []
for i in range(1, 11):
    for j in range(1, 9):
        division_data.append({'divisionName': f'D{i}-{j}', 'departmentName': f'D{i}'})
divisions = pd.DataFrame(division_data)
divisions.to_csv('data/Division.csv', index=False)
        
# generate the "department" dataframe with "departmentName" and random "teacherId" columns
# found in the "senior_teachers" dataframe
# department_data = []
# for i in range(1, 11):
#     department_data.append({'departmentName': f'D{i}', 'teacherId': senior_teachers.sample()['teacherId'].values[0]})
# departments = pd.DataFrame(department_data)
# departments.to_csv('data/Department.csv', index=False)

In [17]:
divisions = pd.read_csv('data/Division.csv')
divisions[divisions['departmentName'] == 'D1']

Unnamed: 0,divisionName,departmentName
0,D1-1,D1
1,D1-2,D1
2,D1-3,D1
3,D1-4,D1
4,D1-5,D1
5,D1-6,D1
6,D1-7,D1
7,D1-8,D1


In [18]:
teacher_class = namespace.Teacher
ta_class = namespace.TeachingAssistant
senior_teacher_class = namespace.SeniorTeacher

# Assert subclass relationships
rdf_graph.add((ta_class, RDF.type, rdfs.Class))
rdf_graph.add((ta_class, RDFS.subClassOf, teacher_class))
rdf_graph.add((senior_teacher_class, RDF.type, rdfs.Class))
rdf_graph.add((senior_teacher_class, RDFS.subClassOf, teacher_class))

<Graph identifier=N0261f7124f314ed095e0d4f6759828cb (<class 'rdflib.graph.Graph'>)>

In [19]:
# Iterate over the data and create RDF triples
for index, row in students.iterrows():
  student_uri = URIRef(f'{namespace}Student/{row["studentId"]}')

  # data properties 
  rdf_graph.add((student_uri, RDF.type, namespace.Student))
  rdf_graph.add((student_uri, namespace.hasName, Literal(row['studentName'])))
  rdf_graph.add((student_uri, namespace.hasProgramme, URIRef(namespace + "Programme/" + str(row['programmeCode']))))
  rdf_graph.add((student_uri, namespace.hasYear, Literal(row['year'])))
  rdf_graph.add((student_uri, namespace.hasGraduated, Literal(row['graduated'])))
  rdf_graph.add((student_uri, namespace.isTA, Literal(row['workAsTA'])))
  
  # object properties
  # if the student works as a TA, add the teacher ID (as teaching assistant)
  if row['workAsTA']:
    teaching_assistant_uri = URIRef(f'{namespace}TeachingAssistant/{row["studentId"]}')
    rdf_graph.add((student_uri, namespace.WorkAsTA, teaching_assistant_uri))

# Iterate over the 'courses' dataframe
for index, row in courses.iterrows():
    # URI id and type
    course_uri = URIRef(f'{namespace}Course/{row["courseCode"]}')
    rdf_graph.add((course_uri, RDF.type, namespace.Course))
    
    # data properties
    rdf_graph.add((course_uri, namespace.hasCourseCode, Literal(row['courseCode'])))
    rdf_graph.add((course_uri, namespace.hasCourseName, Literal(row['courseName'])))
    rdf_graph.add((course_uri, namespace.hasCredits, Literal(row['credits'])))
    rdf_graph.add((course_uri, namespace.hasLevel, Literal(row['level'])))
    
    # object properties
    rdf_graph.add((course_uri, namespace.courseBelongsTo, URIRef(namespace + "Division/" + str(row['divisionName']))))
    rdf_graph.add((course_uri, namespace.courseOwnedBy, URIRef(namespace + "Programme/" + str(row['programmeName']))))
    

# Iterate over the 'courses_instances' dataframe
for index, row in courses_instances.iterrows():
    instance_uri = URIRef(f'{namespace}CourseInstance/{row["instanceId"]}')

    #data properties
    rdf_graph.add((instance_uri, namespace.hasStudyPeriod, Literal(row['studyPeriod'])))
    rdf_graph.add((instance_uri, namespace.hasAcademicYear, Literal(row['courseInstanceAcademicYear'])))
    
    # object properties
    rdf_graph.add((instance_uri, namespace.courseInstanceOf, URIRef(namespace + "Course/" + str(row['courseCode']))))
    rdf_graph.add((instance_uri, namespace.seniorTeacherExamines, URIRef(namespace + "SeniorTeacher/" + str(row['teacherId']))))


for index, row in courses_plannings.iterrows():
    planning_uri = URIRef(f'{namespace}CoursePlanning/{row["instanceId"]}')

    # data properties
    rdf_graph.add((planning_uri, RDF.type, namespace.CoursePlanning))
    rdf_graph.add((planning_uri, namespace.hasInstanceID, Literal(row['instanceId'])))
    rdf_graph.add((planning_uri, namespace.hasNumStudents, Literal(row['planningNumStudents'])))
    rdf_graph.add((planning_uri, namespace.hasSeniorPlannedHours, Literal(row['seniorPlannedHours'])))
    rdf_graph.add((planning_uri, namespace.hasAssistantPlannedHours, Literal(row['assistantPlannedHours'])))

    # object properties
    rdf_graph.add((planning_uri, namespace.coursePlanningHasInsrance, URIRef(namespace + "CourseInstance/" + str(row['instanceId']))))

# Iterate over the 'programme_courses' dataframe
for index, row in programme_courses.iterrows():

    #data properties
    programme_course_uri = URIRef(f'{namespace}ProgrammeCourse/{row["programmeCode"]}_{row["courseCode"]}_{row["studyYear"]}')
    rdf_graph.add((programme_course_uri, RDF.type, namespace.ProgrammeCourse))
    rdf_graph.add((programme_course_uri, namespace.hasStudyYear, Literal(row['studyYear'])))
    rdf_graph.add((programme_course_uri, namespace.hasAcademicYear, Literal(row['programmeCourseAcademicYear'])))
    rdf_graph.add((programme_course_uri, namespace.hasCourseType, Literal(row['courseType'])))

    # object properties
    rdf_graph.add((programme_course_uri, namespace.programmeCourseBelongsTo, URIRef(namespace + "Programme/" + str(row['programmeCode']))))
    rdf_graph.add((programme_course_uri, namespace.programmeCourseContains, URIRef(namespace + "Course/" + str(row['courseCode']))))

# Iterate over the 'programmes' dataframe
for index, row in programmes.iterrows():
    programme_uri = URIRef(f'{namespace}Programme/{row["programmeCode"]}')

    # data properties
    rdf_graph.add((programme_uri, RDF.type, namespace.Programme))
    rdf_graph.add((programme_uri, namespace.hasProgrammeName, Literal(row['programmeName'])))
    rdf_graph.add((programme_uri, namespace.hasDepartmentName, Literal(row['departmentName'])))
    rdf_graph.add((programme_uri, namespace.hasTeacherID, Literal(row['teacherId'])))

    # object properties
    rdf_graph.add((programme_uri, namespace.programmeBelongsTo, URIRef(namespace + "Department/" + str(row['departmentName']))))
    rdf_graph.add((programme_uri, namespace.programmeDirectedBy, URIRef(namespace + "SeniorTeacher/" + str(row['teacherId']))))
    

# Iterate over the 'registrations' dataframe
for index, row in registrations.iterrows():
    registration_uri = URIRef(f'{namespace}Registration/{row["instanceId"]}_{row["studentId"]}')

    # data properties
    rdf_graph.add((registration_uri, RDF.type, namespace.Registration))
    rdf_graph.add((registration_uri, namespace.hasInstanceID, Literal(row['instanceId'])))
    rdf_graph.add((registration_uri, namespace.hasStudentID, Literal(row['studentId'])))
    rdf_graph.add((registration_uri, namespace.hasStatus, Literal(row['status'])))
    rdf_graph.add((registration_uri, namespace.hasGrade, Literal(row['grade'])))

    # object properties
    rdf_graph.add((registration_uri, namespace.registrationContainsInstance, URIRef(namespace + "CourseInstance/" + str(row['instanceId']))))
    rdf_graph.add((registration_uri, namespace.studentRegistered, URIRef(namespace + "Student/" + str(row['studentId']))))
    

# Iterate over the 'teacher_hours' dataframe
for index, row in teacher_hours.iterrows():
    teacher_hours_uri = URIRef(f'{namespace}TeacherHours/{row["instanceId"]}_{row["teacherId"]}')

    # data properties
    rdf_graph.add((teacher_hours_uri, RDF.type, namespace.TeacherHours))
    rdf_graph.add((teacher_hours_uri, namespace.hasCourseCode, Literal(row['courseCode'])))
    rdf_graph.add((teacher_hours_uri, namespace.hasAssignedHours, Literal(row['assignedHours'])))
    rdf_graph.add((teacher_hours_uri, namespace.hasReportedHours, Literal(row['reportedHours'])))

    #object properties
    # check if teacher is a senior teacher or a teaching assistant
    if row['teacherId'] in senior_teachers['teacherId'].values:
        rdf_graph.add((teacher_hours_uri, namespace.teacherHoursIn, URIRef(namespace + "SeniorTeacher/" + str(row['teacherId']))))
    elif row['teacherId'] in teaching_assistants['teacherId'].values:
        rdf_graph.add((teacher_hours_uri, namespace.teacherHoursIn, URIRef(namespace + "TeachingAssistant/" + str(row['teacherId']))))
    else:
        rdf_graph.add((teacher_hours_uri, namespace.teacherHoursIn, URIRef(namespace + "Teacher/" + str(row['teacherId']))))
    
    rdf_graph.add((teacher_hours_uri, namespace.courseHoursIn, URIRef(namespace + "CourseInstance/" + str(row['instanceId']))))

# Iterate over the 'teaching_assistants' dataframe
for index, row in teaching_assistants.iterrows():
    teaching_assistant_uri = URIRef(f'{namespace}TeachingAssistant/{row["teacherId"]}')

    # data properties
    rdf_graph.add((teaching_assistant_uri, RDF.type, namespace.TeachingAssistant))
    rdf_graph.add((teaching_assistant_uri, namespace.hasTeacherName, Literal(row['teacherName'])))
    rdf_graph.add((teaching_assistant_uri, namespace.hasTeacherID, Literal(row['teacherId'])))
    rdf_graph.add((teaching_assistant_uri, namespace.hasDepartmentName, Literal(row['departmentName'])))
    rdf_graph.add((teaching_assistant_uri, namespace.hasDivisionName, Literal(row['divisionName'])))

    #object properties
    rdf_graph.add((teaching_assistant_uri, namespace.teacherBelongsTo, URIRef(namespace + "Division/" + str(row['divisionName']))))
    
# Iterate over the 'senior_teachers' dataframe
for index, row in senior_teachers.iterrows():
    senior_teacher_uri = URIRef(f'{namespace}SeniorTeacher/{row["teacherId"]}')

    # data properties
    rdf_graph.add((senior_teacher_uri, RDF.type, namespace.SeniorTeacher))
    rdf_graph.add((senior_teacher_uri, namespace.hasTeacherName, Literal(row['teacherName'])))
    rdf_graph.add((senior_teacher_uri, namespace.hasTeacherID, Literal(row['teacherId'])))
    rdf_graph.add((senior_teacher_uri, namespace.hasDepartmentName, Literal(row['departmentName'])))
    rdf_graph.add((senior_teacher_uri, namespace.hasDivisionName, Literal(row['divisionName'])))
    
    # object properties

    rdf_graph.add((senior_teacher_uri, namespace.seniorTeacherDirectorOf, URIRef(namespace + "Division/" + str(row['divisionName']))))
    rdf_graph.add((senior_teacher_uri, namespace.teacherBelongsTo, URIRef(namespace + "Division/" + str(row['divisionName']))))
    
for index, row in divisions.iterrows():
    division_uri = URIRef(f'{namespace}Division/{row["divisionName"]}')

    # data properties
    rdf_graph.add((division_uri, RDF.type, namespace.Division))
    rdf_graph.add((division_uri, namespace.hasDepartmentName, Literal(row['departmentName'])))

    # object properties
    rdf_graph.add((division_uri, namespace.divisionBelongsTo, URIRef(namespace + "Department/" + str(row['departmentName']))))

    

# Merge the ontology graph and the RDF graph
# rdf_graph += ontology_graph

# Save the RDF graph to a file
rdf_graph.serialize(destination='output/output.ttl', format='turtle')

<Graph identifier=N0261f7124f314ed095e0d4f6759828cb (<class 'rdflib.graph.Graph'>)>