In [59]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [60]:
from sentence_transformers import SentenceTransformer, util

In [80]:
import pandas as pd
from owlready2 import *

# Course Data Loader & Preprocessing
Load all available course with metadata

In [62]:
course_raw_file = "D:/Documents/Programming/CurriculumPlanning/data/course_raw.csv"

In [63]:
# convert csv file into dataframe
df = pd.read_csv(course_raw_file)
df.head()

Unnamed: 0,ID,COURSE_ID,COURSE_NAME,CREDITS,ELECTIVE,HASSTUDIED,COURSE_DIFFICULTY,PREREQUISITES,COREQUISITE,PREVIOUS,COURSE_DESCRIPTION
0,1,MA001IU,Calculus 1,4,0,1,0.713,,,,This course introduces the principles of deep ...
1,2,IT116IU,C/C++ Programming,4,0,1,0.724,,,,This course covers fundamental calculus concep...
2,3,PH013IU,Physics 1,2,0,1,0.356,,,,An introduction to structured and procedural p...
3,4,EN008IU,Listening AE1,2,0,1,0.605,,,,"Fundamentals of mechanics including motion, fo..."
4,5,EN007IU,Writing AE1,2,0,1,0.65,,,,A beginner-level course in academic English li...


In [64]:
df.isna().sum()

ID                     0
COURSE_ID              0
COURSE_NAME            0
CREDITS                0
ELECTIVE               0
HASSTUDIED             0
COURSE_DIFFICULTY      0
PREREQUISITES         45
COREQUISITE           58
PREVIOUS              47
COURSE_DESCRIPTION     0
dtype: int64

In [65]:
df = df.fillna(0)

In [66]:
df_cut = df.drop(columns=['HASSTUDIED'])

In [78]:
print(df_cut.isna().sum())

ID                    0
COURSE_ID             0
COURSE_NAME           0
CREDITS               0
ELECTIVE              0
COURSE_DIFFICULTY     0
PREREQUISITES         0
COREQUISITE           0
PREVIOUS              0
COURSE_DESCRIPTION    0
dtype: int64


In [67]:
# convert to another csv file
df_cut.to_csv("D:/Documents/Programming/CurriculumPlanning/data/course_transformed.csv", index=False)

# Features (User's Input):
completed courses, numbers of semesters, future goals, average difficulty of each semester

Future goals: case-extraction

In [68]:
user_input = "I want to make intelligent programs that understand images"

In [69]:
model = SentenceTransformer('all-MiniLM-L6-v2')
user_embedding = model.encode(user_input)

In [70]:
course_description = pd.DataFrame(df['COURSE_DESCRIPTION'])
course_description.head()

Unnamed: 0,COURSE_DESCRIPTION
0,This course introduces the principles of deep ...
1,This course covers fundamental calculus concep...
2,An introduction to structured and procedural p...
3,"Fundamentals of mechanics including motion, fo..."
4,A beginner-level course in academic English li...


In [71]:
course_description = df["COURSE_DESCRIPTION"].astype(str).tolist()

In [72]:
print(course_description)

['This course introduces the principles of deep learning, including neural networks, convolutional neural networks (CNNs), recurrent networks, and autoencoders, with practical applications in image and language processing.', 'This course covers fundamental calculus concepts such as limits, derivatives, and integrals, with applications in engineering and science.', 'An introduction to structured and procedural programming using the C and C++ languages, covering syntax, control structures, functions, and file handling.', 'Fundamentals of mechanics including motion, forces, energy, momentum, and rotational dynamics, tailored for science and engineering students.', 'A beginner-level course in academic English listening skills, focusing on understanding lectures, note-taking, and recognizing main ideas and details.', 'This course develops basic academic writing skills, including paragraph structure, coherence, and grammar for clear written communication.', 'A foundational course introducing

In [73]:
flattened = [item[0] for item in course_description]
course_embeddings = model.encode(flattened)

course_embeddings.shape

(60, 384)

In [74]:
similarities = util.cos_sim(user_embedding, course_embeddings)
print(similarities)

tensor([[0.0558, 0.0558, 0.0517, 0.0575, 0.0517, 0.0558, 0.0517, 0.0517, 0.0558,
         0.0517, 0.0835, 0.0517, 0.0400, 0.0558, 0.1039, 0.1039, 0.0703, 0.0632,
         0.0517, 0.0632, 0.1039, 0.0575, 0.0517, 0.0334, 0.0517, 0.1039, 0.0575,
         0.0558, 0.1039, 0.0632, 0.0835, 0.1039, 0.1039, 0.0575, 0.0632, 0.0558,
         0.0835, 0.0575, 0.1039, 0.0517, 0.0575, 0.0517, 0.0558, 0.0835, 0.1039,
         0.0632, 0.1039, 0.0632, 0.0835, 0.1039, 0.0400, 0.0575, 0.0632, 0.1039,
         0.0575, 0.0517, 0.0517, 0.1039, 0.0835, 0.0558]])


In [75]:
top_indices = similarities.argsort(descending=True).tolist()[0]

for i in top_indices[:5]:
  print(df.iloc[i])

ID                                                                   15
COURSE_ID                                                       IT154IU
COURSE_NAME                                              Linear Algebra
CREDITS                                                               3
ELECTIVE                                                              0
HASSTUDIED                                                            0
COURSE_DIFFICULTY                                                 0.685
PREREQUISITES                                                   MA002IU
COREQUISITE                                                           0
PREVIOUS                                                              0
COURSE_DESCRIPTION    Continues Calculus 1 with techniques of integr...
Name: 14, dtype: object
ID                                                                   58
COURSE_ID                                                       IT017IU
COURSE_NAME                             

# Ontology

Ontology Base = the “conceptual set” (C1, C2, C3…)
👉 Think: concepts like Course, Student, Goal

Ontology Relations Library (Logic) = the “correlation set” (R1, R2, R3…)
👉 Think: relationships like hasGoal, requires, fitsGoal

Function Set = reasoning or inferencing rules
👉 Think: logic rules like
IF course fitsGoal AND student hasGoal THEN recommend(course)

Knowledge Base of Ontology = reusable knowledge based on domain-specific ontologies
👉 e.g., one for AI curriculum, one for Medicine, etc.

In [76]:
from rdflib import Graph, Namespace, Literal, RDF, URIRef

# 1. Extract
df = pd.read_csv("D:/Documents/Programming/CurriculumPlanning/data/course_transformed.csv")

# 2. Transform
EX = Namespace("https://github.com/ngqynhanh/CurriculumPlanning/")

g = Graph()
for _, row in df.iterrows():
    course_uri = URIRef(f"https://github.com/ngqynhanh/CurriculumPlanning/course/{row['COURSE_ID']}")
    g.add((course_uri, RDF.type, EX.Course))
    g.add((course_uri, EX.name, Literal(row['COURSE_NAME'])))
    g.add((course_uri, EX.description, Literal(row['COURSE_DESCRIPTION'])))
    g.add((course_uri, EX.credits, Literal(row['CREDITS'])))
    g.add((course_uri, EX.hasPrerequisite, Literal(row['PREREQUISITES'])))
    g.add((course_uri, EX.elective, Literal(row['ELECTIVE'])))
    g.add((course_uri, EX.courseDifficulty, Literal(row['COURSE_DIFFICULTY'])))
    g.add((course_uri, EX.corequisite, Literal(row['COREQUISITE'])))
    g.add((course_uri, EX.previous, Literal(row['PREVIOUS'])))
    g.add((course_uri, EX.courseDescription, Literal(row['COURSE_DESCRIPTION'])))

# 3. Load (save as ontology file)
g.serialize("courses.ttl", format="turtle")

<Graph identifier=N40ae3fa32c084ce1b90ffdccdac9d6cd (<class 'rdflib.graph.Graph'>)>

In [None]:
# 1. Extract
df = pd.read_csv("D:/Documents/Programming/CurriculumPlanning/data/course_transformed.csv")

# 2. Create ontology
onto = get_ontology("https://github.com/ngqynhanh/CurriculumPlanning/ontology.owl")

with onto:
    # Define Classes
    class Course(Thing): pass
    class Program(Thing): pass
    
    # Define Object Properties
    class hasPrerequisite(ObjectProperty):
        domain = [Course]
        range = [Course]

    class corequisite(ObjectProperty):
        domain = [Course]
        range = [Course]

    # Define Data Properties
    class name(DataProperty):
        domain = [Course]
        range = [str]
        
    class description(DataProperty):
        domain = [Course]
        range = [str]
        
    class credits(DataProperty):
        domain = [Course]
        range = [int]
        
    class elective(DataProperty):
        domain = [Course]
        range = [bool]
        
    class courseDifficulty(DataProperty):
        domain = [Course]
        range = [float]

# 3. Transform & Load
for _, row in df.iterrows():
    # Create Course instance
    course_id = row['COURSE_ID']
    course_obj = onto.Course(course_id)
    
    # Add data properties
    course_obj.name = [row['COURSE_NAME']]
    course_obj.description = [row['COURSE_DESCRIPTION']]
    course_obj.credits = [int(row['CREDITS'])]
    course_obj.elective = [bool(row['ELECTIVE'])]
    course_obj.courseDifficulty = [float(row['COURSE_DIFFICULTY'])]
    
    # Add object properties (link to other course objects)
    if pd.notna(row['PREREQUISITES']) and row['PREREQUISITES'] != "":
        for prereq in str(row['PREREQUISITES']).split(","):
            prereq_id = prereq.strip()
            prereq_obj = onto.Course(prereq_id)  # Will auto-create if not exists yet
            course_obj.hasPrerequisite.append(prereq_obj)
    
    if pd.notna(row['COREQUISITE']) and row['COREQUISITE'] != "":
        for co in str(row['COREQUISITE']).split(","):
            co_id = co.strip()
            co_obj = onto.Course(co_id)
            course_obj.corequisite.append(co_obj)

# 4. Save OWL file
onto.save(file="courses.owl", format="rdfxml")
print("Ontology saved as courses.owl")


Ontology saved as courses.owl


In [77]:
# from rdflib import Graph, Namespace, RDF, RDFS, URIRef, Literal

# # Define Namespaces
# EX = Namespace("http://example.org/")

# # Create graph
# g = Graph()
# g.bind("ex", EX)

# # --- Concepts (C1, C2, C3…) ---
# g.add((EX.Student, RDF.type, RDFS.Class))
# g.add((EX.Course, RDF.type, RDFS.Class))
# g.add((EX.Goal, RDF.type, RDFS.Class))

# # --- Relationships (R1, R2…) ---
# g.add((EX.hasGoal, RDF.type, RDF.Property))
# g.add((EX.hasGoal, RDFS.domain, EX.Student))
# g.add((EX.hasGoal, RDFS.range, EX.Goal))

# g.add((EX.fitsGoal, RDF.type, RDF.Property))
# g.add((EX.fitsGoal, RDFS.domain, EX.Course))
# g.add((EX.fitsGoal, RDFS.range, EX.Goal))

# # --- Instances ---
# g.add((EX.Alice, RDF.type, EX.Student))
# g.add((EX.AI, RDF.type, EX.Goal))
# g.add((EX.DeepLearning, RDF.type, EX.Course))

# # --- Relations Between Instances ---
# g.add((EX.Alice, EX.hasGoal, EX.AI))
# g.add((EX.DeepLearning, EX.fitsGoal, EX.AI))

# # --- Simple Inference (manual logic for now) ---
# for student, _, goal in g.triples((None, EX.hasGoal, None)):
#     for course, _, course_goal in g.triples((None, EX.fitsGoal, goal)):
#         print(f"Recommend {course.split('/')[-1]} to {student.split('/')[-1]} because it fits goal: {goal.split('/')[-1]}")
