In [3]:
import re
import pandas as pd
import processing
from graphs import *

DATA_PATH = "./data/courses.json"

## Dataset

Courses are indexed by their codes.

In [4]:
courses_df = pd.read_json(DATA_PATH, orient="index")
courses_df

Unnamed: 0,title,outline_school,outline_college,outline_level_year,outline_scqf,outline_ects,entry_pre_req,additional_keywords,delivery_start
arch07001,Undergraduate Course: Art and Design (ARCH07001),Edinburgh College of Art,"College of Arts, Humanities and Social Sciences",SCQF Level 7 (Year 1 Undergraduate),20,10.0,,"Art,Design,Drawing,Representation",
arch08006,Undergraduate Course: Architectural Design: An...,Edinburgh College of Art,"College of Arts, Humanities and Social Sciences",SCQF Level 8 (Year 2 Undergraduate),20,10.0,Students MUST have passed:\nArchitectural Desi...,Studio,Semester 2
arch08004,Undergraduate Course: Architectural Design: As...,Edinburgh College of Art,"College of Arts, Humanities and Social Sciences",SCQF Level 8 (Year 1 Undergraduate),20,10.0,,"Assembly,Interior,Exterior,Dwelling",Semester 2
arch08001,Undergraduate Course: Architectural Design: El...,Edinburgh College of Art,"College of Arts, Humanities and Social Sciences",SCQF Level 8 (Year 1 Undergraduate),20,10.0,,"Studio,path,place",Semester 1
arch08007,Undergraduate Course: Architectural Design: In...,Edinburgh College of Art,"College of Arts, Humanities and Social Sciences",SCQF Level 8 (Year 2 Undergraduate),20,10.0,Students MUST have passed:\nArchitectural Desi...,Studio,Semester 1
...,...,...,...,...,...,...,...,...,...
puhr11040,Postgraduate Course: Statistical Modelling (PU...,"Deanery of Molecular, Genetic and Population H...",College of Medicine and Veterinary Medicine,SCQF Level 11 (Postgraduate),10,5.0,Students MUST have passed:\nIntroduction to St...,"Statistics,statistical methods,R,SPSS,modellin...",Block 4 (Sem 2)
puhr11102,Postgraduate Course: Statistical Modelling for...,"Deanery of Molecular, Genetic and Population H...",College of Medicine and Veterinary Medicine,SCQF Level 11 (Postgraduate),10,5.0,,"Statistics,statistical methods,R,modelling,reg...",
puhr11069,Postgraduate Course: Systematic Reviews (PUHR1...,"Deanery of Molecular, Genetic and Population H...",College of Medicine and Veterinary Medicine,SCQF Level 11 (Postgraduate),10,5.0,,"Systematic review,public health,protocol devel...",Flexible
puhr11099,Postgraduate Course: Systematic Reviews for Pu...,"Deanery of Molecular, Genetic and Population H...",College of Medicine and Veterinary Medicine,SCQF Level 11 (Postgraduate),10,5.0,,"public health,Systematic review,protocol devel...",


In [5]:
scqf = courses_df["outline_level_year"]

SCQF_PATTERN = r"SCQF Level (?P<level>\d+) \((Year (?P<year>\d) (Undergraduate)|Postgraduate)\)"

def scqfGroup(level_year):
    m = re.match(SCQF_PATTERN, level_year)
    return m.groupdict()

courses_df["year"] = pd.to_numeric(scqf.apply(lambda x : scqfGroup(x)["year"]))
courses_df["level"] = pd.to_numeric(scqf.apply(lambda x : scqfGroup(x)["level"]))

START_PATTERN = r"Semester (?P<semester>\d)"

courses_df["semester"] = pd.to_numeric(courses_df["delivery_start"].apply(lambda x : re.match(START_PATTERN, x).groupdict()["semester"]))
courses_df["start"] = courses_df["year"] + courses_df["semester"] * 0.1

TypeError: expected string or bytes-like object, got 'NoneType'

In [6]:
prereqs = courses_df["entry_pre_req"].apply(processing.prereq)

In [5]:
G = PriorityGraph()

course_to_vertex = {}
vertex_to_course = {}

for course, _ in prereqs.items():
    i = G.addVertex(courses_df["start"][course])
    course_to_vertex[course] = i
    vertex_to_course[i] = course

for course, prereq in prereqs.items():
    for pre_course in prereq:
        if pre_course in course_to_vertex.keys():
            G.addEdge(course_to_vertex[pre_course], course_to_vertex[course])



In [6]:
L = G.topSort()
course_order = list(reversed(list(map(lambda i : courses_df.loc[vertex_to_course[i]]["title"], L))))

In [7]:
course_order

['Undergraduate Course: Introduction to Linear Algebra (MATH08057)',
 'Undergraduate Course: Fundamentals of Algebra and Calculus (MATH07003)',
 'Undergraduate Course: Engineering Mathematics 1a (MATH08074)',
 'Undergraduate Course: Introductory Mathematics with Applications (MATH07004)',
 'Undergraduate Course: Introduction to Data Science (MATH08077)',
 'Undergraduate Course: Mathematics for the Natural Sciences 1a (MATH08072)',
 'Undergraduate Course: Calculus and its Applications (MATH08058)',
 'Undergraduate Course: Proofs and Problem Solving (MATH08059)',
 'Undergraduate Course: Engineering Mathematics 1b (MATH08075)',
 'Undergraduate Course: Mathematics for the Natural Sciences 1b (MATH08073)',
 'Undergraduate Course: Facets of Mathematics (MATH08068)',
 'Undergraduate Course: Accelerated Algebra and Calculus for Direct Entry (MATH08062)',
 'Undergraduate Course: Several Variable Calculus and Differential Equations (MATH08063)',
 'Undergraduate Course: Probability (MATH08066)',


TODO: Connected components