In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import random
import re

# **Construct the Caltech, CUT and JHU Course-Prerequisite Networks (CPNs)**

## Caltech

In [3]:
# Load the cleaned Caltech course/prerequisite table. Courses whose cleaned
# prerequisite cell is missing get an empty string, so later string handling
# never sees NaN.
Caltech_data = pd.read_csv("2021-22-course-prereqs - clean.csv").fillna(
  {"Prereaquisites (clean)": ""}
)
Caltech_data

Unnamed: 0,department_name,Acronym,course_number,Node_name,course_title,prerequisites,Prereaquisites (clean)
0,Aerospace,Ae,100,Ae 100,Research in Aerospace,,
1,Aerospace,Ae,101 abc,Ae 101 abc,Fluid Mechanics,"APh 17 or ME 11 abc, and ME 12 or equivalent, ...","APh 17 abc, ME 11 abc, ME 12 abc"
2,Aerospace,Ae,102 abc,Ae 102 abc,Mechanics of Structures and Solids,ME 12 abc,ME 12 abc
3,Aerospace,Ae,103 ab,Ae 103 ab,Aerospace Control Systems,"CDS 110 (or equivalent), CDS 131 or permission...","CDS 110, CDS 131"
4,Aerospace,Ae,104 abc,Ae 104 abc,Experimental Methods,ACM 95/100 ab or equivalent (may be taken conc...,"ACM 95/100 ab, Ae 101 abc"
...,...,...,...,...,...,...,...
766,Physics,Ph,236 abc,Ph 236 abc,General Relativity,a mastery of special relativity at the level o...,
767,Physics,Ph,237,Ph 237,Gravitational Radiation,"Ph 106 b, Ph 12 b or equivalents","Ph 106 abc, Ph 12 abc"
768,Physics,Ph,242 ab,Ph 242 ab,Physics Seminar,,
769,Physics,Ph,250,Ph 250,Introduction to String Theory,Ph 205 or equivalent,Ph 205 abc


In [5]:
# Node labels for the Caltech network: one entry per table row, in row order.
Caltech_node_list = Caltech_data.loc[:, "Node_name"].tolist()
Caltech_node_list

['Ae 100',
 'Ae 101 abc',
 'Ae 102 abc',
 'Ae 103 ab',
 'Ae 104 abc',
 'Ae 105 abc',
 'Ae 115 ab',
 'Ae 118',
 'Ae 120',
 'Ae 150 abc',
 'Ae 159',
 'Ae 160 ab',
 'Ae 165 ab',
 'Ae 200',
 'Ae 201 a',
 'Ae 204 ab',
 'Ae 205 ab',
 'Ae 208 abc',
 'Ae 213',
 'Ae 214',
 'Ae 215',
 'Ae 218',
 'Ae 220',
 'Ae 221',
 'Ae 223',
 'Ae 225',
 'Ae 232 ab',
 'Ae 233',
 'Ae 234 ab',
 'Ae 235',
 'Ae 237 ab',
 'Ae 239 ab',
 'Ae 240',
 'Ae 241',
 'Ae 242',
 'Ae 250',
 'Ae 251 ab',
 'Ae 265 ab',
 'Ae 121 abc',
 'ACM 11',
 'ACM 80 abc',
 'ACM 81 abc',
 'ACM 95/100 ab',
 'ACM 101 ab',
 'ACM 104',
 'ACM 105',
 'ACM 106 ab',
 'ACM 109',
 'ACM 116',
 'ACM 118',
 'ACM 154',
 'ACM 170',
 'ACM 190',
 'ACM 201',
 'ACM 204',
 'ACM 206',
 'ACM 210',
 'ACM 213',
 'ACM 216',
 'ACM 217',
 'ACM 256',
 'ACM 257',
 'ACM 270',
 'ACM 300',
 'AM 127',
 'AM 150 abc',
 'AM 151',
 'AM 165',
 'AM 200',
 'AM 201',
 'AM 252',
 'AM 300',
 'APh 9 ab',
 'APh 17 abc',
 'APh 23',
 'APh 24',
 'APh 77 bc',
 'APh 78 abc',
 'APh 79 abc',
 '

In [6]:
# Build the Caltech edge list: one (prerequisite, course) pair for every entry
# of a course's cleaned, comma-separated prerequisite string.
Caltech_edge_list = []
# Pair each course with its prerequisite string by position (zip) instead of
# looking the string up by index label, so a non-default DataFrame index can
# never misalign courses and prerequisites.
for course, reqs in zip(Caltech_node_list, Caltech_data["Prereaquisites (clean)"]):
  for prereq in reqs.split(","):
    # Strip both sides: leftover trailing whitespace would otherwise produce a
    # distinct, phantom node name for the same course.
    prereq = prereq.strip()
    # Skip empty tokens (blank cell, trailing comma, doubled comma).
    if prereq:
      Caltech_edge_list.append((prereq, course))

Caltech_edge_list


[('APh 17 abc', 'Ae 101 abc'),
 ('ME 11 abc', 'Ae 101 abc'),
 ('ME 12 abc', 'Ae 101 abc'),
 ('ME 12 abc', 'Ae 102 abc'),
 ('CDS 110', 'Ae 103 ab'),
 ('CDS 131', 'Ae 103 ab'),
 ('ACM 95/100 ab', 'Ae 104 abc'),
 ('Ae 101 abc', 'Ae 104 abc'),
 ('ME 11 abc', 'Ae 105 abc'),
 ('ME 12 abc', 'Ae 105 abc'),
 ('CDS 110', 'Ae 115 ab'),
 ('ME 11 abc', 'Ae 118'),
 ('ME 12 abc', 'Ae 118'),
 ('Ae 118', 'Ae 120'),
 ('ME 119', 'Ae 120'),
 ('Ph 2 abc', 'Ae 159'),
 ('EE 157 ab', 'Ae 159'),
 ('APh 23', 'Ae 159'),
 ('Ae 102 abc', 'Ae 165 ab'),
 ('Ae 101 abc', 'Ae 201 a'),
 ('ACM 101 ab', 'Ae 201 a'),
 ('Ae 101 abc', 'Ae 204 ab'),
 ('Ae 105 abc', 'Ae 205 ab'),
 ('Ae 102 abc', 'Ae 213'),
 ('ACM 95/100 ab', 'Ae 214'),
 ('Ae 102 abc', 'Ae 214'),
 ('CE 108 ab', 'Ae 214'),
 ('ACM 101 ab', 'Ae 215'),
 ('Ae 102 abc', 'Ae 215'),
 ('Ae 118', 'Ae 218'),
 ('Ae 102 abc', 'Ae 220'),
 ('Ae 102 abc', 'Ae 223'),
 ('Ae 101 abc', 'Ae 232 ab'),
 ('ACM 95/100 ab', 'Ae 232 ab'),
 ('Ae 101 abc', 'Ae 233'),
 ('Ae 101 abc', 'Ae 23

In [7]:
# Assemble the Caltech course-prerequisite network as a directed graph.
G_caltech = nx.DiGraph()
# Add every listed course first, so courses that take part in no prerequisite
# relation still appear as nodes and node order follows the course list.
G_caltech.add_nodes_from(Caltech_node_list)
# Each edge is a (prerequisite, course) pair; add_edges_from also creates any
# endpoint that is not already a node of the graph.
G_caltech.add_edges_from(Caltech_edge_list)
# Cell result: True when the graph contains no directed cycle.
nx.is_directed_acyclic_graph(G_caltech)

True

## Cyprus University of Technology (CUT)

In [8]:
# Parse the CUT course listing into nodes and (prerequisite, course) edges.
# Each non-blank line is read as: a course code, optionally followed by the
# marker "Pre Req." and a comma-separated, optionally parenthesised list of
# prerequisite codes.
CUT_nodes = []
CUT_edges = []
dlt = 'Pre Req.'
with open("CUT_Courses.txt", encoding='utf-8') as f:
  for line in f:
    # Drop the marker and split on the first run of whitespace; this also
    # discards the trailing newline and skips blank / whitespace-only lines.
    fields = line.replace(dlt, ' ').split(None, 1)
    if not fields:
      continue
    postreq = fields[0]
    CUT_nodes.append(postreq)
    # A course without prerequisites contributes a node but no edges. It must
    # not inherit the prerequisites parsed from the previous line.
    if len(fields) == 1:
      continue
    pre = fields[1].replace('(', '').replace(')', '')
    # Split on every comma (not only the first) so courses with three or more
    # prerequisites yield one edge per prerequisite.
    for p in pre.split(','):
      p = p.strip()
      if p:
        CUT_edges.append((p, postreq))

# Add all courses first so isolated courses are kept, then the prerequisite
# edges; the final expression reports whether the network is acyclic.
G_CUT = nx.DiGraph()
G_CUT.add_nodes_from(CUT_nodes)
G_CUT.add_edges_from(CUT_edges)
nx.is_directed_acyclic_graph(G_CUT)

True

## Johns Hopkins University (JHU)

In [9]:
# Load the JHU course table without mutating any frame in place, so the cell
# is idempotent on re-run.
JH_data = (
  pd.read_csv("JHU_Courses.csv")
  # Rows without a course code cannot become nodes.
  .dropna(subset=["Course"])
  # Missing prerequisite text becomes "" so it can be searched as a string.
  .fillna({"Pre Req": ""})
  # Re-number the rows 0..n-1: dropping rows leaves gaps in the index, and the
  # edge-building code looks rows up with range(len(...)) counters.
  .reset_index(drop=True)
)
JH_data

Unnamed: 0,Course,Department Name,Title,Pre Req
0,AS.440.011,AAP Applied Economics,Forecasting in Organizations,You must enroll in AS.440.615 AND one of the f...
1,AS.440.304,AAP Applied Economics,Math Methods for Economists,
2,AS.440.601,AAP Applied Economics,Microeconomic Theory,
3,AS.440.602,AAP Applied Economics,Macroeconomic Theory,
4,AS.440.605,AAP Applied Economics,Statistics,
...,...,...,...,...
10081,SA.743.233,SN International Economics,Strategy,
10082,SA.745.316,SN International Politics,The Making of Chinese Foreign Policy,
10083,SA.740.001,SN M.A. Thesis,MAIS Thesis Extension,
10084,SA.747.401,SN M.A. Thesis,Independent Thesis Research I,


In [10]:
# Build the JHU course-prerequisite network.
JH_nodes = JH_data["Course"].to_list()
JH_edges = []
# Pattern for course codes embedded in free-text prerequisite descriptions:
# two word characters, a dot, three digits, a dot, three digits
# (e.g. "AS.440.615").
expr = r"\w\w\.\d\d\d\.\d\d\d"
# Walk courses and their prerequisite text together by position. Looking the
# text up with JH_data["Pre Req"][i] is label-based and misaligns (or raises
# KeyError) whenever rows were dropped and the index is no longer 0..n-1.
for course, prereq_text in zip(JH_nodes, JH_data["Pre Req"]):
  # findall returns [] for empty text, so no separate emptiness check is needed.
  for pre in re.findall(expr, prereq_text):
    # A course mentioned in its own requirement text is not a prerequisite of
    # itself; skipping it avoids self-loops.
    if pre != course:
      JH_edges.append((pre, course))

# Add all courses first so courses without prerequisite relations are kept,
# then the (prerequisite, course) edges; the final expression reports whether
# the resulting network is acyclic.
G_JH = nx.DiGraph()
G_JH.add_nodes_from(JH_nodes)
G_JH.add_edges_from(JH_edges)
nx.is_directed_acyclic_graph(G_JH)



True