# Create Network

In [1]:
import pandas as pd
f = '../data/formatted-courses.json'

In [2]:
df = pd.read_json(f)
print (df.columns)
df.head()

Index(['Description', 'Link', 'college', 'courseID', 'course_num',
       'department', 'hrs', 'name', 'prereqs'],
      dtype='object')


Unnamed: 0,Description,Link,college,courseID,course_num,department,hrs,name,prereqs
0,This course offers an examination of foundatio...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:3000,3000,EPLS,3.0,Foundations of Education,[]
1,"The focus of this course, which is required fo...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:4180,4180,EPLS,3.0,Human Relations for the Classroom Teachr,[]
2,Certificate for Interscholastic Athletics/Acti...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:4200,4200,EPLS,3.0,Diversity and Inclusion in Athletics,[]
3,This section is offered through Distance and O...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:5090,5090,EPLS,3.0,Instr Coaching for Teaching Excellence,[]
4,"Current selected functions, issues, policies o...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:5100,5100,EPLS,3.0,Issues and Policies in Higher Education,[]


### Prereqs

- remove duplicates
- add to prereqs list

In [3]:
prereqs_list = []
def make_prereq(course, prereqs):
    for i in prereqs:
        prereqs_list.append({"source": i, "target": course, "value": 1})
        
# remove dupes
df['prereqs'] = df['prereqs'].apply(lambda x: list(set(x)))

# create prereqs links
df.apply(lambda x: make_prereq(x['courseID'], x['prereqs']), axis=1)

0       None
1       None
2       None
3       None
4       None
5       None
6       None
7       None
8       None
9       None
10      None
11      None
12      None
13      None
14      None
15      None
16      None
17      None
18      None
19      None
20      None
21      None
22      None
23      None
24      None
25      None
26      None
27      None
28      None
29      None
        ... 
3452    None
3453    None
3454    None
3455    None
3456    None
3457    None
3458    None
3459    None
3460    None
3461    None
3462    None
3463    None
3464    None
3465    None
3466    None
3467    None
3468    None
3469    None
3470    None
3471    None
3472    None
3473    None
3474    None
3475    None
3476    None
3477    None
3478    None
3479    None
3480    None
3481    None
Length: 3482, dtype: object

In [4]:
prereqs_list[:10]

[{'source': 'LLS:1200', 'target': 'LLS:1201', 'value': 1},
 {'source': 'LLS:1610', 'target': 'LLS:1611', 'value': 1},
 {'source': 'BIOL:1370', 'target': 'GEOG:2374', 'value': 1},
 {'source': 'BIOL:1412', 'target': 'GEOG:2374', 'value': 1},
 {'source': 'BIOL:1141', 'target': 'GEOG:2374', 'value': 1},
 {'source': 'GEOG:1020', 'target': 'GEOG:2374', 'value': 1},
 {'source': 'BIOL:1261', 'target': 'GEOG:2374', 'value': 1},
 {'source': 'GEOG:1020', 'target': 'GEOG:3020', 'value': 1},
 {'source': 'EES:1080', 'target': 'GEOG:3020', 'value': 1},
 {'source': 'EES:1050', 'target': 'GEOG:3020', 'value': 1}]

In [5]:
# filter out the classes that don't exist
courseIDs = set(df['courseID'].unique().tolist())
have_course = lambda x: x['source'] in courseIDs and x['target'] in courseIDs


filtered_list = [i for i in prereqs_list if have_course(i) == False]
prereqs_list = [i for i in prereqs_list if have_course(i)]

In [6]:
# numbers
print ("Total Courses: {}".format(len(filtered_list) + len(prereqs_list)))
print ("# filtered: {}".format(len(filtered_list)))
print ("# remaining: {}".format(len(prereqs_list)))

Total Courses: 1718
# filtered: 402
# remaining: 1316


In [17]:
granular_filter_list = []
for i in filtered_list:
    source = i['source']
    target = i['target']
    if (source not in courseIDs):
        granular_filter_list.append(source)
    if (target not in courseIDs):
        granular_filter_list.append(target)

granular_filter_list[:10]

['BIOL:1370',
 'BIOL:1261',
 'BIOL:1370',
 'GEOG:3310',
 'ACTS:4180',
 'ACTS:4180',
 'STAT:5101',
 'STAT:3101',
 'STAT:3101',
 'STAT:3101']

### Antecedents

*list of courses that a given course leads to.*

- flip each source and add it to a list

In [18]:
antecedents_list = []
for i in prereqs_list:
    flipped = {"source": i["target"], "target": i["source"], "value": 1}
    antecedents_list.append(flipped)

In [19]:
indexed_df = df.set_index('courseID', drop=False)
indexed_df.head()

Unnamed: 0_level_0,Description,Link,college,courseID,course_num,department,hrs,name,prereqs
courseID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
EPLS:3000,This course offers an examination of foundatio...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:3000,3000,EPLS,3.0,Foundations of Education,[]
EPLS:4180,"The focus of this course, which is required fo...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:4180,4180,EPLS,3.0,Human Relations for the Classroom Teachr,[]
EPLS:4200,Certificate for Interscholastic Athletics/Acti...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:4200,4200,EPLS,3.0,Diversity and Inclusion in Athletics,[]
EPLS:5090,This section is offered through Distance and O...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:5090,5090,EPLS,3.0,Instr Coaching for Teaching Excellence,[]
EPLS:5100,"Current selected functions, issues, policies o...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Education,EPLS:5100,5100,EPLS,3.0,Issues and Policies in Higher Education,[]


In [20]:
# create a map
courseIDs = indexed_df['courseID'].unique().tolist()
course_prereqs = {}
for i in courseIDs:
    course_prereqs[i] = []

# populate map
for i in prereqs_list:
    course = course_prereqs[i['target']]
    course.append(i)
    
# move the data to the correct row in DF
indexed_df['prereqs'] = indexed_df['courseID'].apply(lambda x: course_prereqs[x])

In [21]:
course_after = {}
for i in courseIDs:
    course_after[i] = []
    
for i in antecedents_list:
    course = course_after[i['source']]
    course.append(i)
    
indexed_df['after'] = indexed_df['courseID'].apply(lambda x: course_after[x])

In [22]:
indexed_df.columns

Index(['Description', 'Link', 'college', 'courseID', 'course_num',
       'department', 'hrs', 'name', 'prereqs', 'after'],
      dtype='object')

In [23]:
indexed_df[['name', 'prereqs', 'after']].iloc[400:500]

Unnamed: 0_level_0,name,prereqs,after
courseID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CW:3218,Creative Writing for New Media,[],[]
CW:3870,Advanced Fiction Writing,"[{'source': 'CW:2870', 'target': 'CW:3870', 'v...","[{'source': 'CW:3870', 'target': 'CW:2870', 'v..."
CW:3875,Advanced Poetry Writing,"[{'source': 'CW:2875', 'target': 'CW:3875', 'v...","[{'source': 'CW:3875', 'target': 'CW:2875', 'v..."
CW:4745,The Sentence: Strategies for Writing,[],[]
CW:4751,Creative Writing for the Musician,[],[]
CW:4760,The Art of Revision: Writing for Clarity,[],[]
CW:4870,Undergraduate Writers' Workshop: Fiction,[],[]
CW:4875,Undergraduate Writers' Workshop: Poetry,[],[]
CW:4894,Undergrad Project in Creative Writing,[],[]
CW:4897,Novel Writing,[],[]


In [24]:
indexed_df.rename(columns={'Description': 'description', 'Link': 'link'}, inplace=True)

### Duplicates

In [43]:
# view duplicated
duplicated = indexed_df[indexed_df.index.duplicated() == True].sort_index()
duplicated[['name', 'description']]

Unnamed: 0_level_0,name,description
courseID,Unnamed: 1_level_1,Unnamed: 2_level_1
ABRD:3045,Academic Year in Freiburg ...,The program in Freiburg offers students the op...
ABRD:3063,CIEE Paris Global Institute ...,
ABRD:3063,CIEE Paris Global Institute ...,
ABRD:3063,CIEE Paris Global Institute ...,
ABRD:3500,Study Abroad ...,
ABRD:3502,Study Abroad Independent Enrollment ...,
ABRD:4510,International Student Exchange Program ...,This is a reciprocal exchange program between ...
ANTH:1000,First-Year Seminar ...,A Tour of Biological Anthropology: Explore the...
ASIA:1040,Living Religions of the East,This course will offer an introduction to the ...
BIOL:3626,Cell Biology Laboratory,Conceptual understanding and technical skills ...


**OK.** So most of the duplicated classes are writing, and repititive. Keep the first incidence!

In [56]:
def get_dupes(df, idx):
    return df[df.index == idx]
duped_ids = set(duplicated.index.unique().tolist())

In [57]:
duped_ids

{'ABRD:3045',
 'ABRD:3063',
 'ABRD:3500',
 'ABRD:3502',
 'ABRD:4510',
 'ANTH:1000',
 'ASIA:1040',
 'BIOL:3626',
 'CBE:1180',
 'CLAS:1005',
 'COMM:2090',
 'CS:1000',
 'CS:4980',
 'CSD:1000',
 'CSI:1200',
 'DANC:1000',
 'DANC:1020',
 'DANC:1120',
 'DANC:2020',
 'DANC:6880',
 'DPA:1000',
 'DSGN:2500',
 'DSGN:3500',
 'EDTL:1129',
 'EDTL:4087',
 'EDTL:4089',
 'EDTL:4091',
 'EDTL:4092',
 'EDTL:4096',
 'EDTL:4192',
 'EDTL:7015',
 'EES:3160',
 'EES:4230',
 'ENGL:1000',
 'ENGL:1200',
 'ENGL:3430',
 'ENGL:4720',
 'ENTR:9600',
 'EPLS:5240',
 'EPLS:6311',
 'ESL:1005',
 'EVNT:3185',
 'FIN:1300',
 'FREN:3000',
 'HHP:1000',
 'HIST:1010',
 'HIST:1016',
 'HIST:1401',
 'HIST:1403',
 'HIST:2151',
 'HONR:1300',
 'IBA:1041',
 'IE:4600',
 'ITAL:1000',
 'JMC:1000',
 'JMC:3185',
 'JMC:3410',
 'LLS:1110',
 'MBA:8500',
 'ME:4086',
 'ME:6191',
 'MGMT:1300',
 'MGMT:3999',
 'MKTG:1300',
 'MUS:1000',
 'MUS:1020',
 'MUS:3160',
 'MUS:3190',
 'MUS:3482',
 'MUS:3485',
 'MUS:3659',
 'MUS:3730',
 'MUS:3740',
 'MUS:3800',

In [63]:
for i in get_dupes(duplicated, 'WRIT:1600').iterrows():
    print (i[0])

WRIT:1600
WRIT:1600
WRIT:1600
WRIT:1600
WRIT:1600


In [72]:
test = get_dupes(duplicated, 'WRIT:1600')
test.index = test.index + test.groupby(by=test.index).cumcount().astype(str).replace('0', '')

In [73]:
test

Unnamed: 0,description,link,college,courseID,course_num,department,hrs,name,prereqs,after
WRIT:1600,"Without the figurative language, the world wou...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,WRIT:1600,1600,WRIT,1.0,Fast Fixes: Improved Writing in 6 Weeks ...,"[{'source': 'RHET:1030', 'target': 'WRIT:1600'...","[{'source': 'WRIT:1600', 'target': 'RHET:1030'..."
WRIT:16001,Do you dread writing papers? Do you tremble wi...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,WRIT:1600,1600,WRIT,1.0,Fast Fixes: Improved Writing in 6 Weeks ...,"[{'source': 'RHET:1030', 'target': 'WRIT:1600'...","[{'source': 'WRIT:1600', 'target': 'RHET:1030'..."
WRIT:16002,Your papers come back with lower grades than y...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,WRIT:1600,1600,WRIT,1.0,Fast Fixes: Improved Writing in 6 Weeks ...,"[{'source': 'RHET:1030', 'target': 'WRIT:1600'...","[{'source': 'WRIT:1600', 'target': 'RHET:1030'..."
WRIT:16003,"Whether you’re headed for the workforce, gradu...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,WRIT:1600,1600,WRIT,1.0,Fast Fixes: Improved Writing in 6 Weeks ...,"[{'source': 'RHET:1030', 'target': 'WRIT:1600'...","[{'source': 'WRIT:1600', 'target': 'RHET:1030'..."
WRIT:16004,We all know good grammar helps us communicate ...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,WRIT:1600,1600,WRIT,1.0,Fast Fixes: Improved Writing in 6 Weeks ...,"[{'source': 'RHET:1030', 'target': 'WRIT:1600'...","[{'source': 'WRIT:1600', 'target': 'RHET:1030'..."


In [66]:
duplicated

Unnamed: 0_level_0,description,link,college,courseID,course_num,department,hrs,name,prereqs,after
courseID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ABRD:3045,The program in Freiburg offers students the op...,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:3045,3045,ABRD,15.0,Academic Year in Freiburg ...,[],[]
ABRD:3063,,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:3063,3063,ABRD,18.0,CIEE Paris Global Institute ...,[],[]
ABRD:3063,,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:3063,3063,ABRD,18.0,CIEE Paris Global Institute ...,[],[]
ABRD:3063,,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:3063,3063,ABRD,12.0,CIEE Paris Global Institute ...,[],[]
ABRD:3500,,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:3500,3500,ABRD,9.0,Study Abroad ...,[],[]
ABRD:3502,,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:3502,3502,ABRD,9.0,Study Abroad Independent Enrollment ...,[],[]
ABRD:4510,This is a reciprocal exchange program between ...,https://myui.uiowa.edu/my-ui/courses/details.p...,University College,ABRD:4510,4510,ABRD,16.0,International Student Exchange Program ...,[],[]
ANTH:1000,A Tour of Biological Anthropology: Explore the...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,ANTH:1000,1000,ANTH,1.0,First-Year Seminar ...,[],[]
ASIA:1040,This course will offer an introduction to the ...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,ASIA:1040,1040,ASIA,3.0,Living Religions of the East,[],[]
BIOL:3626,Conceptual understanding and technical skills ...,https://myui.uiowa.edu/my-ui/courses/details.p...,,BIOL:3626,3626,BIOL,,Cell Biology Laboratory,[],[]


In [76]:
indexed_df.index = indexed_df.index + indexed_df.groupby(by=indexed_df.index).cumcount().astype(str).replace('0', '')

In [89]:
indexed_df['courseID'] = indexed_df['courseID'] + indexed_df['courseID'].groupby(by=indexed_df.index).cumcount().astype(str).replace('0', '')

In [90]:
indexed_df.iloc[1000:1100]

Unnamed: 0,description,link,college,courseID,course_num,department,hrs,name,prereqs,after
MATH:1380,This course includes mathematical problems ari...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1380,1380,MATH,4.0,Calculus and Matrix Algebra for Business,"[{'source': 'MATH:1020', 'target': 'MATH:1380'...","[{'source': 'MATH:1380', 'target': 'MATH:1020'..."
MATH:1440,This course consists largely of precalculus to...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1440,1440,MATH,4.0,Mathematics for the Biological Sciences,"[{'source': 'MATH:1010', 'target': 'MATH:1440'...","[{'source': 'MATH:1440', 'target': 'MATH:1010'..."
MATH:1460,This course is a one-semester survey of calcul...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1460,1460,MATH,4.0,Calculus for the Biological Sciences,"[{'source': 'MATH:1440', 'target': 'MATH:1460'...","[{'source': 'MATH:1460', 'target': 'MATH:1440'..."
MATH:1550,This is the first semester of a five-semester ...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1550,1550,MATH,4.0,Engineer Math I Single Variable Calculus,"[{'source': 'MATH:1380', 'target': 'MATH:1550'...","[{'source': 'MATH:1550', 'target': 'MATH:1380'..."
MATH:1560,This is the second semester of a five-semester...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1560,1560,MATH,4.0,Engineer Math II: Multivariable Calculus,"[{'source': 'MATH:1550', 'target': 'MATH:1560'...","[{'source': 'MATH:1560', 'target': 'MATH:1550'..."
MATH:1850,"Topics include fundamental concepts, methods, ...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1850,1850,MATH,4.0,Calculus I,"[{'source': 'MATH:1380', 'target': 'MATH:1850'...","[{'source': 'MATH:1850', 'target': 'MATH:1380'..."
MATH:1860,This is a continuation of MATH:1850. This is c...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:1860,1860,MATH,4.0,Calculus II,"[{'source': 'MATH:1550', 'target': 'MATH:1860'...","[{'source': 'MATH:1860', 'target': 'MATH:1550'..."
MATH:2550,This course is an abbreviated version of MATH:...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:2550,2550,MATH,2.0,Engineering Math III: Matrix Algebra,"[{'source': 'MATH:1860', 'target': 'MATH:2550'...","[{'source': 'MATH:2550', 'target': 'MATH:1860'..."
MATH:2560,This course covers basic theory and methods of...,https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:2560,2560,MATH,3.0,Engineer Math IV: Differential Equations,"[{'source': 'MATH:1860', 'target': 'MATH:2560'...","[{'source': 'MATH:2560', 'target': 'MATH:1860'..."
MATH:2700,"This course introduces students to vectors, li...",https://myui.uiowa.edu/my-ui/courses/details.p...,College of Liberal Arts and Sciences,MATH:2700,2700,MATH,4.0,Introduction to Linear Algebra,"[{'source': 'MATH:1860', 'target': 'MATH:2700'...","[{'source': 'MATH:2700', 'target': 'MATH:1860'..."


In [91]:
out_f = '../data/indexed-courses.json'
indexed_df.to_json(out_f, orient='index')

In [14]:
def make_network(a_list):
    if (len(a_list) < 1):
        return
    
    nodes = []
    before = []
    after = []
    
    for i in a_list:
        course = indexed_df.loc[i]
        for i in course['prereqs']:
            prereq = indexed_df.loc[i['source']]
            nodes.append(prereq)
            before.append(i)
        for i in course['after']:
            antecedent = indexed_df.loc[i['target']]
            nodes.append(antecedent)
            after.append(i)
    return {"nodes": nodes, "before": before, "after": after}

In [15]:
make_network(['CW:3870'])

{'after': [{'source': 'CW:3870', 'target': 'CW:2870', 'value': 1}],
 'before': [{'source': 'CW:2870', 'target': 'CW:3870', 'value': 1}],
 'nodes': [description    Writing fiction is a craft and a process.  Stu...
  link           https://myui.uiowa.edu/my-ui/courses/details.p...
  college                     College of Liberal Arts and Sciences
  courseID                                                 CW:2870
  course_num                                                  2870
  department                                                    CW
  hrs                                                            3
  name                                             Fiction Writing
  prereqs                                                       []
  after                                                         []
  Name: CW:2870, dtype: object,
  description    Writing fiction is a craft and a process.  Stu...
  link           https://myui.uiowa.edu/my-ui/courses/details.p...
  college         