In [1]:
import os
import pandas as pd
import json
import math

In [6]:
# How many catalog files to load: the catalog list is sorted by filename and
# only the last LOOKBACK_RANGE entries are kept.
LOOKBACK_RANGE = 10

In [7]:
# Collect the CSV files found in ./catalog/ and keep only the last
# LOOKBACK_RANGE names in sorted order.
catalog_filenames = sorted(
    entry for entry in os.listdir("./catalog/") if entry.endswith(".csv")
)[-LOOKBACK_RANGE:]

In [8]:
# Names of all CSV files found in ./schedule/, in directory-listing order.
schedule_filenames = [
    entry for entry in os.listdir("./schedule/") if entry.endswith(".csv")
]

In [9]:
# Read every selected catalog file into a DataFrame indexed by "Course".
catalogs = []
for catalog_filename in catalog_filenames:
    print(catalog_filename)
    catalogs.append(pd.read_csv(f"./catalog/{catalog_filename}", index_col="Course"))

# Stack the catalogs in reverse load order, then keep only the first row seen
# for each course, i.e. the row from the catalog that was loaded last.
# The concatenated frame is deliberately left as a temporary so that
# `cumulative` is the only surviving reference to the data.
cumulative = pd.concat(catalogs[::-1]).loc[
    lambda stacked: ~stacked.index.duplicated(keep='first')
]

2015-2016.csv
2016-2017.csv
2017-2018.csv
2018-2019.csv
2019-2020.csv
2020-2021.csv
2021-2022.csv
2022-2023.csv
2023-2024.csv
2024-2025.csv


In [10]:
# For each catalog year, add one boolean column per term ("Fall YYYY",
# "Spring YYYY", "Summer YYYY") telling whether each course in `cumulative`
# appears in that term's schedule file. Terms without a schedule file get no
# column. Re-running the cell simply overwrites the same columns.
for catalog_filename in catalog_filenames:
    # Strip the ".csv" suffix and split, e.g. "2015-2016.csv" -> ["2015", "2016"].
    yr_range = catalog_filename[:-4].split("-")
    print(yr_range)

    # Fall uses the first year of the range; Spring and Summer use the second.
    terms = (
        ("Fall", yr_range[0]),
        ("Spring", yr_range[1]),
        ("Summer", yr_range[1]),
    )
    for season, year in terms:
        semester = f"{season} {year}"
        if f"{semester}.csv" not in schedule_filenames:
            continue  # no schedule file for this term

        schedule = pd.read_csv(f"./schedule/{semester}.csv")
        # True where the course code (the index of `cumulative`) is listed
        # in this term's "Course" column.
        cumulative[semester] = cumulative.index.isin(schedule["Course"])

['2015', '2016']
['2016', '2017']
['2017', '2018']
['2018', '2019']
['2019', '2020']
['2020', '2021']
['2021', '2022']
['2022', '2023']
['2023', '2024']
['2024', '2025']


In [11]:
# Export the collated table to disk in two formats: CSV and JSON.
cumulative.to_csv("collated.csv")
cumulative.to_json("collated.json")

In [12]:
# Build a column-oriented dict of plain Python lists for compact JSON export.
# Boolean columns are stored as 0/1 integers; every other column is copied
# through unchanged. Columns are chosen by dtype rather than by position, so
# the result does not depend on how many catalog columns precede the flags.
new_cumulative = {}
for column in cumulative:
    values = cumulative[column]
    if values.dtype == bool:
        new_cumulative[column] = values.astype(int).tolist()
    else:
        new_cumulative[column] = values.tolist()

# The course codes live in the index; add them last so the key order is
# "all columns, then Course".
new_cumulative["Course"] = list(cumulative.index)

In [13]:
# Normalise the two sparsely populated columns for the JSON export.
# Both are derived from `cumulative` rather than from the values already in
# `new_cumulative`, so running this cell more than once gives the same result.

# Missing "Distribution Group" entries become empty strings; present values
# are kept as they are.
new_cumulative["Distribution Group"] = [
    "" if pd.isna(val) else val
    for val in cumulative["Distribution Group"].tolist()
]

# "Analyzing Diversity" is reduced to a 0/1 flag: 1 when a value is present,
# 0 when it is missing.
new_cumulative["Analyzing Diversity"] = (
    cumulative["Analyzing Diversity"].notna().astype(int).tolist()
)

In [14]:
# Serialise the compact dict and write it out as UTF-8 text.
# ensure_ascii=False keeps non-ASCII characters as they are instead of
# escaping them.
with open('collated_min.json', mode='w', encoding='utf8') as out_handle:
    out_handle.write(json.dumps(new_cumulative, ensure_ascii=False))