In [45]:
import pandas as pd
import numpy as np

In [46]:
def one_hot_list(data, column, prefix=None):
    """One-hot encode a column whose cells hold ", "-separated labels.

    Each cell of ``column`` is split on ``", "``, the frame is exploded so that
    every label gets its own row, and the indicator columns produced by
    ``pd.get_dummies`` are appended on the right.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame. It is left unmodified.
    column : hashable
        Label of the column that holds the ", "-separated strings.
    prefix : str, optional
        Forwarded to ``pd.get_dummies``.

    Returns
    -------
    pandas.DataFrame
        The exploded frame (original index labels repeated once per label,
        ``column`` holding a single label per row) followed by one indicator
        column per distinct label.
    """
    # Work on a copy: assigning the split lists straight into `data` would
    # overwrite the caller's column, so re-running the call (or any later code
    # that expects the original strings) would see lists instead.
    working = data.copy()
    working[column] = working[column].str.split(", ")
    exploded = working.explode(column)
    indicators = pd.get_dummies(exploded[column], prefix=prefix)
    # Both frames carry the same (repeated) index, so a column-wise concat
    # lines the indicator rows up with the exploded rows.
    return pd.concat([exploded, indicators], axis=1)

# Courses compulsory for each degree

In [47]:
# Load the "abbreviated course names" sheet of the raw enrolment workbook.
enrol_nums_data = pd.read_excel(
    "Raw Data/Course Enrollment Numbers.xlsx",
    sheet_name="abbreviated course names",
)

In [48]:
# Save the loaded sheet as-is (without the index column) to the processed-data folder.
enrol_nums_data.to_excel(
    "Processed Data/web_scraped_data.xlsx",
    index=False,
)

In [49]:
# Split "Degrees Compulsory For" into one row per listed degree and append one
# indicator column per degree code.
compulsory_enc = one_hot_list(data=enrol_nums_data, column="Degrees Compulsory For")
# Optional export, currently disabled:
# compulsory_enc.to_excel("compulsory_encoded.xlsx", index = False)

In [50]:
# NOTE(review): 11 is presumably the number of columns in the raw sheet, so the
# slice shows only the appended degree indicator columns — confirm.
compulsory_enc.columns[11:].tolist()

['ambsc',
 'ammmath',
 'cammsc',
 'cmfft',
 'cmfpt',
 'fmoft',
 'fmopt',
 'mbsc',
 'mma',
 'mmmath',
 'mnbi',
 'mnbu',
 'mnmu',
 'mnph',
 'mnst',
 'orco',
 'ords',
 'ormsc',
 'orwr',
 'stds',
 'storft',
 'storpt']

In [51]:
# One frame per degree code: the rows of `compulsory_enc` whose indicator
# column for that code is set. Targets and codes are listed in the same order.
(
    comp_ambsc, comp_ammmath, comp_cammsc, comp_cmfft, comp_cmfpt,
    comp_fmoft, comp_fmopt, comp_mbsc, comp_mma, comp_mmmath,
    comp_mnbi, comp_mnbu, comp_mnmu, comp_mnph, comp_mnst,
    comp_orco, comp_ords, comp_ormsc, comp_orwr, comp_stds,
    comp_storft, comp_storpt,
) = (
    compulsory_enc[compulsory_enc[degree_code].eq(True)]
    for degree_code in (
        "ambsc", "ammmath", "cammsc", "cmfft", "cmfpt",
        "fmoft", "fmopt", "mbsc", "mma", "mmmath",
        "mnbi", "mnbu", "mnmu", "mnph", "mnst",
        "orco", "ords", "ormsc", "orwr", "stds",
        "storft", "storpt",
    )
)

# Courses within each category

In [52]:
# Split "Categories" into one row per listed category and append one indicator
# column per category.
category_enc = one_hot_list(data=enrol_nums_data, column="Categories")

In [53]:
# NOTE(review): 11 is presumably the number of columns in the raw sheet, so the
# slice shows only the appended category indicator columns — confirm.
category_enc.columns[11:].tolist()

['Algebra',
 'Analysis',
 'Applied & Computational',
 'Data Science',
 'Financial',
 'Geometry & Topology',
 'Mathematical Physics',
 'Optimization & Operational Research',
 'Probability',
 'Statistics']

In [54]:
# One frame per category: the rows of `category_enc` whose indicator column for
# that category is set. Targets and category labels are listed in the same order.
(
    algebra_courses,
    analysis_courses,
    app_comp_courses,
    data_science_courses,
    financial_courses,
    geom_top_courses,
    mat_phys_courses,
    opt_or_courses,
    probability_courses,
    statistics_courses,
) = (
    category_enc[category_enc[category_label].eq(True)]
    for category_label in (
        "Algebra",
        "Analysis",
        "Applied & Computational",
        "Data Science",
        "Financial",
        "Geometry & Topology",
        "Mathematical Physics",
        "Optimization & Operational Research",
        "Probability",
        "Statistics",
    )
)


# Testing

In [55]:
# Load the encoded timetable workbook (first sheet, pandas defaults).
enc_tt_data = pd.read_excel(
    "Processed Data/encoded_timetable_data.xlsx",
)

In [56]:
# Timetable rows ordered by their "ws_groups" value; `enc_tt_data` itself is unchanged.
enc_tt_grouped = enc_tt_data.sort_values("ws_groups")

# Sets of Rooms

In [57]:
# Load the encoded room workbook (first sheet, pandas defaults).
room_data = pd.read_excel(
    "Processed Data/encoded_room_data.xlsx",
)

In [58]:
# Row masks taken from the layout flag columns.
# NOTE(review): used directly as masks, so these columns are assumed to be boolean — confirm.
is_theatre = room_data["layout_theatre"]
is_classroom_or_boardroom = room_data["layout_classroom"] | room_data["layout_boardroom"]
is_comp_lab = room_data["layout_comp_lab"]

# `all_rooms` is the same object as `room_data`, not a copy.
all_rooms = room_data
lecture_theatres = room_data[is_theatre]
classrooms = room_data[is_classroom_or_boardroom]
comp_labs = room_data[is_comp_lab]

In [59]:
def write_rooms(room_type, type_name):
    """Format a set of rooms as one labelled, bracketed list of quoted names.

    Parameters
    ----------
    room_type : pandas.DataFrame
        Frame with a ``room_name`` column of strings.
    type_name : str
        Label written before the list.

    Returns
    -------
    str
        ``<type_name>: [ "name" "name" ... ]`` followed by two newlines.
    """
    quoted_names = []
    for name in room_type.room_name.values:
        quoted_names.append('"' + name + '"')
    return f"{type_name}: [ " + " ".join(quoted_names) + " ]\n\n"

# Writing txt file

In [60]:
# Fresh read of the encoded timetable for the text export.
proc_tt = pd.read_excel("Processed Data/encoded_timetable_data.xlsx")

# Remove every row of course MATH11220.
# NOTE(review): the reason for excluding this course is not recorded here — confirm.
excluded_rows = proc_tt.index[proc_tt.Code == "MATH11220"]
proc_tt = proc_tt.drop(index=excluded_rows)

# Keep the first row seen for each course code.
courses_unique = proc_tt.drop_duplicates(subset="Code")
courses_unique.columns

Index(['Course', 'Code', 'Linked Course', 'Activity', 'Activity Type Name',
       'Planned Size', 'Real Size', 'Delivery Semester',
       'Teaching Week Pattern', 'Number Of Teaching Weeks', 'Scheduled Days',
       'Scheduled Start Time', 'Scheduled End Time', 'Duration', 'Total Hours',
       'Zone Name', 'room_name', 'Delivery Period', 'Normal Year Taken',
       'building', 'encoded_days', 'Abbreviated Course Name', 'time_periods',
       'duration_hours', 'total_duration_hours', 'course_classtype',
       'ws_groups', 'no_of_agg_ws_groups', 'aggregated_ws_groups',
       'sem1_pattern_one_hot', 'sem2_pattern_one_hot'],
      dtype='object')

In [64]:
# Write Course_groupings.txt: a sequence of `label: [ ... ]` entries holding the
# degree names, course codes/names, per-category and per-degree course codes,
# and the room sets.

# File path
output_file = "Course_groupings.txt"

# Leading entries, in the order they appear in the file.
# NOTE(review): 11 is presumably the number of columns in the raw sheet, so the
# slice keeps only the degree indicator column names — confirm.
header_sections = [
    ("degree_programme_names", list(compulsory_enc.columns)[11:]),
    ("course_codes", courses_unique.Code.values),
    ("course_names", courses_unique['Abbreviated Course Name'].values),
]

# Course groupings by subject: (label, frame) pairs, written as the frame's codes.
subject_frames = [
    ("algebra_courses", algebra_courses),
    ("analysis_courses", analysis_courses),
    ("app_comp_courses", app_comp_courses),
    ("data_science_courses", data_science_courses),
    ("financial_courses", financial_courses),
    ("geom_top_courses", geom_top_courses),
    ("mat_phys_courses", mat_phys_courses),
    ("opt_or_courses", opt_or_courses),
    ("probability_courses", probability_courses),
    ("statistics_courses", statistics_courses),
]

# Compulsory courses for each degree programme: (label, frame) pairs.
degree_frames = [
    ("comp_ambsc", comp_ambsc),
    ("comp_ammmath", comp_ammmath),
    ("comp_cammsc", comp_cammsc),
    ("comp_cmfft", comp_cmfft),
    ("comp_cmfpt", comp_cmfpt),
    ("comp_fmoft", comp_fmoft),
    ("comp_fmopt", comp_fmopt),
    ("comp_mbsc", comp_mbsc),
    ("comp_mma", comp_mma),
    ("comp_mmmath", comp_mmmath),
    ("comp_mnbi", comp_mnbi),
    ("comp_mnbu", comp_mnbu),
    ("comp_mnmu", comp_mnmu),
    ("comp_mnph", comp_mnph),
    ("comp_mnst", comp_mnst),
    ("comp_orco", comp_orco),
    ("comp_ords", comp_ords),
    ("comp_ormsc", comp_ormsc),
    ("comp_orwr", comp_orwr),
    ("comp_stds", comp_stds),
    ("comp_storft", comp_storft),
    ("comp_storpt", comp_storpt),
]

# Resolve every value before the file is opened, so a missing name fails
# without leaving a half-written file behind.
array_sections = header_sections + [
    (label, frame.Code.values) for label, frame in subject_frames + degree_frames
]

# Room sets written through write_rooms after the room-type list.
room_subsets = [
    ("lecture_theatres", lecture_theatres),
    ("classrooms", classrooms),
    ("comp_labs", comp_labs),
]

# Writing to the file
with open(output_file, 'w') as file:
    # Each entry is `label: [v1 v2 ... ] ` followed by a line holding a single
    # space; every entry after the first is preceded by one space. Values are
    # written unquoted, each followed by a space.
    for position, (label, values) in enumerate(array_sections):
        lead = "" if position == 0 else " "
        file.write(f"{lead}{label}: [")
        np.savetxt(file, values, fmt='%s', delimiter=' ', newline=" ")
        file.write("] \n \n")

    # Room names and room layouts are written as quoted strings.
    file.write(write_rooms(all_rooms, "all_rooms"))
    room_type_output = "all_room_types: [ " + " ".join('"' + room + '"' for room in list(all_rooms.room_layout.values)) + " ]\n\n"
    file.write(room_type_output)

    for label, rooms in room_subsets:
        file.write(write_rooms(rooms, label))

# Shortening txt code

In [62]:
# courses_dict = {
#     "algebra_courses": algebra_courses,
#     "analysis_courses": analysis_courses,
#     "app_comp_courses": app_comp_courses,
#     "data_science_courses": data_science_courses,
#     "financial_courses": financial_courses,
#     "geom_top_courses": geom_top_courses,
#     "mat_phys_courses": mat_phys_courses,
#     "opt_or_courses": opt_or_courses,
#     "probability_courses": probability_courses,
#     "statistics_courses": statistics_courses
# }

# # Open the file for writing
# with open("course_data.txt", "w") as file:
#     # Iterate over the dictionary items
#     for course_name, course_df in courses_dict.items():
#         # Write course name
#         file.write(f"{course_name}: [")
#         # Write course codes
#         np.savetxt(file, course_df.Code.values, fmt='%s', delimiter=' ', newline=" ")
#         # Write newline and closing bracket
#         file.write("]\n\n")