In [1]:
# Libraries
import os
import pandas as pd
import glob
import json

In [2]:
DATA_PATH = os.path.join("..", "data")
CSV_SEPARATOR = "|"

PROGRAMS = {
    901 : "INFORMATION SYSTEMS",
    902 : "COGNITIVE SCIENCES",
    903 : "MODELLING AND SIMULATION",
    904 : "INFORMATICS ONLINE",
    905 : "SOFTWARE MANAGEMENT",
    906 : "MEDICAL INFORMATICS",
    908 : "BIOINFORMATICS",
    909 : "MULTIMEDIA INFORMATICS",
    910 : "CYBER SECURITY",
    911 : "DATA INFORMATICS"
}

In [3]:
# All courses opened by a specific department
courses_opened_by_a_specific_department_path = os.path.join(f"{DATA_PATH}","*all_courses.csv")
courses_opened_by_a_specific_department = glob.glob(courses_opened_by_a_specific_department_path)

print(courses_opened_by_a_specific_department[:3])
print()

['../data/908_all_courses.csv', '../data/911_all_courses.csv', '../data/901_all_courses.csv']



In [4]:
# All information about each course
# e.g. course content, course objectives, learning outcomes...
course_contents_path = os.path.join(f"{DATA_PATH}","*.json")
course_contents = glob.glob(course_contents_path)
print(course_contents[:3])

['../data/9060537.json', '../data/9030515.json', '../data/9020515.json']


In [5]:
# All courses opened by a specific department in 2024-2025 fall semester
# 20241 means "Semester: 2024-2025 Fall"
courses_opened_in_2024_2025_Fall_path = os.path.join(f"{DATA_PATH}", "20241-*.csv")
courses_opened_in_2024_2025_Fall = glob.glob(courses_opened_in_2024_2025_Fall_path)
print(courses_opened_in_2024_2025_Fall[:3])

['../data/20241-911-merged.csv', '../data/20241-908-merged.csv', '../data/20241-904.csv']


### Scraped Data Contents
![](img/scraped_data.png)

In [6]:
def trim_before_pipe(str):
    pipe_index = str.find('|')
    
    if pipe_index != -1:
        str = str[pipe_index + 1:]

    return str


def trim_buzz_words(str):
    str = str.replace(" Course Syllabus", "")
    str = str.replace("Course Objectives", "")
    str = str.replace("Course Learning Outcomes", "")
    return str


def find_in(program, collection):
    return list(filter(lambda x: str(program) in x, collection))


def merge_data(all_courses_file, semester_courses_file, courses_contents_files):
    
    df_semester_courses = pd.read_csv(semester_courses_file)
    df_merged = df_semester_courses.copy()
    
    # Below columns should be added to new dataframe
    attributes = ["Language of Instruction",
                  "Level of Study",
                  "Course Objectives",
                  "Course Learning Outcomes",
                  "Course Content"]
    
    for attr in attributes:
         df_merged[attr] = len(df_semester_courses) * [None]
    
    
    # Read the course content and update the dataframe
    for c in courses_contents_files:

          with open(c, 'r', encoding='utf-8') as file:
               data = json.load(file)
               
               filtered_data = {key: data.get(key, "Not Available") for key in attributes}

               # Clean data
               filtered_data["Course Objectives"] = trim_before_pipe(trim_buzz_words(filtered_data["Course Objectives"]))
               filtered_data["Course Learning Outcomes"] = trim_before_pipe(trim_buzz_words(filtered_data["Course Learning Outcomes"]))

               # Update the 'key' column where 'Course Code' equals 'data["Course Code"]'
               for key, value in filtered_data.items():
                    # print(data["Course Code"], key, f"'{value}'")
                    
                    filtered_rows = df_merged['Course Code'] == int(data["Course Code"])
                    df_merged.loc[filtered_rows, key] = value

    return df_merged

In [104]:
for program in PROGRAMS.keys():
     # For program 901,
     
     # 901_all_courses.csv
     all_courses_file = find_in(program, courses_opened_by_a_specific_department)[0]
     
     # 20241-901.csv
     semester_courses_file = find_in(program, courses_opened_in_2024_2025_Fall)[0]
     
     # All json files started with 901
     courses_contents_files = find_in(program, course_contents)

     # print(all_courses_file, semester_courses_file, courses_contents_files)


     merged_data = merge_data(all_courses_file, semester_courses_file, courses_contents_files)
     filename = semester_courses_file[:-4] + "-merged.csv"
     merged_data.to_csv(filename, sep=CSV_SEPARATOR, index=False)

In [105]:
merged_path = os.path.join(f"{DATA_PATH}","*merged.csv")
merged_files = glob.glob(merged_path)
merged_files

['../data/20241-911-merged.csv',
 '../data/20241-908-merged.csv',
 '../data/20241-909-merged.csv',
 '../data/20241-904-merged.csv',
 '../data/20241-901-merged.csv',
 '../data/20241-906-merged.csv']

In [106]:
pd.read_csv(merged_files[0], sep=CSV_SEPARATOR).head(5)

Unnamed: 0,Semester,Program Code,Program Short Name,Course Code,Course Name,Credit,ECTS Credit,Course Section,Capacity,Day1,Start Hour1,End Hour1,Instructor Name,Instructor Title,Language of Instruction,Level of Study,Course Objectives,Course Learning Outcomes,Course Content
0,2024-2025 Fall,911,DI,9110501,INTRODUCTION TO DATA INFORMATICS,3,8.0,1,21,Thursday,13:40,16:30,ALTAN KOÇYİĞİT,Prof.Dr.,English,Graduate,"At the end of this course, the student will kn...","Student, who passed the course statisfactorily...",This course explains the complete process cycl...
1,2024-2025 Fall,911,DI,9110502,DATA INFORMATICS PROJECT,3,8.0,1,21,Wednesday,09:40,12:30,TUĞBA TAŞKAYA TEMİZEL,Prof.Dr.,English,Graduate,By the end of the course the students are expe...,,This course enables students to gain an unders...
2,2024-2025 Fall,911,DI,9110504,FOUNDATIONS OF DEEP LEARNING,3,8.0,1,30,Thursday,09:40,12:30,ERDEM AKAGÜNDÜZ,Assoc.Prof.Dr.,English,Graduate,The course will cover various subjects related...,Via this course and by the successful completi...,The course will cover various subjects related...
3,2024-2025 Fall,911,DI,9110590,GRADUATE SEMINAR,0,10.0,1,90,Wednesday,12:40,13:30,UMUT ŞENER,Res.Dr.,English,Graduate,,,M.S. students choose and study a topic under t...
4,2024-2025 Fall,911,DI,9110592,MATHEMATICS FOR DATA INFORMATICS,3,8.0,1,20,Friday,09:40,12:30,SERKAN KAYGIN,Lecturer Dr.,English,Graduate,"At the end of this course, the student will kn...",By the successful completion of the course the...,This course reviews the fundamental mathematic...
