In [4]:
import requests
import pandas as pd
import os
import json

In [5]:
# Read the course_units file in ../data and preview the first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the CSV was
# written together with its index; passing index_col=0 may be cleaner — confirm first,
# because a later cell resumes by index label.
df = pd.read_csv('../data/course_units.csv')
df.head()

Unnamed: 0,course_name,course_code,unit_name,unit_code,unit_category,unit_zone,year
0,Master of Data Science,C000082,Web Technology,COMP6110,Elective units,Foundation Zone,2024
1,Master of Data Science,C000082,"Graphics, Multivariate Methods and Data Mining",STAT6102,Elective units,Foundation Zone,2024
2,Master of Data Science,C000082,Management of IT Systems and Projects,COMP6770,Elective units,Foundation Zone,2024
3,Master of Data Science,C000082,Introductory Statistics,STAT6170,Elective units,Foundation Zone,2024
4,Master of Data Science,C000082,Foundations of Computer Programming,COMP6010,Elective units,Foundation Zone,2024


In [13]:
# List the distinct unit_category labels. The output shows the same category spelled
# with different casing and trailing spaces (e.g. 'Elective units', 'Elective Units '),
# so any filter on this column has to be case-insensitive.
df['unit_category'].unique()

array(['Elective units', 'Essential Units', 'Essential units',
       'Capstone unit', 'Electives', 'Major', 'Elective units ',
       'Specialisation', 'Capstone units', 'Master essential units',
       'Transitional essential unit', 'Transitional elective units',
       'Master elective units', 'Elective Units', 'Elective Units ',
       'Stage 3 - Essential Units', 'Stage 2 - Essential Units',
       'Stage 1 - Essential Units', 'Stage 4 - Essential Units',
       'Stage 1 - Essential unit', 'Stage 1 - Elective units',
       'Stage 2 - Elective units', 'Stage 2 - Essential unit',
       'Stage 3 - Essential units', 'Stage 2 - Essential units',
       'Stage 4 - Essential units', 'Stage 1 - Essential units',
       'Capstone Unit', 'Elective units - Cyber Security ',
       'Cyber Security ', 'Minor', 'Essential unit',
       'Applied Artificial Intelligence', 'Option set'], dtype=object)

In [14]:
def save_to_excel(unit_info):
    """Write ``unit_info`` to ``../data/unit_info.csv``, omitting the index column.

    Despite the function name, the output is a CSV file, not an Excel workbook.

    Args:
        unit_info: the DataFrame to persist.
    """
    target_dir = os.path.join('..', 'data')
    csv_path = os.path.join(target_dir, 'unit_info.csv')
    unit_info.to_csv(csv_path, index=False)

In [17]:
# Keep a single row per unit_code (the first occurrence wins); the original index
# labels are preserved, which the scraping loop relies on to resume.
df = df.drop_duplicates(subset=['unit_code'], keep='first')

# Columns of the scraped result, in the order the scraping loop writes them.
unit_info_columns = ['unit_code', 'unit_name', 'year', 'unit_period', 'description',
                     'handbook_url', 'unit_rules', 'assessments', 'has_final_exam']
unit_info_path = os.path.join('..', 'data', 'unit_info.csv')

# Continue from a previous run when its output exists; otherwise start from an empty
# frame so the very first run does not fail on a missing file.
if os.path.exists(unit_info_path):
    unit_info = pd.read_csv(unit_info_path)
else:
    unit_info = pd.DataFrame(columns=unit_info_columns)

# Number of units scraped during this run
count = 0
# Search endpoint of the course handbook
url = 'https://coursehandbook.mq.edu.au/api/es/search'
# ---------------------------------------------------------------------------
# Scrape handbook details for each unit in `df` and append them to `unit_info`.
# Expects `df`, `unit_info`, `count` and `save_to_excel` to exist already.
# ---------------------------------------------------------------------------
HANDBOOK_BASE_URL = 'https://coursehandbook.mq.edu.au'
SEARCH_URL = HANDBOOK_BASE_URL + '/api/es/search'
# Rows whose index label is below this value are skipped (resume point of an earlier run)
RESUME_FROM_INDEX = 2193
# A checkpoint is written whenever `count` is a multiple of this value
SAVE_EVERY = 100
# Seconds to wait for the handbook API before giving up on a unit
REQUEST_TIMEOUT = 30
UNIT_INFO_COLUMNS = ['unit_code', 'unit_name', 'year', 'unit_period', 'description',
                     'handbook_url', 'unit_rules', 'assessments', 'has_final_exam']


def get_unit_rules(rules):
    """Group a unit's enrolment rules by type and collect its requisite unit codes.

    Args:
        rules: decoded unit record (dict). Its 'enrolment_rules' and 'requisites'
            entries are read; a missing or empty entry is treated as no rules.

    Returns:
        dict mapping each rule type to the list of rule descriptions, plus a
        "Co-badge" key listing the 'academic_item_code' of every requisite
        relationship (only present when there is at least one).
    """
    unit_rules = {}
    for rule in rules.get('enrolment_rules') or []:
        rule_type = rule['type']['value']
        # If nccw then use the label instead because it often includes the year
        if rule_type == 'nccw':
            rule_type = rule['type']['label']
        unit_rules.setdefault(rule_type, []).append(rule['description'])

    # NOTE(review): every requisite relationship is filed under "Co-badge", as in the
    # original code — confirm that all requisite kinds really are co-badged units.
    for requisite in rules.get('requisites') or []:
        for container in requisite['containers']:
            for related_unit in container['relationships']:
                unit_rules.setdefault("Co-badge", []).append(related_unit['academic_item_code'])

    return unit_rules


def get_assessments(assessments):
    """Summarise a unit's assessments, keyed by assessment title.

    Side effect: sets the module-level ``final_exam`` flag to True when a title
    contains 'final exam' (case-insensitive). The flag is never reset here, so the
    caller must set ``final_exam = False`` before calling. This is kept from the
    original cell for backward compatibility.

    Args:
        assessments: iterable of assessment dicts; None or empty yields {}.

    Returns:
        dict mapping assessment title to its type label, weight, hurdle flag and
        description. Assessments sharing a title overwrite each other.
    """
    global final_exam
    assessment_list = {}
    for assessment in assessments or []:
        assessment_name = assessment['assessment_title']
        # Guard against a non-string title before lower-casing it
        if isinstance(assessment_name, str) and 'final exam' in assessment_name.lower():
            final_exam = True
        assessment_list[assessment_name] = {
            'assessment_type': assessment['type']['label'],
            'assessment_weight': assessment['weight'],
            'assessment_hurdle': assessment['hurdle_task'],
            'assessment_description': assessment['description']
        }
    return assessment_list


def _implementation_year(contentlet):
    """Return the contentlet's 'implementationYear' as an int, or 0 when unusable."""
    try:
        return int(contentlet.get('implementationYear', 0))
    except (TypeError, ValueError):
        return 0


def fetch_latest_contentlet(unit_code):
    """Query the handbook search API for ``unit_code``.

    Returns:
        The contentlet with the highest 'implementationYear', or None when the
        response carries no contentlets.

    Raises:
        requests.RequestException: on network errors, timeouts or HTTP error status.
        ValueError: when the response body is not valid JSON.
    """
    json_param = {
        "query": {
            "bool": {
                "must": [
                    {
                        "query_string": {
                            "query": "mq2_psubject.code: \"{}\"".format(unit_code)
                        }
                    }
                ]
            }
        }
    }
    response = requests.post(SEARCH_URL, json=json_param, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()
    contentlets = data.get('contentlets') if isinstance(data, dict) else None
    if not contentlets:
        return None
    return max(contentlets, key=_implementation_year)


def _with_new_rows(frame, rows):
    """Return ``frame`` extended by ``rows`` (a list of record dicts) in one concat."""
    if not rows:
        return frame
    return pd.concat([frame, pd.DataFrame(rows, columns=UNIT_INFO_COLUMNS)], ignore_index=True)


# Rows scraped since the last checkpoint; merged into `unit_info` in batches instead
# of concatenating one row at a time.
pending_rows = []
# Units already present in `unit_info`, so re-running the cell does not duplicate them
already_scraped = set(unit_info['unit_code']) if 'unit_code' in unit_info.columns else set()

try:
    for index, row in df.iterrows():

        # Skip to the resume point
        if index < RESUME_FROM_INDEX:
            continue

        # Only process units, not majors, specialisations, ... (a missing category is skipped)
        category = row['unit_category']
        if not isinstance(category, str) or "unit" not in category.lower():
            continue

        unit = row['unit_code']
        unit_name = row['unit_name']
        if unit in already_scraped:
            continue

        print('Processing unit {}...'.format(unit))
        print("Current index: ", index)
        print("Count: ", count)

        try:
            general_data = fetch_latest_contentlet(unit)
        except (requests.RequestException, ValueError) as exc:
            # One failing request should not abort the whole run
            print("Request failed for unit {}: {}".format(unit, exc))
            continue

        if general_data is None:
            print("No contentlets found for unit:", unit)
            continue

        # The detailed unit record is stored as a JSON string inside the contentlet
        unit_data = json.loads(general_data['data'])
        year = unit_data.get('implementation_year')

        # One "<teaching period> - <attendance mode>" entry per offering
        offerings = [
            offering['teaching_period']['value'] + ' - ' + offering['attendance_mode']['value']
            for offering in unit_data.get('unit_offering') or []
        ]

        description = general_data.get("description", "")
        handbook_url = HANDBOOK_BASE_URL + general_data.get('urlMap', "")
        unit_rules = get_unit_rules(unit_data)
        # get_assessments raises this flag when it sees a final exam
        final_exam = False
        assessments = get_assessments(unit_data.get('assessments'))

        print(description)
        print(handbook_url)
        print(unit_rules)
        print(assessments)
        print(year)
        print("Final exam: ", final_exam)

        pending_rows.append({
            'unit_code': unit,
            'unit_name': unit_name,
            'year': year,
            'unit_period': offerings,
            'description': description,
            'handbook_url': handbook_url,
            'unit_rules': unit_rules,
            'assessments': assessments,
            'has_final_exam': final_exam,
        })
        already_scraped.add(unit)

        # Save every SAVE_EVERY units
        if count % SAVE_EVERY == 0:
            unit_info = _with_new_rows(unit_info, pending_rows)
            pending_rows = []
            save_to_excel(unit_info)
        count += 1
finally:
    # Always persist what was scraped, even when the loop is interrupted or fails
    unit_info = _with_new_rows(unit_info, pending_rows)
    save_to_excel(unit_info)

Processing unit PICT7000...
Current index:  2193
Count:  0

https://coursehandbook.mq.edu.au/2020/units/PICT7000/
{}
{}
2020
Final exam:  False
Processing unit ANTH7001...
Current index:  2194
Count:  1
<p>This core unit in the Master of Research specialisation in Anthropology provides a grounding in theoretical, methodological and interpretive issues currently being debated by anthropologists. The seminars deal with a selected number of theoretical, methodological and interpretative issues that are currently debated in the discipline. These issues will vary from year to year according to contemporary developments in Anthropology and the interests of the course convener, and in terms of how current concerns in the discipline link to the theoretical issues addressed by students. Others may be more enduring, such as the theoretical issues related to the &#39;writing culture&#39; debate, &#39;orientalism&#39; and the problem of the &#39;other&#39;, cultural relativism, politics and power,