In [1]:
import requests
import pandas as pd
import os
import json

In [2]:
# Load the course catalogue from the sibling data directory and preview its first rows.
courses_csv_path = os.path.join('..', 'data', 'courses.csv')
df = pd.read_csv(courses_csv_path)
df.head()

Unnamed: 0,course_name,course_type,course_code,course_url
0,Master of Data Science,Course,C000082,https://www.mq.edu.au/study/find-a-course/cour...
1,Bachelor of Laws,Course,C000132,https://www.mq.edu.au/study/find-a-course/cour...
2,Master of Laws,Course,C000184,https://www.mq.edu.au/study/find-a-course/cour...
3,Master of Planning,Course,C000033,https://www.mq.edu.au/study/find-a-course/cour...
4,Major in Data Science,Majors and Specialisations,,https://www.mq.edu.au/study/find-a-course/cour...


In [3]:
# Slug of the first course: the last path segment of its course_url.
first_course_url = df.loc[0, 'course_url']
course_param = first_course_url.rsplit('/', 1)[-1]

In [4]:
# For each row in df: take the slug from course_url, request the matching
# page-data.json, and record every unit together with its category (and zone,
# for full courses).
# Final units_df shape: course_name, course_code, unit_name, unit_code,
# unit_category, unit_zone, year
UNIT_COLUMNS = ['course_name', 'course_code', 'unit_name', 'unit_code', 'unit_category', 'unit_zone', 'year']
UNITS_CSV_PATH = os.path.join('..', 'data', 'course_units.csv')
PAGE_DATA_BASE_URL = "https://www.mq.edu.au/study/page-data/find-a-course/courses"
# Without a timeout a single stalled connection would hang the whole loop.
REQUEST_TIMEOUT_SECONDS = 30


def build_page_data_url(course_name, course_type, course_param):
    """Return the page-data.json URL for one row of the courses table.

    Rows whose course_type contains 'Major' live under a 'major/' or
    'specialisation/' sub-path, chosen by which word appears in course_name;
    every other row uses the plain course path.
    """
    if 'Major' in course_type:
        if 'Major' in course_name:
            return "{}/major/{}/page-data.json".format(PAGE_DATA_BASE_URL, course_param)
        if 'Specialisation' in course_name:
            return "{}/specialisation/{}/page-data.json".format(PAGE_DATA_BASE_URL, course_param)
    return "{}/{}/page-data.json".format(PAGE_DATA_BASE_URL, course_param)


def collect_units(row, curriculum_structure, year):
    """Return one record (dict keyed by UNIT_COLUMNS) per unit of a course.

    Majors/specialisations list unit categories directly, so their records get
    unit_zone=None. Full courses nest the categories inside zones, and each
    record carries the title of its enclosing zone.
    Progress is printed in the same layout as before.
    """
    records = []

    def add_category(unit_category, zone_title):
        print(unit_category.get('title', None))
        print("------")
        # Tolerate categories that carry no 'relationship' list.
        for unit in unit_category.get('relationship') or []:
            print(unit.get('academic_item_code', None), unit.get('academic_item_name', None))
            records.append({
                'course_name': row['course_name'],
                'course_code': row['course_code'],
                'unit_name': unit.get('academic_item_name', None),
                'unit_code': unit.get('academic_item_code', None),
                'unit_category': unit_category.get('title', None),
                'unit_zone': zone_title,
                'year': year
            })

    # Major and Course have different json structures
    if 'Major' in row['course_type']:
        for unit_category in curriculum_structure:
            # There is no zone level here; previously this branch read a
            # `zone` variable left over from an earlier course (or undefined).
            add_category(unit_category, None)
    else:
        for zone in curriculum_structure:
            print(zone.get('title', None))
            print("-------------------")
            for unit_category in zone.get('container') or []:
                add_category(unit_category, zone.get('title', None))
        print("+++++++++++++")
    return records


units_df = pd.DataFrame(columns=UNIT_COLUMNS)
unit_records = []
count = 0

for index, row in df.iterrows():
    # Filter: Skip Double Degree
    if 'Double' in row['course_type']:
        continue

    print("Count: ", count)
    print("Current course: ", row['course_name'])
    # Counted up front so the early `continue`s below still count the course.
    count += 1

    course_param = row['course_url'].split('/')[-1]
    url = build_page_data_url(row['course_name'], row['course_type'], course_param)

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        # Report and move on, like a non-200 response, instead of aborting the run.
        print("Error: ", exc)
        continue
    if response.status_code != 200:
        print("Error: ", response.status_code)
        continue

    data = response.json()
    # The info is a json inside a json, so we need to load it twice
    data_json = json.loads(data['result']['data']['current']['fields']['json'])
    year = data_json['year']

    structure = data_json.get("curriculum_structure") or []
    if not structure:
        # Skip the course: carrying on would reuse the previous course's
        # curriculum_structure and attribute its units to this course.
        print("No curriculum structure found for {}".format(row['course_name']))
        continue
    curriculum_structure = structure[0].get("container", [])

    course_units = collect_units(row, curriculum_structure, year)
    if course_units:
        unit_records.extend(course_units)
        # Rebuild the frame and checkpoint the CSV once per course (not once
        # per unit) so progress survives a crash without quadratic work.
        units_df = pd.DataFrame(unit_records, columns=UNIT_COLUMNS)
        units_df.to_csv(UNITS_CSV_PATH, index=False)

Count:  0
Current course:  Master of Data Science
Foundation Zone
-------------------
Elective units
------
COMP6110 Web Technology
STAT6102 Graphics, Multivariate Methods and Data Mining
COMP6770 Management of IT Systems and Projects
STAT6170 Introductory Statistics
COMP6010 Foundations of Computer Programming
COMP6350 Database Systems
COMP6760 Enterprise Systems Integration
STAT6180 Applied Statistics
Essential Units
------
STAT6110 Statistical Inference
COMP6200 Data Science
STAT6175 Linear Models
COMP6210 Big Data
COMP6420 Artificial Intelligence for Text and Vision
Core Zone
-------------------
Elective units
------
COMP8240 Applications of Data Science
COMP8440 Automated Decision Making in Business
STAT8150 Bayesian Data Analysis
COMP8230 Mining Unstructured Data
STAT8178 Modern Computational Statistical Methods
STAT8123 Statistical Graphics
Essential units
------
COMP8210 Big Data Technologies
COMP8851 Major Project
COMP8221 Advanced Machine Learning
STAT8111 Generalized Linear 

In [5]:
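# NOTE: scratch exploration of the page-data.json response for a single major; everything in this cell is commented out and is not executed.
# url = "https://www.mq.edu.au/study/page-data/find-a-course/courses/major/data-science/page-data.json"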
# url2 = "https://www.mq.edu.au/study/page-data/find-a-course/courses/master-of-data-science/page-data.json"

# # Make the request
# response = requests.get(url)

# # Check if the request was successful (status code 200)
# if response.status_code == 200:
#     # Parse the JSON response
#     data = response.json()

#     # Extract and print relevant information
#     page_data = data.get("result", {}).get("data", {}).get("page", {})

#     if page_data:
#         print("Title:", page_data.get("title", ""))
#         print("Description:", page_data.get("description", ""))
#         # Add more fields as needed

#     else:
#         print("No data found in the response.")
# else:
#     print(f"Error: {response.status_code}")
# # Parse JSON data from the result
# parsed_data = json.loads(data['result']['data']['current']['fields']['json'])

# # Extract curriculum structure
# curriculum_structure = parsed_data.get("curriculum_structure", [])[0].get("container", [])

# # Loop through each zone in the curriculum
# # for zone in curriculum_structure:
# #     print("Zone: ", zone['title'])
# #     print("-------------------")

# # Loop through each subzone in the zone
# for unit in curriculum_structure:
#     print("Unit Category: ", unit['title'])
#     print("------")
#     # Loop through each relationship in the unit
#     for relationship in unit['relationship']:
#         print(relationship['academic_item_code'], relationship['academic_item_name'])
#     print("+++++++++++++")
