## Courses Scraper

In [5]:
import requests
import pandas as pd
import os

In [6]:
# # Example JSON ouptut
# example_data = requests.get("https://websearch.mq.edu.au/s/search.json?collection=mq-edu-au-push-courses&profile=domestic&f.Study+level|studyLevel=Undergraduate&f.Study+type|courseType=Course&f.Study+type|courseType=Majors+and+Specialisations&query=!padrenull&start_rank=1")
# example_data.json()['response']['resultPacket']['results']

In [8]:
def courses_scraper(api_url, output_csv):
    data_list = []
    page = 1
    per_page = 10  # Adjust this based on the actual API response

    while True:
        params = {
            'collection': 'mq-edu-au-push-courses',
            'profile': 'domestic',
            'f.Study+level|studyLevel': 'Undergraduate',
            'f.Study+type|courseType': ['Course', 'Majors+and+Specialisations'],
            'query': '!padrenull',
            'start_rank': (page - 1) * per_page + 1
        }

        response = requests.get(api_url, params=params)
        print("This is response: ", response)

        if response.status_code == 200:
            # Parse the JSON response
            api_data = response.json()

            # Extract relevant information from the API response
            n = api_data['response']['resultPacket']['results']
            if not n:
                break  # No more data available
            for i in n:
                # Not every course has a course code
                course_code = i['listMetadata'].get('courseCode', ['N/A'])[0]
                # Append the extracted data to the list
                data_list.append({
                    'course_name': i['listMetadata']['courseName'][0],
                    'course_type': i['listMetadata']['courseType'][0],
                    'course_code': course_code,
                    'course_url': i['liveUrl']
                })
            # Move to the next page
            page += 1
        else:
            print(f"Error: {response.status_code}")
            break

    # Convert the list of dictionaries to a DataFrame
    print(data_list)
    df = pd.DataFrame(data_list)

    # Create the 'data' folder if it doesn't exist
    data_folder = '../data'  # Use '../data' to go up one level
    os.makedirs(data_folder, exist_ok=True)

    # Save DataFrame to CSV in the 'data' folder
    output_path = os.path.join(data_folder, output_csv)
    df.to_csv(output_path, index=False)
    print(f"Data successfully scraped and stored in {output_path}")

# Example usage
api_url = "https://websearch.mq.edu.au/s/search.json"
output_csv = "courses.csv"
courses_scraper(api_url, output_csv)

This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response [200]>
This is response:  <Response

## Units of Course Scraper

In [None]:
import requests

url = "https://www.mq.edu.au/study/page-data/find-a-course/courses/bachelor-of-laws/page-data.json"
url2 = "https://www.mq.edu.au/study/page-data/find-a-course/courses/bachelor-of-information-technology/page-data.json"

# Make the request
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Extract and print relevant information
    page_data = data.get("result", {}).get("data", {}).get("page", {})

    if page_data:
        print("Title:", page_data.get("title", ""))
        print("Description:", page_data.get("description", ""))
        # Add more fields as needed

    else:
        print("No data found in the response.")
else:
    print(f"Error: {response.status_code}")


No data found in the response.


In [None]:
data['result']['data']['current']['fields']['json']

'{"school":{"value":"Faculty of Arts","key":"name"},"published_in_handbook":{"label":"Yes","value":"1"},"search_title":"Bachelor of Laws","code":"C000132","abbreviated_name_and_major":"","implementation_year":"2024","credit_points":"320","accrediting_bodies":[],"enrolment_patterns":["Part Time","Full Time"],"content_type":"Course","study_level":"Undergraduate","academic_org":{"value":"","key":"name"},"status":{"label":"Approved","value":"Active"},"title":"Bachelor of Laws","abbreviated_name":"","aqf_level":{"label":"Level 7","value":"level_7"},"type":{"label":"Specialist Bachelor Degree","value":"specialist_bachelor_degree"},"description":null,"version":"2","course_code":"C000132","version_name":"2024.02","atar":"96","award_titles":"","cricos_code":"080288E","learning_and_teaching_methods":"<p>The Bachelor of Laws uses both lectures and small group tutorials. Tutorials emphasise participation and the development of verbal communication skills.</p>\\n<p><br /><br /><br /><br /></p>","su

In [None]:
# convert string to json
import json
converted = json.loads(data['result']['data']['current']['fields']['json'])

In [None]:
converted

{'school': {'value': 'Faculty of Arts', 'key': 'name'},
 'published_in_handbook': {'label': 'Yes', 'value': '1'},
 'search_title': 'Bachelor of Laws',
 'code': 'C000132',
 'abbreviated_name_and_major': '',
 'implementation_year': '2024',
 'credit_points': '320',
 'accrediting_bodies': [],
 'enrolment_patterns': ['Part Time', 'Full Time'],
 'content_type': 'Course',
 'study_level': 'Undergraduate',
 'academic_org': {'value': '', 'key': 'name'},
 'status': {'label': 'Approved', 'value': 'Active'},
 'title': 'Bachelor of Laws',
 'abbreviated_name': '',
 'aqf_level': {'label': 'Level 7', 'value': 'level_7'},
 'type': {'label': 'Specialist Bachelor Degree',
  'value': 'specialist_bachelor_degree'},
 'description': None,
 'version': '2',
 'course_code': 'C000132',
 'version_name': '2024.02',
 'atar': '96',
 'award_titles': '',
 'cricos_code': '080288E',
 'learning_and_teaching_methods': '<p>The Bachelor of Laws uses both lectures and small group tutorials. Tutorials emphasise participation a

In [None]:
converted['curriculum_structure'][0]

{'ai_to_cs_cl_id': 'dc9ff57b9776fd9049533c4ef053af9e',
 'curriculum_structure': {'value': '', 'key': 'version_code'},
 'source': {'value': '', 'key': ''},
 'effective_date': '',
 'credit_points': '320',
 'structure_cl_id': 'fc8002fb9776fd9049533c4ef053af72',
 'parent_table': 'x_f5sl_cl_courses',
 'name': 'Structure',
 'parent_id': {'value': 'Course: C000132',
  'key': 'code',
  'type': 'x_f5sl_cl_courses'},
 'container': [{'child_table': '',
   'footnote': '',
   'map_type': '',
   'parent_table': 'x_f5sl_cl_curriculum_structure',
   'parent_connector': {'label': 'AND', 'value': 'AND'},
   'description': 'You can use your flexible zone to enrol in any Undergraduate unit for which you meet the requisites. You may also use your flexible zone to complete a major or a minor(s).',
   'credit_points': '80',
   'horizontal_grouping': {'label': None, 'value': None},
   'title': 'Flexible Zone',
   'vertical_grouping': {'label': 'FLEXIBLE', 'value': 'flexible'},
   'credit_points_max': '',
   '

In [None]:
units = converted.get("curriculum_structure", [])[0].get("container", [])
for i in units[1]['container']:
    print(i['title'])
    print("------")
    for j in i['relationship']:
        print(j['academic_item_code'], j['academic_item_name'])

Elective units
------
LAWS5078 PACE: Clinics and Projects
LAWS5051 PACE: Individual Placements
Capstone unit
------
LAWS5000 Remedies, Reparations and Resolution in Law
Elective units
------
LAWS5056 Succession
LAWS5018 Indigenous Peoples and the Law
LAWS5020 Family Law
LAWS5065 Dispute Management and Resolution
LAWS5030 Law Moots and Competitions
LAWS5069 Energy and Natural Resources Law
LAWS5087 Law of Armed Conflict
LAWS5053 Taxation
LAWS5071 Health Law and Ethics
LAWS5910 Research Methodologies in Law
LAWS5043 Climate Change Law
LAWS5036 Modern Corporate Governance
LAWS5082 Insolvency Law
LAWS5010 International Trade Law
LAWS5021 Foundations of Commercial Law
LAWS5920 Legal Research Project
LAWS5080 Human Rights and Moral Dilemmas
LAWS5028 Competition Law
LAWS5079 Refugees and Migration
LAWS5057 International Private Law
LAWS5007 Employment and Labour Law 
LAWS5302 International Law Exchange 2
LAWS5040 Environmental Planning Law
LAWS5084 Media Law
LAWS5023 Intellectual Property Law