From a24d97083ade5ded65219a7c9d302dddf8b61380 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Tue, 29 Oct 2024 09:12:18 -0400 Subject: [PATCH 1/2] Ingest resources from Professional Education's new API --- app.json | 8 +- learning_resources/etl/constants.py | 6 +- learning_resources/etl/mitpe.py | 394 ++++++++++++++++++ learning_resources/etl/mitpe_test.py | 242 +++++++++++ learning_resources/etl/pipelines.py | 22 + learning_resources/etl/pipelines_test.py | 26 ++ learning_resources/etl/prolearn.py | 41 +- learning_resources/etl/prolearn_test.py | 42 +- learning_resources/etl/sloan.py | 1 - learning_resources/etl/sloan_test.py | 7 - .../commands/backpopulate_mitpe_data.py | 50 +++ .../commands/transfer_list_resources.py | 17 +- learning_resources/tasks.py | 7 + learning_resources/tasks_test.py | 12 + learning_resources/utils.py | 9 +- learning_resources/utils_test.py | 7 +- learning_resources_search/utils.py | 4 +- main/settings_celery.py | 4 + main/settings_course_etl.py | 3 +- .../mitpe/professional_ed_resources_0.json | 52 +++ .../mitpe/professional_ed_resources_1.json | 27 ++ .../mitpe/professional_ed_resources_2.json | 1 + 22 files changed, 919 insertions(+), 63 deletions(-) create mode 100644 learning_resources/etl/mitpe.py create mode 100644 learning_resources/etl/mitpe_test.py create mode 100644 learning_resources/management/commands/backpopulate_mitpe_data.py create mode 100644 test_json/mitpe/professional_ed_resources_0.json create mode 100644 test_json/mitpe/professional_ed_resources_1.json create mode 100644 test_json/mitpe/professional_ed_resources_2.json diff --git a/app.json b/app.json index e02f108f58..964e757b3a 100644 --- a/app.json +++ b/app.json @@ -229,6 +229,10 @@ "description": "URL to MicroMasters catalog API", "required": "false" }, + "MITPE_API_ENABLED": { + "description": "Whether MIT Professional Education ETL should be enabled", + "required": "false" + }, "MITPE_BASE_URL": { "description": "Base URL for MIT Professional Education website", "required": "false" @@ -511,10 +515,6 @@ "description": "URL to retrieve a MITx access token", "required": false }, - "SEE_API_ENABLED": { - "description": "Whether the Sloan ETL shouold be enabled", - "required": false - }, "SEE_API_URL": { "description": "URL to retrieve MITx course data from", "required": false diff --git a/learning_resources/etl/constants.py b/learning_resources/etl/constants.py index 140433f2a3..b8031a8c66 100644 --- a/learning_resources/etl/constants.py +++ b/learning_resources/etl/constants.py @@ -52,13 +52,14 @@ class ETLSource(ExtendedEnum): micromasters = "micromasters" mit_edx = "mit_edx" + mitpe = "mitpe" mitxonline = "mitxonline" oll = "oll" - xpro = "xpro" ocw = "ocw" prolearn = "prolearn" podcast = "podcast" see = "see" + xpro = "xpro" youtube = "youtube" @@ -82,6 +83,9 @@ class CourseNumberType(Enum): "": LearningResourceDelivery.online.name, "Blended": LearningResourceDelivery.hybrid.name, "In Person": LearningResourceDelivery.in_person.name, + "Live Virtual": LearningResourceDelivery.online.name, + "Live Online": LearningResourceDelivery.online.name, + "On Campus": LearningResourceDelivery.in_person.name, **{ value: LearningResourceDelivery(value).name for value in LearningResourceDelivery.values() diff --git a/learning_resources/etl/mitpe.py b/learning_resources/etl/mitpe.py new file mode 100644 index 0000000000..e100beca8e --- /dev/null +++ b/learning_resources/etl/mitpe.py @@ -0,0 +1,394 @@ +"""Professional Education ETL""" + +import copy +import html +import logging +import re +from datetime import UTC, datetime +from decimal import Decimal +from urllib.parse import urljoin +from zoneinfo import ZoneInfo + +import requests +from django.conf import settings + +from learning_resources.constants import ( + CURRENCY_USD, + Availability, + CertificationType, + Format, + LearningResourceType, + OfferedBy, + Pace, + PlatformType, +) +from learning_resources.etl.constants import ETLSource +from learning_resources.etl.utils import transform_delivery +from main.utils import clean_data, now_in_utc + +log = logging.getLogger(__name__) + +BASE_URL = "https://professional.mit.edu/" +OFFERED_BY = {"code": OfferedBy.mitpe.name} + + +def _fetch_data(url) -> list[dict]: + """ + Fetch data from the Professional Education API + + Args: + url(str): The url to fetch data from + + Yields: + list[dict]: A list of course or program data + """ + params = {"page": 0} + has_results = True + while has_results: + results = requests.get( + url, params=params, timeout=settings.REQUESTS_TIMEOUT + ).json() + has_results = len(results) > 0 + yield from results + params["page"] += 1 + + +def extract() -> list[dict]: + """ + Load the Professional Education data from an external API + + Returns: + list[dict]: list of raw course or program data + """ + required_settings = [ + "MITPE_BASE_API_URL", + "MITPE_API_ENABLED", + ] + for setting in required_settings: + if not getattr(settings, setting): + log.warning("Missing required setting %s", setting) + return [] + return list(_fetch_data(urljoin(settings.MITPE_BASE_API_URL, "/feeds/courses/"))) + + +def parse_topics(resource_data: dict) -> list[dict]: + """ + Get a list containing {"name": } dict objects + + Args: + resource_data: course or program data + + Returns: + list of dict: list containing topic dicts with a name attribute + """ + extracted_topics = resource_data["topics"] + if not extracted_topics: + return [] + return [{"name": html.unescape(topic)} for topic in extracted_topics.split("|")] + + +def parse_instructors(resource_data: dict) -> list[dict]: + """ + Get a list of instructors for a resource + """ + instructors = [] + for attribute in ["lead_instructors", "instructors"]: + instructors.extend( + [ + {"full_name": html.unescape(instructor)} + for instructor in resource_data[attribute].split("|") + ] + ) + return instructors + + +def parse_image(resource_data: dict) -> dict or None: + """ + Create a dict object representing the resource image + + Args: + resource_data: course or program data + + Returns: + dict: json representation of the image if it exists + """ + img_src = resource_data["image__src"] + if img_src: + return { + "alt": resource_data["image__alt"], + "url": urljoin(BASE_URL, img_src), + } + return None + + +def parse_date(date_str: str) -> datetime or None: + """ + Get a datetime value from a string + + Args: + date_str: string representing a date + + Returns: + datetime: start or end date + """ + if date_str: + return ( + datetime.strptime(date_str, "%Y-%m-%d") + .replace(tzinfo=ZoneInfo("US/Eastern")) + .astimezone(UTC) + ) + return None + + +def parse_resource_url(resource_data: dict) -> str: + """ + Return the url for the resource + + Args: + resource_data: course or program data + + Returns: + str: url for the resource + """ + return urljoin(BASE_URL, resource_data["url"]) + + +def clean_title(title: str) -> str: + """ + Clean the title of the resource + + Args: + title: title of the resource + + Returns: + str: cleaned title + """ + return html.unescape(title.strip()) + + +def parse_description(resource_data: dict) -> str: + """ + Return the description for the resource. Use summary field if not blank. + + Args: + resource_data: course or program data + + Returns: + str: description for the resource + """ + return clean_data(resource_data["description"]) + + +def parse_resource_type(resource_data: dict) -> str: + """ + Return the type of the resource based on certificate data + + Args: + resource_data: course or program data + + Returns: + str: type of the resource (course or program) + """ + if resource_data["resource_type"]: + return resource_data["resource_type"].lower() + else: + # Determine based on certificate data + if "Certificate of Completion" in resource_data["course_certificate"].split( + "|" + ): + return LearningResourceType.course.name + return LearningResourceType.program.name + + +def parse_location(resource_data: dict) -> str: + """ + Return the location of the resource if relevant + + Args: + resource_data: course or program data + + Returns: + str: location of the resource + """ + if resource_data["learning_format"] in ["In Person", "On Campus", "Blended"]: + return resource_data["location"] + return "" + + +def _transform_runs(resource_data: dict) -> list[dict]: + """ + Transform course/program runs into our normalized data structure + + Args: + resource_data (dict): course/program data + + Returns: + list[dict]: normalized course/program run data + """ + now = now_in_utc() + runs_data = zip( + resource_data["run__readable_id"].split("|"), + resource_data["start_date"].split("|"), + resource_data["end_date"].split("|"), + resource_data["enrollment_end_date"].split("|"), + ) + published_runs = [] + for run_data in runs_data: + start = parse_date(run_data[1]) + end = parse_date(run_data[2]) + enrollment_end = parse_date(run_data[3]) + published = (not end and not enrollment_end) or (now <= (enrollment_end or end)) + if published: + published_runs.append( + { + "run_id": run_data[0], + "title": clean_title(resource_data["title"]), + "description": parse_description(resource_data), + "start_date": start, + "end_date": end, + "enrollment_end": enrollment_end, + "published": published, + "prices": [ + { + "amount": Decimal( + re.sub(r"[^0-9\\.]", "", resource_data["price"]) + ), + "currency": CURRENCY_USD, + } + ] + if resource_data["price"] + else [], + "url": parse_resource_url(resource_data), + "instructors": parse_instructors(resource_data), + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + "delivery": transform_delivery(resource_data["learning_format"]), + "location": parse_location(resource_data), + } + ) + return published_runs + + +def transform_course(resource_data: dict) -> dict or None: + """ + Transform raw resource data into a format suitable for + learning resources of type course + + Args: + resource_data(dict): raw course data + + Returns: + dict: transformed course data if it has any viable runs + """ + runs = _transform_runs(resource_data) + if runs: + return { + "readable_id": resource_data["uuid"], + "offered_by": copy.deepcopy(OFFERED_BY), + "platform": PlatformType.mitpe.name, + "etl_source": ETLSource.mitpe.name, + "professional": True, + "certification": True, + "certification_type": CertificationType.professional.name, + "title": clean_title(resource_data["title"]), + "url": parse_resource_url(resource_data), + "image": parse_image(resource_data), + "description": parse_description(resource_data), + "course": { + "course_numbers": [], + }, + "delivery": transform_delivery(resource_data["learning_format"]), + "published": True, + "topics": parse_topics(resource_data), + "runs": runs, + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + } + return None + + +def transform_program(resource_data: dict) -> dict or None: + """ + Transform raw resource data into a format suitable for the Program model + + Args: + resource_data(dict): raw program data + + Returns: + dict: transformed program data + """ + runs = _transform_runs(resource_data) + if runs: + return { + "readable_id": resource_data["uuid"], + "offered_by": copy.deepcopy(OFFERED_BY), + "platform": PlatformType.mitpe.name, + "etl_source": ETLSource.mitpe.name, + "professional": True, + "certification": True, + "certification_type": CertificationType.professional.name, + "title": clean_title(resource_data["title"]), + "url": parse_resource_url(resource_data), + "image": parse_image(resource_data), + "description": parse_description(resource_data), + "course_titles": [ + course_title + for course_title in resource_data["courses"].split("|") + if course_title + ], + "delivery": transform_delivery(resource_data["learning_format"]), + "published": True, + "topics": parse_topics(resource_data), + "runs": runs, + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + } + return None + + +def transform_program_courses(programs: list[dict], courses_data: list[dict]): + """ + Transform the courses for a program, using the transformed course data + + Args: + programs(list[dict]): list of program data + courses_data(list[dict]): list of course data + """ + course_dict = {course["title"]: course for course in courses_data} + for program in programs: + course_titles = [ + clean_title(title) for title in program.pop("course_titles", []) + ] + program["courses"] = [ + copy.deepcopy(course_dict[course_title]) + for course_title in course_titles + if course_title in course_dict + ] + + +def transform(data: list[dict]) -> tuple[list[dict], list[dict]]: + """ + Transform the Professional Education data into courses and programs + + Args: + data(dict): raw course and program data + + Returns: + tuple[list[dict], list[dict]]: tuple containing lists of course and program data + """ + programs = [] + courses = [] + for resource in data: + if parse_resource_type(resource) == LearningResourceType.program.name: + program = transform_program(resource) + if program: + programs.append(program) + else: + course = transform_course(resource) + if course: + courses.append(course) + transform_program_courses(programs, courses) + return courses, programs diff --git a/learning_resources/etl/mitpe_test.py b/learning_resources/etl/mitpe_test.py new file mode 100644 index 0000000000..70346583f0 --- /dev/null +++ b/learning_resources/etl/mitpe_test.py @@ -0,0 +1,242 @@ +"""Tests for Professional Education ETL functions""" + +import datetime +import json +from decimal import Decimal +from pathlib import Path + +import pytest + +from learning_resources.constants import ( + Availability, + Format, + LearningResourceDelivery, + Pace, +) +from learning_resources.etl import mitpe +from learning_resources.factories import ( + LearningResourceOfferorFactory, + LearningResourceTopicFactory, + LearningResourceTopicMappingFactory, +) +from main.test_utils import assert_json_equal + +EXPECTED_COURSES = [ + { + "readable_id": "a44c8b47-552c-45f9-b91b-854172201889", + "offered_by": {"code": "mitpe"}, + "platform": "mitpe", + "etl_source": "mitpe", + "professional": True, + "certification": True, + "certification_type": "professional", + "title": "Comunicação Persuasiva: Pensamento Crítico para Aprimorar a Mensagem (Portuguese)", + "url": "https://professional.mit.edu/course-catalog/comunicacao-persuasiva-pensamento-critico-para-aprimorar-mensagem-portuguese", + "image": { + "alt": " Persuasive Communication Critical Thinking -web banner", + "url": "https://professional.mit.edu/sites/default/files/2022-01/1600x800.png", + }, + "description": "Profissionais de áreas técnicas estão acostumados a falar ou apresentar dados para perfis que compartem os mesmos interesses e campo de atuação, mas podem encontrar dificuldades em transmitir suas ideias para pessoas de outros setores.\n", + "course": {"course_numbers": []}, + "delivery": [LearningResourceDelivery.online.name], + "published": True, + "topics": [ + {"name": "Organizations & Leadership"}, + {"name": "Product Innovation"}, + ], + "runs": [ + { + "run_id": "7802023070620230907", + "title": "Comunicação Persuasiva: Pensamento Crítico para Aprimorar a Mensagem (Portuguese)", + "description": "Profissionais de áreas técnicas estão acostumados a falar ou apresentar dados para perfis que compartem os mesmos interesses e campo de atuação, mas podem encontrar dificuldades em transmitir suas ideias para pessoas de outros setores.\n", + "start_date": datetime.datetime(2123, 7, 6, 4, 0, tzinfo=datetime.UTC), + "end_date": datetime.datetime(2123, 9, 7, 4, 0, tzinfo=datetime.UTC), + "enrollment_end": datetime.datetime( + 2123, 4, 25, 4, 0, tzinfo=datetime.UTC + ), + "published": True, + "prices": [{"amount": Decimal(1870), "currency": "USD"}], + "url": "https://professional.mit.edu/course-catalog/comunicacao-persuasiva-pensamento-critico-para-aprimorar-mensagem-portuguese", + "instructors": [{"full_name": "Edward Schiappa"}, {"full_name": ""}], + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + "delivery": [LearningResourceDelivery.online.name], + "location": "", + } + ], + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + }, + { + "readable_id": "e3be75f6-f7c9-432b-9c24-70c7132e1583", + "offered_by": {"code": "mitpe"}, + "platform": "mitpe", + "etl_source": "mitpe", + "professional": True, + "certification": True, + "certification_type": "professional", + "title": "Design-Thinking and Innovation for Technical Leaders", + "url": "https://professional.mit.edu/course-catalog/design-thinking-and-innovation-technical-leaders", + "image": { + "alt": "Mastering Innovation &amp; Design-Thinking header ", + "url": "https://professional.mit.edu/sites/default/files/2020-08/MITPE-MasteringInnovationDesignThinking-website-banner-1600x800.jpg", + }, + "description": "Become a stronger leader of innovation and design-thinking in your workplace. Join us for a highly interactive and engaging course that will teach you powerful new approaches for creating innovative solutions, crafting vision that gets buy-in, and developing solutions that people love. You'll learn our proven 10-Step Design Process and gain the strategies and hands-on experience to make your mark as a leader of innovation. Don't miss this opportunity to take your leadership capabilities to the next level.\n\nThis course may be taken individually or as part of the Professional Certificate Program in Innovation and Technology.\n", + "course": {"course_numbers": []}, + "delivery": [LearningResourceDelivery.in_person.name], + "published": True, + "topics": [{"name": "Digital Business & IT"}], + "runs": [ + { + "run_id": "4172023071720230719", + "title": "Design-Thinking and Innovation for Technical Leaders", + "description": "Become a stronger leader of innovation and design-thinking in your workplace. Join us for a highly interactive and engaging course that will teach you powerful new approaches for creating innovative solutions, crafting vision that gets buy-in, and developing solutions that people love. You'll learn our proven 10-Step Design Process and gain the strategies and hands-on experience to make your mark as a leader of innovation. Don't miss this opportunity to take your leadership capabilities to the next level.\n\nThis course may be taken individually or as part of the Professional Certificate Program in Innovation and Technology.\n", + "start_date": datetime.datetime(2123, 7, 17, 4, 0, tzinfo=datetime.UTC), + "end_date": datetime.datetime(2123, 7, 19, 4, 0, tzinfo=datetime.UTC), + "enrollment_end": datetime.datetime( + 2123, 6, 17, 4, 0, tzinfo=datetime.UTC + ), + "published": True, + "prices": [{"amount": Decimal(3600), "currency": "USD"}], + "url": "https://professional.mit.edu/course-catalog/design-thinking-and-innovation-technical-leaders", + "instructors": [ + {"full_name": "Blade Kotelly"}, + {"full_name": "Reza Rahaman"}, + {"full_name": ""}, + ], + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + "delivery": [LearningResourceDelivery.in_person.name], + "location": "On Campus", + } + ], + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + }, +] +EXPECTED_PROGRAMS = [ + { + "readable_id": "790a82a4-8967-4b77-9342-4f6be5809abd", + "offered_by": {"code": "mitpe"}, + "platform": "mitpe", + "etl_source": "mitpe", + "professional": True, + "certification": True, + "certification_type": "professional", + "title": "Manufatura Inteligente: Produção na Indústria 4.0 (Portuguese)", + "url": "https://professional.mit.edu/course-catalog/manufatura-inteligente-producao-na-industria-40-portuguese", + "image": { + "alt": "Smart Manufacturing Header Image", + "url": "https://professional.mit.edu/sites/default/files/2020-08/Smart%20Manufacturing.jpg", + }, + "description": "A fábrica do futuro já está aqui. Participe do programa online Manufatura Inteligente: Produção na Indústria 4.0 e aproveite a experiência de mais de cem anos de colaboração do MIT com vários setores. Aprenda as chaves para criar uma indústria inteligente em qualquer escala e saiba como software, sensores e sistemas são integrados para essa finalidade. Com este programa interativo, você passará da criação de modelos a sistemas de fabricação e análise avançada de dados para desenvolver estratégias que gerem uma vantagem competitiva.\n", + "delivery": [LearningResourceDelivery.online.name], + "published": True, + "topics": [{"name": "Business Analytics"}, {"name": "Machine Learning"}], + "runs": [ + { + "run_id": "7192023070620230914", + "title": "Manufatura Inteligente: Produção na Indústria 4.0 (Portuguese)", + "description": "A fábrica do futuro já está aqui. Participe do programa online Manufatura Inteligente: Produção na Indústria 4.0 e aproveite a experiência de mais de cem anos de colaboração do MIT com vários setores. Aprenda as chaves para criar uma indústria inteligente em qualquer escala e saiba como software, sensores e sistemas são integrados para essa finalidade. Com este programa interativo, você passará da criação de modelos a sistemas de fabricação e análise avançada de dados para desenvolver estratégias que gerem uma vantagem competitiva.\n", + "start_date": datetime.datetime(2123, 7, 6, 4, 0, tzinfo=datetime.UTC), + "end_date": datetime.datetime(2123, 9, 14, 4, 0, tzinfo=datetime.UTC), + "enrollment_end": datetime.datetime( + 2123, 7, 6, 4, 0, tzinfo=datetime.UTC + ), + "published": True, + "prices": [{"amount": Decimal(1870), "currency": "USD"}], + "url": "https://professional.mit.edu/course-catalog/manufatura-inteligente-producao-na-industria-40-portuguese", + "instructors": [{"full_name": ""}, {"full_name": "Brian Anthony"}], + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + "location": "", + "delivery": [LearningResourceDelivery.online.name], + } + ], + "courses": [EXPECTED_COURSES[0], EXPECTED_COURSES[1]], + "format": [Format.asynchronous.name], + "pace": [Pace.instructor_paced.name], + "availability": Availability.dated.name, + } +] + + +@pytest.fixture +def prof_ed_settings(settings): + """Fixture to set Professional Education API URL""" + settings.MITPE_BASE_API_URL = "http://pro_edu_api.com" + settings.MITPE_API_ENABLED = True + return settings + + +@pytest.fixture +def mock_fetch_data(mocker): + """Mock fetch_data function""" + + def read_json(file_path): + with Path.open(file_path, "r") as file: + return mocker.Mock(json=mocker.Mock(return_value=json.load(file))) + + return mocker.patch( + "learning_resources.etl.mitpe.requests.get", + side_effect=[ + read_json("./test_json/mitpe/professional_ed_resources_0.json"), + read_json("./test_json/mitpe/professional_ed_resources_1.json"), + read_json("./test_json/mitpe/professional_ed_resources_2.json"), + ], + ) + + +@pytest.mark.parametrize("prof_ed_api_url", ["http://pro_edd_api.com", None]) +def test_extract(settings, mock_fetch_data, prof_ed_api_url): + """Test extract function""" + settings.MITPE_BASE_API_URL = prof_ed_api_url + settings.MITPE_API_ENABLED = True + expected = [] + for page in range(3): + with Path.open( + Path(f"./test_json/mitpe/professional_ed_resources_{page}.json"), + "r", + ) as file: + expected.extend(json.load(file)) + results = mitpe.extract() + if prof_ed_api_url: + assert len(results) == 3 + assert_json_equal(results, expected) + else: + assert len(results) == 0 + + +@pytest.mark.django_db +def test_transform(settings, mock_fetch_data, prof_ed_settings): + """Test transform function, and effectively most other functions""" + settings.MITPE_BASE_API_URL = "http://pro_edu_api.edu" + offeror = LearningResourceOfferorFactory.create(code="mitpe") + LearningResourceTopicMappingFactory.create( + offeror=offeror, + topic=LearningResourceTopicFactory.create(name="Product Innovation"), + topic_name="Technology Innovation", + ) + LearningResourceTopicMappingFactory.create( + offeror=offeror, + topic=LearningResourceTopicFactory.create(name="Data Science"), + topic_name="Data Science", + ) + extracted = mitpe.extract() + assert len(extracted) == 3 + courses, programs = mitpe.transform(extracted) + assert_json_equal( + sorted(courses, key=lambda course: course["readable_id"]), EXPECTED_COURSES + ) + assert_json_equal(programs, EXPECTED_PROGRAMS) + + +def test_enabled_flag(prof_ed_settings, settings): + """Extract should return empty lists if the MITPE_API_ENABLED flag is False""" + settings.MITPE_API_ENABLED = False + assert mitpe.extract() == [] diff --git a/learning_resources/etl/pipelines.py b/learning_resources/etl/pipelines.py index 023263e58b..864e01ee9f 100644 --- a/learning_resources/etl/pipelines.py +++ b/learning_resources/etl/pipelines.py @@ -12,6 +12,7 @@ micromasters, mit_edx, mit_edx_programs, + mitpe, mitxonline, ocw, oll, @@ -28,6 +29,7 @@ ProgramLoaderConfig, ) from learning_resources.etl.exceptions import ExtractException +from learning_resources.models import LearningResource log = logging.getLogger(__name__) @@ -191,3 +193,23 @@ def ocw_courses_etl( posthog.posthog_transform_lrd_view_events, posthog.posthog_extract_lrd_view_events, ) + + +def mitpe_etl() -> tuple[list[LearningResource], list[LearningResource]]: + """ + ETL for professional education courses and programs. + + This pipeline is structured a bit differently than others because the source API + and the transform/extract functions return both courses and programs. + """ + courses_data, programs_data = mitpe.transform(mitpe.extract()) + return ( + loaders.load_courses( + ETLSource.mitpe.name, courses_data, config=CourseLoaderConfig(prune=True) + ), + loaders.load_programs( + ETLSource.mitpe.name, + programs_data, + config=ProgramLoaderConfig(prune=True, courses=CourseLoaderConfig()), + ), + ) diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index 3c8ea38a26..755625ec62 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -379,6 +379,32 @@ def test_prolearn_programs_etl(): assert result == mock_load_programs.return_value +def test_mitpe_etl(mocker): + """Verify that the professional education etl pipeline executes correctly""" + mocker.patch("learning_resources.etl.mitpe.extract") + mock_transform = mocker.patch( + "learning_resources.etl.mitpe.transform", + return_value=( + [{"a": "b"}, {"c": "d"}], + [{"e": "f"}, {"g": "h"}], + ), + ) + mock_load_courses = mocker.patch("learning_resources.etl.loaders.load_courses") + mock_load_programs = mocker.patch("learning_resources.etl.loaders.load_programs") + results = pipelines.mitpe_etl() + mock_load_courses.assert_called_once_with( + ETLSource.mitpe.name, + mock_transform.return_value[0], + config=CourseLoaderConfig(prune=True), + ) + mock_load_programs.assert_called_once_with( + ETLSource.mitpe.name, + mock_transform.return_value[1], + config=ProgramLoaderConfig(prune=True, courses=CourseLoaderConfig()), + ) + assert results == (mock_load_courses.return_value, mock_load_programs.return_value) + + def test_prolearn_courses_etl(): """Verify that prolearn courses etl pipeline executes correctly""" with reload_mocked_pipeline( diff --git a/learning_resources/etl/prolearn.py b/learning_resources/etl/prolearn.py index e8a82435d7..68ffc70fdf 100644 --- a/learning_resources/etl/prolearn.py +++ b/learning_resources/etl/prolearn.py @@ -38,34 +38,36 @@ # List of query fields for prolearn, deduced from its website api calls PROLEARN_QUERY_FIELDS = "title\nnid\nurl\ncertificate_name\ncourse_application_url\ncourse_link\nfield_course_or_program\nstart_value\nend_value\ndepartment\ndepartment_url\nbody\nbody_override\nfield_time_commitment\nfield_duration\nfeatured_image_url\nfield_featured_video\nfield_non_degree_credits\nfield_price\nfield_related_courses_programs\nrelated_courses_programs_title\nfield_time_commitment\nucc_hot_topic\nucc_name\nucc_tid\napplication_process\napplication_process_override\nformat_name\nimage_override_url\nvideo_override_url\nfield_new_course_program\nfield_tooltip" # noqa: E501 -SEE_EXCLUSION = ( - '{operator: "<>", name: "department", value: "MIT Sloan Executive Education"}' +MITPE_EXCLUSION = ( + '{operator: "<>", name: "department", value: "MIT Professional Education"}' ) + # Performs the query made on https://prolearn.mit.edu/graphql, with a filter for program or course # noqa: E501 PROLEARN_QUERY = """ -query { +query {{ searchAPISearch( index_id:\"default_solr_index\", - range:{limit: 999, offset: 0}, - condition_group: { + range:{{limit: 999, offset: 0}}, + condition_group: {{ conjunction: AND, groups: [ - { + {{ conjunction: AND, conditions: [ - {operator: \"=\", name: \"field_course_or_program\", value: \"%s\"}, - {operator: \"<>\", name: \"department\", value: \"MIT xPRO\"} - %s + {{operator: \"=\", name: \"field_course_or_program\", value: \"{course_or_program}\"}}, + {{operator: \"<>\", name: \"department\", value: \"MIT xPRO\"}}, + {{operator: \"<>\", name: \"department\", value: \"MIT Sloan Executive Education\"}} + {mitpe_exclusion} ] - } + }} ] - } - ) { + }} + ) {{ result_count - documents {... on DefaultSolrIndexDoc {%s}} - } -} + documents {{... on DefaultSolrIndexDoc {{{query_fields}}}}} + }} +}} """ # noqa: E501 UNIQUE_FIELD = "url" @@ -209,12 +211,15 @@ def extract_data(course_or_program: str) -> list[dict]: list of dict: courses or programs """ # noqa: D401, E501 if settings.PROLEARN_CATALOG_API_URL: - sloan_filter = SEE_EXCLUSION if settings.SEE_API_ENABLED else "" + mitpe_filter = MITPE_EXCLUSION if settings.MITPE_API_ENABLED else "" response = requests.post( settings.PROLEARN_CATALOG_API_URL, json={ - "query": PROLEARN_QUERY - % (course_or_program, sloan_filter, PROLEARN_QUERY_FIELDS) + "query": PROLEARN_QUERY.format( + course_or_program=course_or_program, + mitpe_exclusion=mitpe_filter, + query_fields=PROLEARN_QUERY_FIELDS, + ) }, timeout=30, ).json() diff --git a/learning_resources/etl/prolearn_test.py b/learning_resources/etl/prolearn_test.py index d7016fb8c6..8b4adb6e6b 100644 --- a/learning_resources/etl/prolearn_test.py +++ b/learning_resources/etl/prolearn_test.py @@ -3,6 +3,7 @@ import json from datetime import UTC, datetime from decimal import Decimal +from pathlib import Path from urllib.parse import urljoin, urlparse import pytest @@ -20,8 +21,8 @@ ) from learning_resources.etl.constants import ETLSource from learning_resources.etl.prolearn import ( + MITPE_EXCLUSION, PROLEARN_BASE_URL, - SEE_EXCLUSION, UNIQUE_FIELD, extract_courses, extract_data, @@ -62,22 +63,23 @@ def _mock_offerors_platforms(): @pytest.fixture(autouse=True) -def mock_prolearn_api_setting(settings): # noqa: PT004 +def mock_prolearn_api_setting(settings): """Set the prolearn api url""" settings.PROLEARN_CATALOG_API_URL = "http://localhost/test/programs/api" + return settings @pytest.fixture def mock_csail_programs_data(): """Mock prolearn CSAIL programs data""" - with open("./test_json/prolearn_csail_programs.json") as f: # noqa: PTH123 + with Path.open("./test_json/prolearn_csail_programs.json") as f: return json.loads(f.read()) @pytest.fixture def mock_mitpe_courses_data(): """Mock prolearn MIT Professional Education courses data""" - with open("./test_json/prolearn_mitpe_courses.json") as f: # noqa: PTH123 + with Path.open("./test_json/prolearn_mitpe_courses.json") as f: return json.loads(f.read()) @@ -283,8 +285,8 @@ def test_prolearn_transform_courses(mock_mitpe_courses_data): @pytest.mark.parametrize( ("date_int", "expected_dt"), [ - [1670932800, datetime(2022, 12, 13, 12, 0, tzinfo=UTC)], # noqa: PT007 - [None, None], # noqa: PT007 + (1670932800, datetime(2022, 12, 13, 12, 0, tzinfo=UTC)), + (None, None), ], ) def test_parse_date(date_int, expected_dt): @@ -312,10 +314,10 @@ def test_parse_price(price_str, expected_price): @pytest.mark.parametrize( ("topic", "expected"), [ - ["Blockchain", "Blockchain"], # noqa: PT007 - ["Systems Engineering", "Systems Engineering"], # noqa: PT007 - ["Other Business", "Management"], # noqa: PT007 - ["Other Technology", "Digital Business & IT"], # noqa: PT007 + ("Blockchain", "Blockchain"), + ("Systems Engineering", "Systems Engineering"), + ("Other Business", "Management"), + ("Other Technology", "Digital Business & IT"), ], ) def test_parse_topic(topic, expected): @@ -370,9 +372,9 @@ def test_parse_platform(department, platform_name): @pytest.mark.parametrize( ("featured_image_url", "expected_url"), [ - ["/a/b/c.jog", "http://localhost/a/b/c.jog"], # noqa: PT007 - ["", None], # noqa: PT007 - [None, None], # noqa: PT007 + ("/a/b/c.jog", "http://localhost/a/b/c.jog"), + ("", None), + (None, None), ], ) def test_parse_image(featured_image_url, expected_url): @@ -422,12 +424,12 @@ def test_update_delivery( assert first_course["delivery"] == sorted(expected_delivery) -@pytest.mark.parametrize("sloan_api_enabled", [True, False]) -def test_sloan_exclusion(settings, mocker, sloan_api_enabled): - """Slaon exclusion should be included if sloan api enabled""" - settings.SEE_API_ENABLED = sloan_api_enabled - mock_post = mocker.patch("learning_resources.etl.sloan.requests.post") +@pytest.mark.parametrize("mitpe_api_enabled", [True, False]) +def test_mitpe_exclusion(settings, mocker, mitpe_api_enabled): + """MITPE exclusion should be included if respective api enabled""" + settings.MITPE_API_ENABLED = mitpe_api_enabled + mock_post = mocker.patch("learning_resources.etl.prolearn.requests.post") extract_data("course") assert ( - SEE_EXCLUSION in mock_post.call_args[1]["json"]["query"] - ) is sloan_api_enabled + MITPE_EXCLUSION in mock_post.call_args[1]["json"]["query"] + ) is mitpe_api_enabled diff --git a/learning_resources/etl/sloan.py b/learning_resources/etl/sloan.py index 82c3aa3284..2ee629d37f 100644 --- a/learning_resources/etl/sloan.py +++ b/learning_resources/etl/sloan.py @@ -197,7 +197,6 @@ def extract(): "SEE_API_CLIENT_SECRET", "SEE_API_ACCESS_TOKEN_URL", "SEE_API_URL", - "SEE_API_ENABLED", ] for setting in required_settings: if not getattr(settings, setting): diff --git a/learning_resources/etl/sloan_test.py b/learning_resources/etl/sloan_test.py index ee0510c9e6..ea797eec2a 100644 --- a/learning_resources/etl/sloan_test.py +++ b/learning_resources/etl/sloan_test.py @@ -39,7 +39,6 @@ def mock_sloan_api_setting(settings): # noqa: PT004 settings.SEE_API_CLIENT_ID = "test" settings.SEE_API_CLIENT_SECRET = "test" # noqa: S105 settings.SEE_API_ACCESS_TOKEN_URL = "http://localhost/test/access-token" # noqa: S105 - settings.SEE_API_ENABLED = True @pytest.fixture @@ -234,12 +233,6 @@ def test_parse_availability(delivery, run_format, availability): assert parse_availability(None) == Availability.dated.name -def test_enabled_flag(mock_sloan_api_setting, settings): - """Extract should return empty lists if the SEE_API_ENABLED flag is False""" - settings.SEE_API_ENABLED = False - assert extract() == ([], []) - - @pytest.mark.parametrize( ("delivery", "run_format", "pace"), [ diff --git a/learning_resources/management/commands/backpopulate_mitpe_data.py b/learning_resources/management/commands/backpopulate_mitpe_data.py new file mode 100644 index 0000000000..30c1b3c1bc --- /dev/null +++ b/learning_resources/management/commands/backpopulate_mitpe_data.py @@ -0,0 +1,50 @@ +"""Management command for populating professional education course/program data""" + +from django.core.management import BaseCommand + +from learning_resources.etl.constants import ETLSource +from learning_resources.models import LearningResource +from learning_resources.tasks import get_mitpe_data +from learning_resources.utils import resource_delete_actions +from main.utils import now_in_utc + + +class Command(BaseCommand): + """Populate professional education courses""" + + help = "Populate professional education courses" + + def add_arguments(self, parser): + parser.add_argument( + "--delete", + dest="delete", + action="store_true", + help="Delete all existing records first", + ) + super().add_arguments(parser) + + def handle(self, *args, **options): # noqa: ARG002 + """Run Populate professional education courses""" + if options["delete"]: + self.stdout.write( + "Deleting all existing Prof. Ed. courses from database and opensearch" + ) + for resource in LearningResource.objects.filter( + etl_source=ETLSource.mitpe.value + ): + resource_delete_actions(resource) + else: + task = get_mitpe_data.delay() + self.stdout.write( + f"Started task {task} to get professional education course/program data" + ) + self.stdout.write("Waiting on task...") + start = now_in_utc() + count = task.get() + total_seconds = (now_in_utc() - start).total_seconds() + self.stdout.write( + f"Population of Prof. Ed. data finished, took {total_seconds} seconds" + ) + self.stdout.write( + f"Populated {count} resources. See celery logs for details." + ) diff --git a/learning_resources/management/commands/transfer_list_resources.py b/learning_resources/management/commands/transfer_list_resources.py index 03b4ab4e36..29a6749aa2 100644 --- a/learning_resources/management/commands/transfer_list_resources.py +++ b/learning_resources/management/commands/transfer_list_resources.py @@ -15,7 +15,12 @@ class Command(BaseCommand): help = "Migrate relationships from unpublished resources to published resources." def add_arguments(self, parser): - parser.add_argument("resource_type", type=str, help="Resource type to migrate") + parser.add_argument( + "from_resource_type", type=str, help="Resource type to migrate from" + ) + parser.add_argument( + "to_resource_type", type=str, help="Resource type to migrate to" + ) parser.add_argument( "match_field", type=str, help="Resource field to match resources by" ) @@ -38,19 +43,21 @@ def handle(self, *args, **options): # noqa: ARG002 Migrate relationships in learningpaths and userlists from unpublished resources to published replacement resources. """ - resource_type = options["resource_type"] + from_resource_type = options["from_resource_type"] + to_resource_type = options["to_resource_type"] match_field = options["match_field"] from_source = options["from_source"] to_source = options["to_source"] delete = options["delete"] self.stdout.write( - f"Migrate {resource_type} relationships from " - f"{from_source} to {to_source}, matching on {match_field}" + f"Migrate {from_resource_type} relationships from {from_source}" + f" to {to_resource_type}:{to_source}, matching on {match_field}" ) start = now_in_utc() unpublished, matching = transfer_list_resources( - resource_type, + from_resource_type, + to_resource_type, match_field, from_source, to_source, diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py index 0cd857bcfe..50cc12eaea 100644 --- a/learning_resources/tasks.py +++ b/learning_resources/tasks.py @@ -91,6 +91,13 @@ def get_oll_data(sheets_id=None): return len(courses) +@app.task +def get_mitpe_data(): + """Execute the Professional Education ETL pipeline""" + courses, programs = pipelines.mitpe_etl() + return len(courses) + len(programs) + + @app.task def get_prolearn_data(): """Execute the ProLearn ETL pipelines""" diff --git a/learning_resources/tasks_test.py b/learning_resources/tasks_test.py index 065ce3af9a..e1f4d0d4af 100644 --- a/learning_resources/tasks_test.py +++ b/learning_resources/tasks_test.py @@ -113,6 +113,18 @@ def test_get_oll_data(mocker): mock_pipelines.oll_etl.assert_called_once_with(None) +def test_get_mitpe_data(mocker): + """Verify that the get_mitpe_data task invokes the Professional Ed pipeline""" + mock_pipelines = mocker.patch("learning_resources.tasks.pipelines") + mock_pipelines.mitpe_etl.return_value = ( + LearningResourceFactory.create_batch(2), + LearningResourceFactory.create_batch(1), + ) + task = tasks.get_mitpe_data.delay() + mock_pipelines.mitpe_etl.assert_called_once_with() + assert task.result == 3 + + def test_get_prolearn_data(mocker): """Verify that the get_prolearn_data invokes the Prolearn ETL pipeline""" mock_pipelines = mocker.patch("learning_resources.tasks.pipelines") diff --git a/learning_resources/utils.py b/learning_resources/utils.py index 4abfb80543..56da4f8e32 100644 --- a/learning_resources/utils.py +++ b/learning_resources/utils.py @@ -496,8 +496,9 @@ def add_parent_topics_to_learning_resource(resource): _walk_lr_topic_parents(resource, topic.parent) -def transfer_list_resources( - resource_type: str, +def transfer_list_resources( # noqa: PLR0913 + from_resource_type: str, + to_resource_type: str, matching_field: str, from_source: str, to_source: str, @@ -519,7 +520,7 @@ def transfer_list_resources( tuple[int, int]: the number of unpublished and matching published resources """ unpublished_resources = LearningResource.objects.filter( - resource_type=resource_type, published=False, etl_source=from_source + resource_type=from_resource_type, published=False, etl_source=from_source ) unpublished_count = 0 published_count = 0 @@ -528,7 +529,7 @@ def transfer_list_resources( unique_value = getattr(resource, matching_field) published_replacement = LearningResource.objects.filter( **{matching_field: unique_value}, - resource_type=resource_type, + resource_type=to_resource_type, published=True, etl_source=to_source, ).first() diff --git a/learning_resources/utils_test.py b/learning_resources/utils_test.py index 6267ace80b..98d01b5203 100644 --- a/learning_resources/utils_test.py +++ b/learning_resources/utils_test.py @@ -543,7 +543,12 @@ def test_transfer_list_resources( ] results = transfer_list_resources( - "podcast", matching_field, from_source, to_source, delete_unpublished=delete_old + "podcast", + "podcast", + matching_field, + from_source, + to_source, + delete_unpublished=delete_old, ) podcast_path.refresh_from_db() podcast_list.refresh_from_db() diff --git a/learning_resources_search/utils.py b/learning_resources_search/utils.py index 2e0632e415..f94bfd7ac0 100644 --- a/learning_resources_search/utils.py +++ b/learning_resources_search/utils.py @@ -35,7 +35,9 @@ def prune_channel_subscriptions(): actual_query, _ = PercolateQuery.objects.get_or_create( source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE, original_query=adjusted_original_query, - query=adjust_query_for_percolator(adjusted_original_query), + defaults={ + "query": adjust_query_for_percolator(adjusted_original_query), + }, ) queries = PercolateQuery.objects.filter( original_query__contains=urllib.parse.parse_qs(query_string), diff --git a/main/settings_celery.py b/main/settings_celery.py index 8209830a9a..6eb21387eb 100644 --- a/main/settings_celery.py +++ b/main/settings_celery.py @@ -53,6 +53,10 @@ "PODCAST_FETCH_SCHEDULE_SECONDS", 60 * 60 * 2 ), # default is every 2 hours }, + "update-professional-ed-resources-every-1-days": { + "task": "learning_resources.tasks.get_mitpe_data", + "schedule": crontab(minute=0, hour=21), # 5:00pm EST + }, "update-prolearn-courses-every-1-days": { "task": "learning_resources.tasks.get_prolearn_data", "schedule": crontab(minute=0, hour=5), # 1:00am EST diff --git a/main/settings_course_etl.py b/main/settings_course_etl.py index 519a241683..8684c4daf8 100644 --- a/main/settings_course_etl.py +++ b/main/settings_course_etl.py @@ -71,12 +71,13 @@ SEE_API_ACCESS_TOKEN_URL = get_string("SEE_API_ACCESS_TOKEN_URL", None) SEE_API_CLIENT_ID = get_string("SEE_API_CLIENT_ID", None) SEE_API_CLIENT_SECRET = get_string("SEE_API_CLIENT_SECRET", None) -SEE_API_ENABLED = get_bool("SEE_API_ENABLED", default=False) + CSAIL_BASE_URL = get_string("CSAIL_BASE_URL", None) SEE_BASE_URL = get_string("SEE_BASE_URL", None) MITPE_BASE_URL = get_string("MITPE_BASE_URL", "https://professional.mit.edu/") MITPE_BASE_API_URL = get_string("MITPE_BASE_API_URL", None) +MITPE_API_ENABLED = get_bool("MITPE_API_ENABLED", default=False) # course catalog video etl settings OPEN_VIDEO_DATA_BRANCH = get_string("OPEN_VIDEO_DATA_BRANCH", "master") diff --git a/test_json/mitpe/professional_ed_resources_0.json b/test_json/mitpe/professional_ed_resources_0.json new file mode 100644 index 0000000000..72f43811a7 --- /dev/null +++ b/test_json/mitpe/professional_ed_resources_0.json @@ -0,0 +1,52 @@ +[ + { + "uuid": "790a82a4-8967-4b77-9342-4f6be5809abd", + "title": "Manufatura Inteligente: Produção na Indústria 4.0 (Portuguese)", + "url": "/course-catalog/manufatura-inteligente-producao-na-industria-40-portuguese", + "description": "A fábrica do futuro já está aqui. Participe do programa online Manufatura Inteligente: Produção na Indústria 4.0 e aproveite a experiência de mais de cem anos de colaboração do MIT com vários setores. Aprenda as chaves para criar uma indústria inteligente em qualquer escala e saiba como software, sensores e sistemas são integrados para essa finalidade. Com este programa interativo, você passará da criação de modelos a sistemas de fabricação e análise avançada de dados para desenvolver estratégias que gerem uma vantagem competitiva.\n", + "learning_format": "Online", + "resource_type": "program", + "course_certificate": "", + "topics": "Business Analytics|Machine Learning", + "image__src": "/sites/default/files/2020-08/Smart%20Manufacturing.jpg", + "image__alt": "Smart Manufacturing Header Image", + "courses": "Comunicação Persuasiva: Pensamento Crítico para Aprimorar a Mensagem (Portuguese)|Design-Thinking and Innovation for Technical Leaders", + "start_date": "2123-07-06", + "end_date": "2123-09-14", + "enrollment_end_date": "2123-07-06", + "price": "$1870", + "lead_instructors": "", + "instructors": "Brian Anthony", + "language": "", + "node_id": "719", + "location": "Online", + "duration": "10 semanas", + "continuing_ed_credits": "8", + "run__readable_id": "7192023070620230914" + }, + { + "uuid": "a44c8b47-552c-45f9-b91b-854172201889", + "title": "Comunicação Persuasiva: Pensamento Crítico para Aprimorar a Mensagem (Portuguese)", + "url": "/course-catalog/comunicacao-persuasiva-pensamento-critico-para-aprimorar-mensagem-portuguese", + "description": "Profissionais de áreas técnicas estão acostumados a falar ou apresentar dados para perfis que compartem os mesmos interesses e campo de atuação, mas podem encontrar dificuldades em transmitir suas ideias para pessoas de outros setores.\n", + "learning_format": "Online", + "resource_type": "", + "course_certificate": "Chief Digital Officer|Digital Transformation|Industry 4.0|Certificate of Completion", + "topics": "Organizations & Leadership|Product Innovation", + "image__src": "/sites/default/files/2022-01/1600x800.png", + "image__alt": " Persuasive Communication Critical Thinking -web banner", + "courses": "", + "start_date": "2123-07-06", + "end_date": "2123-09-07", + "enrollment_end_date": "2123-04-25", + "price": "$1870", + "lead_instructors": "Edward Schiappa", + "instructors": "", + "language": "", + "node_id": "780", + "location": "online", + "duration": "9 semanas", + "continuing_ed_credits": "7.2", + "run__readable_id": "7802023070620230907" + } +] diff --git a/test_json/mitpe/professional_ed_resources_1.json b/test_json/mitpe/professional_ed_resources_1.json new file mode 100644 index 0000000000..d673713691 --- /dev/null +++ b/test_json/mitpe/professional_ed_resources_1.json @@ -0,0 +1,27 @@ +[ + { + "uuid": "e3be75f6-f7c9-432b-9c24-70c7132e1583", + "title": "Design-Thinking and Innovation for Technical Leaders", + "url": "/course-catalog/design-thinking-and-innovation-technical-leaders", + "description": "Become a stronger leader of innovation and design-thinking in your workplace. Join us for a highly interactive and engaging course that will teach you powerful new approaches for creating innovative solutions, crafting vision that gets buy-in, and developing solutions that people love. You'll learn our proven 10-Step Design Process and gain the strategies and hands-on experience to make your mark as a leader of innovation. Don't miss this opportunity to take your leadership capabilities to the next level.\n\nThis course may be taken individually or as part of the Professional Certificate Program in Innovation and Technology.\n", + "learning_format": "On Campus", + "resource_type": "", + "course_certificate": "Certificate of Completion", + "topics": "Digital Business & IT", + "image__src": "/sites/default/files/2020-08/MITPE-MasteringInnovationDesignThinking-website-banner-1600x800.jpg", + "image__alt": "Mastering Innovation &amp; Design-Thinking header ", + "courses": "", + "start_date": "2123-07-17|2025-09-01", + "end_date": "2123-07-19|2025-12-01", + "enrollment_end_date": "2123-06-17|2025-09-01", + "price": "$3600", + "lead_instructors": "Blade Kotelly|Reza Rahaman", + "instructors": "", + "language": "", + "node_id": "417", + "location": "On Campus", + "duration": "3 Days", + "continuing_ed_credits": "2.0 CEUs", + "run__readable_id": "4172023071720230719" + } +] diff --git a/test_json/mitpe/professional_ed_resources_2.json b/test_json/mitpe/professional_ed_resources_2.json new file mode 100644 index 0000000000..fe51488c70 --- /dev/null +++ b/test_json/mitpe/professional_ed_resources_2.json @@ -0,0 +1 @@ +[] From a1927f129dddadd91c725b8eebf17321bd9ace4d Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Tue, 29 Oct 2024 13:44:53 -0400 Subject: [PATCH 2/2] Converge on one url setting for professional ed: MITPE_BASE_URL --- learning_resources/etl/mitpe.py | 9 ++++----- learning_resources/etl/mitpe_test.py | 6 ++---- main/settings_course_etl.py | 1 - news_events/etl/mitpe_events.py | 6 +++--- news_events/etl/mitpe_events_test.py | 11 ++--------- news_events/etl/mitpe_news.py | 6 +++--- news_events/etl/mitpe_news_test.py | 11 ++--------- 7 files changed, 16 insertions(+), 34 deletions(-) diff --git a/learning_resources/etl/mitpe.py b/learning_resources/etl/mitpe.py index e100beca8e..103ce7b05a 100644 --- a/learning_resources/etl/mitpe.py +++ b/learning_resources/etl/mitpe.py @@ -28,7 +28,6 @@ log = logging.getLogger(__name__) -BASE_URL = "https://professional.mit.edu/" OFFERED_BY = {"code": OfferedBy.mitpe.name} @@ -61,14 +60,14 @@ def extract() -> list[dict]: list[dict]: list of raw course or program data """ required_settings = [ - "MITPE_BASE_API_URL", + "MITPE_BASE_URL", "MITPE_API_ENABLED", ] for setting in required_settings: if not getattr(settings, setting): log.warning("Missing required setting %s", setting) return [] - return list(_fetch_data(urljoin(settings.MITPE_BASE_API_URL, "/feeds/courses/"))) + return list(_fetch_data(urljoin(settings.MITPE_BASE_URL, "/feeds/courses/"))) def parse_topics(resource_data: dict) -> list[dict]: @@ -116,7 +115,7 @@ def parse_image(resource_data: dict) -> dict or None: if img_src: return { "alt": resource_data["image__alt"], - "url": urljoin(BASE_URL, img_src), + "url": urljoin(settings.MITPE_BASE_URL, img_src), } return None @@ -150,7 +149,7 @@ def parse_resource_url(resource_data: dict) -> str: Returns: str: url for the resource """ - return urljoin(BASE_URL, resource_data["url"]) + return urljoin(settings.MITPE_BASE_URL, resource_data["url"]) def clean_title(title: str) -> str: diff --git a/learning_resources/etl/mitpe_test.py b/learning_resources/etl/mitpe_test.py index 70346583f0..ef3bc65b93 100644 --- a/learning_resources/etl/mitpe_test.py +++ b/learning_resources/etl/mitpe_test.py @@ -169,7 +169,6 @@ @pytest.fixture def prof_ed_settings(settings): """Fixture to set Professional Education API URL""" - settings.MITPE_BASE_API_URL = "http://pro_edu_api.com" settings.MITPE_API_ENABLED = True return settings @@ -195,7 +194,7 @@ def read_json(file_path): @pytest.mark.parametrize("prof_ed_api_url", ["http://pro_edd_api.com", None]) def test_extract(settings, mock_fetch_data, prof_ed_api_url): """Test extract function""" - settings.MITPE_BASE_API_URL = prof_ed_api_url + settings.MITPE_BASE_URL = prof_ed_api_url settings.MITPE_API_ENABLED = True expected = [] for page in range(3): @@ -213,9 +212,8 @@ def test_extract(settings, mock_fetch_data, prof_ed_api_url): @pytest.mark.django_db -def test_transform(settings, mock_fetch_data, prof_ed_settings): +def test_transform(mock_fetch_data, prof_ed_settings): """Test transform function, and effectively most other functions""" - settings.MITPE_BASE_API_URL = "http://pro_edu_api.edu" offeror = LearningResourceOfferorFactory.create(code="mitpe") LearningResourceTopicMappingFactory.create( offeror=offeror, diff --git a/main/settings_course_etl.py b/main/settings_course_etl.py index 8684c4daf8..30bfbf1771 100644 --- a/main/settings_course_etl.py +++ b/main/settings_course_etl.py @@ -76,7 +76,6 @@ CSAIL_BASE_URL = get_string("CSAIL_BASE_URL", None) SEE_BASE_URL = get_string("SEE_BASE_URL", None) MITPE_BASE_URL = get_string("MITPE_BASE_URL", "https://professional.mit.edu/") -MITPE_BASE_API_URL = get_string("MITPE_BASE_API_URL", None) MITPE_API_ENABLED = get_bool("MITPE_API_ENABLED", default=False) # course catalog video etl settings diff --git a/news_events/etl/mitpe_events.py b/news_events/etl/mitpe_events.py index 6f7c93134b..672c36274c 100644 --- a/news_events/etl/mitpe_events.py +++ b/news_events/etl/mitpe_events.py @@ -27,12 +27,12 @@ def extract() -> list[dict]: Returns: list[dict]: News data in JSON format. """ - if settings.MITPE_BASE_API_URL: + if settings.MITPE_BASE_URL: return list( - fetch_data_by_page(urljoin(settings.MITPE_BASE_API_URL, "/feeds/events/")) + fetch_data_by_page(urljoin(settings.MITPE_BASE_URL, "/feeds/events/")) ) else: - log.warning("Missing required setting MITPE_BASE_API_URL") + log.warning("Missing required setting MITPE_BASE_URL") return [] diff --git a/news_events/etl/mitpe_events_test.py b/news_events/etl/mitpe_events_test.py index 2b6cce9f1c..14d0d5617c 100644 --- a/news_events/etl/mitpe_events_test.py +++ b/news_events/etl/mitpe_events_test.py @@ -9,13 +9,6 @@ from news_events.etl.mitpe_events import extract, transform -@pytest.fixture -def mitpe_events_settings(settings): - """Assign the required MITPE settings""" - settings.MITPE_BASE_API_URL = "https://api.example.com" - return settings - - @pytest.fixture def mitpe_events_json_data(): """Return the raw content of the MITPE events json response""" @@ -30,12 +23,12 @@ def _mock_get_json(mocker, mitpe_events_json_data): mock_get.side_effect = [mitpe_events_json_data, []] -def test_extract(mitpe_events_json_data, mitpe_events_settings): +def test_extract(mitpe_events_json_data): """Extract function should return raw json data for MITPE events""" assert extract() == mitpe_events_json_data -def test_transform(mitpe_events_json_data, mitpe_events_settings): +def test_transform(mitpe_events_json_data): """Assert that the transform function returns the expected data""" source_and_items = transform(extract()) assert len(source_and_items) == 1 diff --git a/news_events/etl/mitpe_news.py b/news_events/etl/mitpe_news.py index 3d6a07392d..ad50298eb7 100644 --- a/news_events/etl/mitpe_news.py +++ b/news_events/etl/mitpe_news.py @@ -23,12 +23,12 @@ def extract() -> list[dict]: Returns: list[dict]: News data in JSON format. """ - if settings.MITPE_BASE_API_URL: + if settings.MITPE_BASE_URL: return list( - fetch_data_by_page(urljoin(settings.MITPE_BASE_API_URL, "/feeds/news/")) + fetch_data_by_page(urljoin(settings.MITPE_BASE_URL, "/feeds/news/")) ) else: - log.warning("Missing required setting MITPE_BASE_API_URL") + log.warning("Missing required setting MITPE_BASE_URL") return [] diff --git a/news_events/etl/mitpe_news_test.py b/news_events/etl/mitpe_news_test.py index 7fd7f1f247..2793daf327 100644 --- a/news_events/etl/mitpe_news_test.py +++ b/news_events/etl/mitpe_news_test.py @@ -9,13 +9,6 @@ from news_events.etl.mitpe_news import extract, transform -@pytest.fixture -def mitpe_news_settings(settings): - """Assign the required MITPE settings""" - settings.MITPE_BASE_API_URL = "https://api.example.com" - return settings - - @pytest.fixture def mitpe_news_json_data(): """Return the raw content of the MITPE news json response""" @@ -30,12 +23,12 @@ def _mock_get_json(mocker, mitpe_news_json_data): mock_get.side_effect = [mitpe_news_json_data, []] -def test_extract(settings, mitpe_news_json_data, mitpe_news_settings): +def test_extract(settings, mitpe_news_json_data): """Extract function should return raw json data for MITPE news""" assert extract() == mitpe_news_json_data -def test_transform(mitpe_news_json_data, mitpe_news_settings): +def test_transform(mitpe_news_json_data): """Assert that the transform function returns the expected data""" source_and_items = transform(extract()) assert len(source_and_items) == 1