diff --git a/frontends/api/src/generated/api.ts b/frontends/api/src/generated/api.ts index 66805bd61f..3658de19ef 100644 --- a/frontends/api/src/generated/api.ts +++ b/frontends/api/src/generated/api.ts @@ -270,10 +270,10 @@ export type ContentTypeEnum = export interface Course { /** * - * @type {Array} + * @type {{ [key: string]: any; }} * @memberof Course */ - extra_course_numbers?: Array | null + course_numbers?: { [key: string]: any } | null } /** * Serializer for the Course model @@ -283,10 +283,10 @@ export interface Course { export interface CourseRequest { /** * - * @type {Array} + * @type {{ [key: string]: any; }} * @memberof CourseRequest */ - extra_course_numbers?: Array | null + course_numbers?: { [key: string]: any } | null } /** * Serializer for the LearningPath model diff --git a/frontends/api/src/test-utils/factories/learningResources.ts b/frontends/api/src/test-utils/factories/learningResources.ts index 321212cb5c..4b550d3119 100644 --- a/frontends/api/src/test-utils/factories/learningResources.ts +++ b/frontends/api/src/test-utils/factories/learningResources.ts @@ -139,7 +139,7 @@ const learningResource: Factory = ( certification: faker.lorem.word(), offered_by: faker.lorem.word(), course: { - extra_course_numbers: maybe(() => repeat(faker.lorem.word)) ?? null, + course_numbers: maybe(() => repeat(faker.datatype.json)) ?? [], }, } } else if (type === ResourceTypeEnum.Program) { diff --git a/learning_resources/etl/constants.py b/learning_resources/etl/constants.py index 0aa542bcb9..47a6fbdb39 100644 --- a/learning_resources/etl/constants.py +++ b/learning_resources/etl/constants.py @@ -48,3 +48,10 @@ class ETLSource(Enum): ocw = "ocw" prolearn = "prolearn" podcast = "podcast" + + +class CourseNumberType(Enum): + """Enum of course number types""" + + primary = "primary" + cross_listed = "cross-listed" diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index 5eb270d4d5..ed555418dd 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -123,12 +123,12 @@ def test_load_program( platform = LearningResourcePlatformFactory.create() program = ( - ProgramFactory.create(courses=[], platform=platform) + ProgramFactory.create(courses=[], platform=platform.platform) if program_exists - else ProgramFactory.build(courses=[], platform=platform) + else ProgramFactory.build(courses=[], platform=platform.platform) ) - LearningResourcePlatformFactory.create(platform=platform) + LearningResourcePlatformFactory.create(platform=platform.platform) if program_exists: learning_resource = program.learning_resource @@ -138,16 +138,16 @@ def test_load_program( learning_resource.save() courses = ( - CourseFactory.create_batch(2, platform=platform) + CourseFactory.create_batch(2, platform=platform.platform) if courses_exist - else CourseFactory.build_batch(2, platform=platform) + else CourseFactory.build_batch(2, platform=platform.platform) ) before_course_count = len(courses) if courses_exist else 0 after_course_count = len(courses) if program_exists and has_retired_course: - course = CourseFactory.create(platform=platform) + course = CourseFactory.create(platform=platform.platform) before_course_count += 1 after_course_count += 1 program.learning_resource.resources.set( @@ -262,9 +262,9 @@ def test_load_course( # noqa: PLR0913 platform = LearningResourcePlatformFactory.create() course = ( - CourseFactory.create(runs=[], platform=platform) + CourseFactory.create(runs=[], platform=platform.platform) if course_exists - else CourseFactory.build(runs=[], platform=platform) + else CourseFactory.build(runs=[], platform=platform.platform) ) learning_resource = course.learning_resource @@ -283,7 +283,7 @@ def test_load_course( # noqa: PLR0913 props = { "readable_id": learning_resource.readable_id, - "platform": platform, + "platform": platform.platform, "professional": True, "title": learning_resource.title, "image": {"url": learning_resource.image.url}, @@ -375,13 +375,13 @@ def test_load_duplicate_course( platform = LearningResourcePlatformFactory.create() course = ( - CourseFactory.create(runs=[], platform=platform) + CourseFactory.create(runs=[], platform=platform.platform) if course_exists else CourseFactory.build() ) duplicate_course = ( - CourseFactory.create(runs=[], platform=platform) + CourseFactory.create(runs=[], platform=platform.platform) if duplicate_course_exists else CourseFactory.build() ) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index c68e71a14e..e185a2ae99 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -13,6 +13,7 @@ from learning_resources.etl.constants import ETLSource from learning_resources.etl.utils import ( extract_valid_department_from_id, + generate_course_numbers_json, transform_topics, ) @@ -146,6 +147,11 @@ def _transform_course(course): "runs": [ _transform_run(course_run, course) for course_run in course["courseruns"] ], + "course": { + "course_numbers": generate_course_numbers_json( + course["readable_id"], is_ocw=False + ), + }, "published": bool( parse_page_attribute(course, "page_url") ), # a course is only considered published if it has a page url diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index e9c675e0c5..5a401c7dc4 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -4,13 +4,14 @@ # pylint: disable=redefined-outer-name from datetime import datetime from itertools import chain +from unittest.mock import ANY from urllib.parse import urljoin import pytest from learning_resources.constants import LearningResourceType, PlatformType from learning_resources.etl import mitxonline -from learning_resources.etl.constants import ETLSource +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.mitxonline import ( _transform_image, parse_page_attribute, @@ -194,6 +195,15 @@ def test_mitxonline_transform_programs(mock_mitxonline_programs_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": ANY, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in program_data["courses"] if "PROCTORED EXAM" not in course_data["title"] @@ -271,6 +281,15 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": ANY, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in mock_mitxonline_courses_data if "PROCTORED EXAM" not in course_data["title"] diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py index cb418bba9f..84f54575fe 100644 --- a/learning_resources/etl/ocw.py +++ b/learning_resources/etl/ocw.py @@ -26,6 +26,7 @@ from learning_resources.etl.constants import ETLSource from learning_resources.etl.utils import ( extract_text_metadata, + generate_course_numbers_json, get_content_type, ) from learning_resources.models import ContentFile, LearningResource @@ -286,7 +287,7 @@ def transform_course(course_data: dict) -> dict: else: extra_course_numbers = [] - readable_id = f"{course_data.get(PRIMARY_COURSE_ID)}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501 + readable_id = f"{course_data[PRIMARY_COURSE_ID]}+{slugify(course_data.get('term'))}_{course_data.get('year')}" # noqa: E501 topics = [ {"name": topic_name} for topic_name in list( @@ -321,7 +322,11 @@ def transform_course(course_data: dict) -> dict: "last_modified": course_data.get("last_modified"), "published": True, "course": { - "extra_course_numbers": extra_course_numbers, + "course_numbers": generate_course_numbers_json( + course_data[PRIMARY_COURSE_ID], + extra_nums=extra_course_numbers, + is_ocw=True, + ), }, "topics": topics, "runs": [transform_run(course_data)], diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py index 721f635306..bb15a3e7e3 100644 --- a/learning_resources/etl/ocw_test.py +++ b/learning_resources/etl/ocw_test.py @@ -9,7 +9,8 @@ from moto import mock_s3 from learning_resources.conftest import OCW_TEST_PREFIX, setup_s3_ocw -from learning_resources.etl.constants import ETLSource +from learning_resources.constants import DEPARTMENTS +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.ocw import ( transform_content_files, transform_contentfile, @@ -195,8 +196,26 @@ def test_transform_course(settings, legacy_uid, site_uid, expected_uid, has_extr assert transformed_json["image"]["alt"] == ( "Illustration of an aircraft wing showing connections between the disciplines of the course." ) - assert transformed_json["course"]["extra_course_numbers"] == ( - ["1", "2"] if has_extra_num else [] + assert transformed_json["course"]["course_numbers"][0] == { + "value": "16.01", + "department": {"department_id": "16", "name": DEPARTMENTS["16"]}, + "listing_type": CourseNumberType.primary.value, + } + assert transformed_json["course"]["course_numbers"][1:] == ( + [ + { + "value": "1", + "department": {"department_id": "1", "name": DEPARTMENTS["1"]}, + "listing_type": CourseNumberType.cross_listed.value, + }, + { + "value": "2", + "department": {"department_id": "2", "name": DEPARTMENTS["2"]}, + "listing_type": CourseNumberType.cross_listed.value, + }, + ] + if has_extra_num + else [] ) else: assert transformed_json is None diff --git a/learning_resources/etl/openedx.py b/learning_resources/etl/openedx.py index 573e12b04e..cc6b6fc6bb 100644 --- a/learning_resources/etl/openedx.py +++ b/learning_resources/etl/openedx.py @@ -12,7 +12,10 @@ from learning_resources.constants import LearningResourceType from learning_resources.etl.constants import COMMON_HEADERS -from learning_resources.etl.utils import extract_valid_department_from_id +from learning_resources.etl.utils import ( + extract_valid_department_from_id, + generate_course_numbers_json, +) from learning_resources.utils import get_year_and_semester MIT_OWNER_KEYS = ["MITx", "MITx_PRO"] @@ -262,6 +265,9 @@ def _transform_course(config, course): for course_run in course.get("course_runs", []) if _filter_course_run(course_run) ], + "course": { + "course_numbers": generate_course_numbers_json(course.get("key")), + }, "published": any( run["status"] == "published" for run in course.get("course_runs", []) ), diff --git a/learning_resources/etl/openedx_test.py b/learning_resources/etl/openedx_test.py index dbee39e75f..1427551e98 100644 --- a/learning_resources/etl/openedx_test.py +++ b/learning_resources/etl/openedx_test.py @@ -6,7 +6,7 @@ import pytest from learning_resources.constants import LearningResourceType -from learning_resources.etl.constants import COMMON_HEADERS +from learning_resources.etl.constants import COMMON_HEADERS, CourseNumberType from learning_resources.etl.openedx import ( OpenEdxConfiguration, openedx_extract_transform_factory, @@ -170,6 +170,18 @@ def test_transform_course( # noqa: PLR0913 "published": True, } ], + "course": { + "course_numbers": [ + { + "value": "MITx+15.071x", + "department": { + "department_id": "15", + "name": "Sloan School of Management", + }, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } if not is_course_run_deleted: assert transformed_courses[1]["published"] is False diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index 4583f3d7de..baef1a5d0b 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -208,7 +208,12 @@ def test_ocw_courses_etl(settings, mocker): resource = LearningResource.objects.first() assert resource.readable_id == "16.01+fall_2005" - assert resource.course.extra_course_numbers == ["16.02", "16.03", "16.04"] + assert [num["value"] for num in resource.course.course_numbers] == [ + "16.01", + "16.02", + "16.03", + "16.04", + ] assert resource.platform.platform == PlatformType.ocw.name assert resource.offered_by.code == OfferedBy.ocw.name assert resource.departments.first().department_id == "16" diff --git a/learning_resources/etl/prolearn.py b/learning_resources/etl/prolearn.py index b35cc78dc9..1911fa9e03 100644 --- a/learning_resources/etl/prolearn.py +++ b/learning_resources/etl/prolearn.py @@ -316,6 +316,9 @@ def _transform_course( "url": parse_url(course), "image": parse_image(course), "description": course["body"], + "course": { + "course_numbers": [], + }, "published": True, "topics": parse_topic(course), "runs": _transform_runs(course), diff --git a/learning_resources/etl/prolearn_test.py b/learning_resources/etl/prolearn_test.py index 3633ee8428..5c501fdcb9 100644 --- a/learning_resources/etl/prolearn_test.py +++ b/learning_resources/etl/prolearn_test.py @@ -204,6 +204,7 @@ def test_prolearn_transform_courses(mock_mitpe_courses_data): course["start_value"], course["end_value"] ) ], + "course": {"course_numbers": []}, } for course in extracted_data ] diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py index 67b13cfbf3..f447331ba9 100644 --- a/learning_resources/etl/utils.py +++ b/learning_resources/etl/utils.py @@ -33,8 +33,11 @@ DEPARTMENTS, VALID_TEXT_FILE_TYPES, ) -from learning_resources.etl.constants import ETLSource -from learning_resources.models import ContentFile, LearningResourceRun +from learning_resources.etl.constants import CourseNumberType, ETLSource +from learning_resources.models import ( + ContentFile, + LearningResourceRun, +) log = logging.getLogger(__name__) @@ -496,7 +499,9 @@ def get_content_type(file_type: str) -> str: return CONTENT_TYPE_FILE -def extract_valid_department_from_id(course_string: str) -> list[str]: +def extract_valid_department_from_id( + course_string: str, is_ocw: bool = False # noqa: FBT001, FBT002 +) -> list[str]: """ Extracts a department from course data and returns @@ -506,8 +511,53 @@ def extract_valid_department_from_id(course_string: str) -> list[str]: Returns: department (str): parsed department string """ # noqa: D401 - department_string = re.search(r"\+([^\.]*)\.", course_string) + num_pattern = r"^(\d+)\.*" if is_ocw else r"\+([^\.]*)\." + department_string = re.search(num_pattern, course_string) if department_string: dept_candidate = department_string.groups()[0] + # Some CMS-W department courses start with 21W, but we want to use CMS-W + if dept_candidate == "21W": + dept_candidate = "CMS-W" return [dept_candidate] if dept_candidate in DEPARTMENTS else [] return [] + + +def generate_course_numbers_json( + course_num: str, + extra_nums: list[str] | None = None, + is_ocw: bool = False, # noqa: FBT001, FBT002 +) -> list[dict]: + """ + Generate a dict containing info on course numbers and departments + + Args: + course_num (str): primary course number + extra_nums (list[str]): list of cross-listed course numbers + is_ocw (bool): whether or not the course is an OCW course + + Returns: + course_number_json (list[dict]): list of dicts containing course number info + + """ + course_number_json = [] + course_numbers = [course_num] + if not extra_nums: + extra_nums = [] + course_numbers.extend(extra_nums) + for idx, num in enumerate(course_numbers): + dept_id = extract_valid_department_from_id(num, is_ocw=is_ocw) + course_number_json.append( + { + "value": num, + "listing_type": CourseNumberType.primary.value + if idx == 0 + else CourseNumberType.cross_listed.value, + "department": { + "department_id": dept_id[0], + "name": DEPARTMENTS[dept_id[0]], + } + if dept_id + else None, + } + ) + return course_number_json diff --git a/learning_resources/etl/xpro.py b/learning_resources/etl/xpro.py index 44f5ff7b79..f5928d35cc 100644 --- a/learning_resources/etl/xpro.py +++ b/learning_resources/etl/xpro.py @@ -9,7 +9,7 @@ from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType from learning_resources.etl.constants import ETLSource -from learning_resources.etl.utils import transform_topics +from learning_resources.etl.utils import generate_course_numbers_json, transform_topics log = logging.getLogger(__name__) @@ -87,7 +87,6 @@ def _transform_learning_resource_course(course): Args: course (dict): course data - xpro_platform_map (dict): dict of xpro platform names to platform values Returns: dict: normalized learning resource data @@ -108,6 +107,11 @@ def _transform_learning_resource_course(course): "topics": transform_topics(course.get("topics", [])), "runs": [_transform_run(course_run) for course_run in course["courseruns"]], "resource_type": LearningResourceType.course.name, + "course": { + "course_numbers": generate_course_numbers_json( + course["readable_id"], is_ocw=False + ), + }, } diff --git a/learning_resources/etl/xpro_test.py b/learning_resources/etl/xpro_test.py index 447f13fecc..a0c60f766a 100644 --- a/learning_resources/etl/xpro_test.py +++ b/learning_resources/etl/xpro_test.py @@ -9,7 +9,7 @@ from learning_resources.constants import LearningResourceType, PlatformType from learning_resources.etl import xpro -from learning_resources.etl.constants import ETLSource +from learning_resources.etl.constants import CourseNumberType, ETLSource from learning_resources.etl.utils import UCC_TOPIC_MAPPINGS from open_discussions.test_utils import any_instance_of @@ -162,6 +162,15 @@ def test_xpro_transform_programs(mock_xpro_programs_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": None, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in program_data["courses"] ], @@ -218,6 +227,15 @@ def test_xpro_transform_courses(mock_xpro_courses_data): } for course_run_data in course_data["courseruns"] ], + "course": { + "course_numbers": [ + { + "value": course_data["readable_id"], + "department": None, + "listing_type": CourseNumberType.primary.value, + } + ] + }, } for course_data in mock_xpro_courses_data ] diff --git a/learning_resources/factories.py b/learning_resources/factories.py index 6b6cfa7cf3..dbbc0c2e7b 100644 --- a/learning_resources/factories.py +++ b/learning_resources/factories.py @@ -100,7 +100,7 @@ class LearningResourcePlatformFactory(DjangoModelFactory): """Factory for LearningResourcePlatform""" platform = FuzzyChoice([platform.name for platform in constants.PlatformType]) - name = FuzzyChoice([platform.value for platform in constants.PlatformType]) + name = factory.LazyAttribute(lambda o: constants.PlatformType[o.platform].value) is_edx = Faker("boolean") has_content_files = Faker("boolean") @@ -123,8 +123,8 @@ class Meta: class LearningResourceOfferorFactory(DjangoModelFactory): """Factory for LearningResourceOfferor""" - name = FuzzyChoice([offeror.value for offeror in constants.OfferedBy]) code = FuzzyChoice([offeror.name for offeror in constants.OfferedBy]) + name = factory.LazyAttribute(lambda o: constants.OfferedBy[o.code].value) professional = Faker("boolean") class Meta: @@ -188,7 +188,15 @@ class CourseFactory(DjangoModelFactory): LearningResourceFactory, resource_type=constants.LearningResourceType.course.name, ) - extra_course_numbers = factory.List([]) + course_numbers = factory.List( + [ + { + "value": f"{random.randint(1,20)}.0001", # noqa: S311 + "department": None, + "listing_type": "Primary", + } + ] + ) @factory.post_generation def runs(self, create, extracted, **kwargs): # noqa: ARG002 @@ -210,28 +218,28 @@ def platform(self, create, extracted, **kwargs): # noqa: ARG002 return self.learning_resource.platform = LearningResourcePlatformFactory.create( - platform=extracted + platform=extracted, name=constants.PlatformType[extracted].value ) self.learning_resource.save() @factory.post_generation - def etl_source(self, create, extracted, **kwargs): # noqa: ARG002 - """Create etl_source for course.learning_resource""" + def offered_by(self, create, extracted, **kwargs): # noqa: ARG002 + """Create LearningResourceOfferor for course.learning_resource""" if not create or not extracted: return - self.learning_resource.etl_source = extracted + self.learning_resource.offered_by = LearningResourceOfferorFactory.create( + code=extracted, name=constants.OfferedBy[extracted].value + ) self.learning_resource.save() @factory.post_generation - def offered_by(self, create, extracted, **kwargs): # noqa: ARG002 - """Create LearningResourceOfferor for course.learning_resource""" + def etl_source(self, create, extracted, **kwargs): # noqa: ARG002 + """Create etl_source for course.learning_resource""" if not create or not extracted: return - self.learning_resource.offered_by = LearningResourceOfferorFactory.create( - code=extracted - ) + self.learning_resource.etl_source = extracted self.learning_resource.save() @factory.post_generation @@ -394,11 +402,24 @@ def runs(self, create, extracted, **kwargs): # noqa: ARG002 @factory.post_generation def platform(self, create, extracted, **kwargs): # noqa: ARG002 - """Create platform for program.learning_resource""" + """Create platform for course.learning_resource""" + if not create or not extracted: + return + + self.learning_resource.platform = LearningResourcePlatformFactory.create( + platform=extracted, name=constants.PlatformType[extracted].value + ) + self.learning_resource.save() + + @factory.post_generation + def offered_by(self, create, extracted, **kwargs): # noqa: ARG002 + """Create LearningResourceOfferor for course.learning_resource""" if not create or not extracted: return - self.learning_resource.platform = extracted + self.learning_resource.offered_by = LearningResourceOfferorFactory.create( + code=extracted, name=constants.OfferedBy[extracted].value + ) self.learning_resource.save() @factory.post_generation diff --git a/learning_resources/migrations/0023_course_course_numbers.py b/learning_resources/migrations/0023_course_course_numbers.py new file mode 100644 index 0000000000..c60009d29e --- /dev/null +++ b/learning_resources/migrations/0023_course_course_numbers.py @@ -0,0 +1,40 @@ +# Generated by Django 4.1.10 on 2023-10-31 15:06 + +from django.db import migrations, models + +from learning_resources.etl.constants import ETLSource +from learning_resources.etl.utils import generate_course_numbers_json + + +def set_course_numbers(apps, schema_editor): + """ + Save course_numbers for every course + """ + Course = apps.get_model("learning_resources", "Course") + for course in Course.objects.select_related("learning_resource").iterator(): + is_ocw = course.learning_resource.etl_source == ETLSource.ocw.name + course.course_numbers = generate_course_numbers_json( + course.learning_resource.readable_id.split("+")[0] + if is_ocw + else course.learning_resource.readable_id, + extra_nums=course.extra_course_numbers, + is_ocw=is_ocw, + ) + course.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0022_alter_learningresource_resource_type"), + ] + + operations = [ + migrations.AddField( + model_name="course", + name="course_numbers", + field=models.JSONField(blank=True, null=True), + ), + migrations.RunPython( + set_course_numbers, reverse_code=migrations.RunPython.noop + ), + ] diff --git a/learning_resources/migrations/0024_remove_course_extra_course_numbers.py b/learning_resources/migrations/0024_remove_course_extra_course_numbers.py new file mode 100644 index 0000000000..6b95e80d89 --- /dev/null +++ b/learning_resources/migrations/0024_remove_course_extra_course_numbers.py @@ -0,0 +1,16 @@ +# Generated by Django 4.1.10 on 2023-10-31 18:52 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources", "0023_course_course_numbers"), + ] + + operations = [ + migrations.RemoveField( + model_name="course", + name="extra_course_numbers", + ), + ] diff --git a/learning_resources/models.py b/learning_resources/models.py index e705d75fc1..8535815e7d 100644 --- a/learning_resources/models.py +++ b/learning_resources/models.py @@ -3,6 +3,7 @@ from django.contrib.auth.models import User from django.contrib.postgres.fields import ArrayField from django.db import models +from django.db.models import JSONField from learning_resources import constants from learning_resources.constants import ( @@ -232,9 +233,7 @@ class Course(TimestampedModel): on_delete=models.deletion.CASCADE, primary_key=True, ) - extra_course_numbers = ArrayField( - models.CharField(max_length=128), null=True, blank=True - ) + course_numbers = JSONField(null=True, blank=True) @property def runs(self): diff --git a/learning_resources/models_test.py b/learning_resources/models_test.py index 28a6ba2094..ff30d7cce4 100644 --- a/learning_resources/models_test.py +++ b/learning_resources/models_test.py @@ -87,12 +87,12 @@ def test_course_creation(): ) def test_lr_certification(offered_by, availability, has_cert): """The certification property should return the expected value""" - offered_by = LearningResourceOfferorFactory.create(name=offered_by) + offered_by = LearningResourceOfferorFactory.create(code=offered_by) course = CourseFactory.create( - offered_by=offered_by, + offered_by=offered_by.code, runs=[], - is_professional=(has_cert and offered_by != constants.OfferedBy.mitx.value), + is_professional=(has_cert and offered_by != constants.OfferedBy.mitx.name), ) assert course.learning_resource.certification == ( diff --git a/learning_resources/serializers_test.py b/learning_resources/serializers_test.py index 90c1b2dafb..bdcbc993be 100644 --- a/learning_resources/serializers_test.py +++ b/learning_resources/serializers_test.py @@ -21,10 +21,8 @@ def test_serialize_course_model(): """ Verify that a serialized course contains attributes for related objects """ - course = factories.CourseFactory.create() - serializer = serializers.LearningResourceSerializer( - instance=course.learning_resource - ) + course_resource = factories.CourseFactory.create().learning_resource + serializer = serializers.LearningResourceSerializer(instance=course_resource) assert len(serializer.data["topics"]) > 0 assert "name" in serializer.data["topics"][0] assert len(serializer.data["runs"]) == 2 @@ -35,9 +33,9 @@ def test_serialize_course_model(): assert serializer.data["departments"][0] is not None assert serializer.data["platform"] is not None assert ( - serializer.data["course"] == serializers.CourseSerializer(instance=course).data + serializer.data["course"] + == serializers.CourseSerializer(instance=course_resource.course).data ) - assert serializer.data["course"]["extra_course_numbers"] is not None assert serializer.data["program"] is None @@ -189,7 +187,7 @@ def test_content_file_serializer(): "content_title": "test title", "section": "test section", } - platform = constants.PlatformType.xpro.value + platform = constants.PlatformType.xpro.name course = factories.CourseFactory.create(platform=platform) content_file = factories.ContentFileFactory.create( run=course.learning_resource.runs.first(), **content_kwargs diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index a7e763ed32..816d480282 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -5,12 +5,14 @@ from learning_resources_search.connection import get_default_alias_name from learning_resources_search.constants import ( + COURSE_QUERY_FIELDS, LEARNING_RESOURCE_QUERY_FIELDS, LEARNING_RESOURCE_SEARCH_FILTERS, LEARNING_RESOURCE_TYPES, RUN_INSTRUCTORS_QUERY_FIELDS, RUNS_QUERY_FIELDS, SEARCH_NESTED_FILTERS, + SOURCE_EXCLUDED_FIELDS, TOPICS_QUERY_FIELDS, ) @@ -98,6 +100,14 @@ def generate_text_clause(text): } } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + query_type: {"query": text, "fields": COURSE_QUERY_FIELDS} + }, + } + }, { "nested": { "path": "runs", @@ -326,6 +336,8 @@ def execute_learn_search(search_params): ) search = Search(index=",".join(indexes)) + search = search.source(fields={"excludes": SOURCE_EXCLUDED_FIELDS}) + if search_params.get("offset"): search = search.extra(from_=search_params.get("offset")) diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index 0a70abfce8..8d3739c3e6 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -12,6 +12,7 @@ generate_text_clause, relevant_indexes, ) +from learning_resources_search.constants import SOURCE_EXCLUDED_FIELDS @pytest.mark.parametrize( @@ -71,6 +72,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "course", ], } }, @@ -93,6 +95,19 @@ def test_generate_text_clause(): } } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "course.course_numbers.value", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -148,6 +163,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "course", ], } }, @@ -164,6 +180,19 @@ def test_generate_text_clause(): "readable_id": {"value": "MATH*", "rewrite": "constant_score"} } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "course.course_numbers.value", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -219,6 +248,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "course", ], } }, @@ -241,6 +271,19 @@ def test_generate_text_clause(): } } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + "query_string": { + "query": '"math"', + "fields": [ + "course.course_numbers.value", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -296,6 +339,7 @@ def test_generate_text_clause(): "offered_by", "department", "resource_content_tags", + "course", ], } }, @@ -315,6 +359,19 @@ def test_generate_text_clause(): "readable_id": {"value": '"MATH"*', "rewrite": "constant_score"} } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + "query_string": { + "query": '"math"', + "fields": [ + "course.course_numbers.value", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -513,6 +570,7 @@ def test_execute_learn_search(opensearch): } query = { + "_source": {"excludes": SOURCE_EXCLUDED_FIELDS}, "query": { "bool": { "should": [ @@ -538,6 +596,7 @@ def test_execute_learn_search(opensearch): "offered_by", "department", "resource_content_tags", + "course", ], } }, @@ -562,6 +621,19 @@ def test_execute_learn_search(opensearch): } } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "course.course_numbers.value", + ], + } + }, + } + }, { "nested": { "path": "runs", @@ -618,6 +690,7 @@ def test_execute_learn_search(opensearch): "offered_by", "department", "resource_content_tags", + "course", ], } }, @@ -640,6 +713,19 @@ def test_execute_learn_search(opensearch): } } }, + { + "nested": { + "path": "course.course_numbers", + "query": { + "multi_match": { + "query": "math", + "fields": [ + "course.course_numbers.value" + ], + } + }, + } + }, { "nested": { "path": "runs", diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py index 7c584b759b..0a4205936a 100644 --- a/learning_resources_search/constants.py +++ b/learning_resources_search/constants.py @@ -61,6 +61,7 @@ class IndexestoUpdate(Enum): "platform", "professional", "id", + "course", ] SEARCH_NESTED_FILTERS = { @@ -119,6 +120,24 @@ class IndexestoUpdate(Enum): }, "offered_by": {"type": "keyword"}, "resource_content_tags": {"type": "keyword"}, + "course": { + "properties": { + "course_numbers": { + "type": "nested", + "properties": { + "value": {"type": "keyword"}, + "sort_coursenum": {"type": "keyword"}, + "department": { + "properties": { + "department_id": {"type": "keyword"}, + "name": {"type": "keyword"}, + } + }, + "primary": {"type": "boolean"}, + }, + } + } + }, "runs": { "type": "nested", "properties": { @@ -162,6 +181,7 @@ class IndexestoUpdate(Enum): "offered_by", "department", "resource_content_tags", + "course", ] AGGREGATIONS = [ @@ -178,6 +198,10 @@ class IndexestoUpdate(Enum): TOPICS_QUERY_FIELDS = ["topics.name"] +COURSE_QUERY_FIELDS = [ + "course.course_numbers.value", +] + RUNS_QUERY_FIELDS = [ "runs.year", "runs.semester", @@ -196,3 +220,8 @@ class IndexestoUpdate(Enum): } SEARCH_CONN_EXCEPTIONS = (ESConnectionError, UrlTimeoutError) + +SOURCE_EXCLUDED_FIELDS = [ + "course.course_numbers.sort_coursenum", + "course.course_numbers.primary", +] diff --git a/learning_resources_search/management/commands/update_index.py b/learning_resources_search/management/commands/update_index.py index fe0857d9f9..fce9d1751d 100644 --- a/learning_resources_search/management/commands/update_index.py +++ b/learning_resources_search/management/commands/update_index.py @@ -16,8 +16,7 @@ def add_arguments(self, parser): allowed_course_platforms = [ platform.value for platform in PlatformType - if platform.value - not in [PlatformType.youtube.value, PlatformType.podcast.value] + if platform.value not in [PlatformType.podcast.value] ] parser.add_argument( diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py index 9a212f1a8d..e1235679cb 100644 --- a/learning_resources_search/serializers.py +++ b/learning_resources_search/serializers.py @@ -8,6 +8,7 @@ from rest_framework import serializers from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType +from learning_resources.etl.constants import CourseNumberType from learning_resources.models import LearningResource from learning_resources.serializers import ( LearningResourceSerializer, @@ -17,6 +18,65 @@ log = logging.getLogger() +def add_extra_course_number_fields(resource_data: dict): + """Add sortable coursenums and primary(boolean) fields to course.course_numbers""" + if resource_data.get("course"): + course_numbers = resource_data["course"].get("course_numbers", []) + for coursenum_data in course_numbers: + department_data = coursenum_data.get("department", {}) + department_num = ( + department_data.get("department_id") if department_data else None + ) + course_num = coursenum_data.get("value") + if ( + department_num + and department_num[0].isdigit() + and len(department_num) == 1 + ): + sort_coursenum = f"0{course_num}" + else: + sort_coursenum = course_num + coursenum_data["primary"] = ( + coursenum_data.get("listing_type") == CourseNumberType.primary.value + ) + coursenum_data["sort_coursenum"] = sort_coursenum + + +def transform_resource_data(resource_data: dict) -> dict: + """ + Apply transformations on the resource data + + Args: + resource_data(dict): The resource data + + Returns: + dict: The transformed resource data + + """ + add_extra_course_number_fields(resource_data) + return resource_data + + +def serialize_learning_resource_for_update( + learning_resource_obj: LearningResource, +) -> dict: + """ + Add any special search-related fields to the serializer data here + + Args: + learning_resource_obj(LearningResource): The learning resource object + + Returns: + dict: The serialized and transformed resource data + + """ + return { + **transform_resource_data( + LearningResourceSerializer(learning_resource_obj).data + ), + } + + def extract_values(obj, key): """ Pull all values of specified key from nested JSON. @@ -261,7 +321,7 @@ def serialize_course_for_bulk(learning_resource_obj): """ return { "_id": learning_resource_obj.id, - **LearningResourceSerializer(learning_resource_obj).data, + **serialize_learning_resource_for_update(learning_resource_obj), } @@ -296,7 +356,7 @@ def serialize_program_for_bulk(learning_resource_obj): """ return { "_id": learning_resource_obj.id, - **LearningResourceSerializer(learning_resource_obj).data, + **serialize_learning_resource_for_update(learning_resource_obj), } diff --git a/learning_resources_search/serializers_test.py b/learning_resources_search/serializers_test.py index 69885ee017..572d4132a5 100644 --- a/learning_resources_search/serializers_test.py +++ b/learning_resources_search/serializers_test.py @@ -1,11 +1,11 @@ """Tests for opensearch serializers""" - - import pytest from django.http import QueryDict from rest_framework.renderers import JSONRenderer from learning_resources import factories +from learning_resources.constants import DEPARTMENTS +from learning_resources.etl.constants import CourseNumberType from learning_resources.models import Course, Program from learning_resources.serializers import LearningResourceSerializer from learning_resources_search import serializers @@ -35,16 +35,51 @@ def test_serialize_bulk_courses(mocker): @pytest.mark.django_db() -def test_serialize_course_for_bulk(): +@pytest.mark.parametrize( + ("readable_id", "sort_course_num"), [("1", "01"), ("15", "15"), ("CMS-W", "CMS-W")] +) +@pytest.mark.parametrize( + ("extra_num", "sorted_extra_num"), [("2", "02"), ("16", "16"), ("CC", "CC")] +) +def test_serialize_course_for_bulk( + readable_id, sort_course_num, extra_num, sorted_extra_num +): """ Test that serialize_course_for_bulk yields a valid LearningResourceSerializer """ - course = factories.CourseFactory.create() - assert serializers.serialize_course_for_bulk(course.learning_resource) == { - "_id": course.learning_resource.id, - **LearningResourceSerializer(course.learning_resource).data, + course_numbers = [ + { + "value": readable_id, + "listing_type": CourseNumberType.primary.value, + "department": { + "department_id": readable_id, + "name": DEPARTMENTS[readable_id], + }, + }, + { + "value": extra_num, + "listing_type": CourseNumberType.cross_listed.value, + "department": {"department_id": extra_num, "name": DEPARTMENTS[extra_num]}, + }, + ] + resource = factories.CourseFactory.create( + course_numbers=course_numbers + ).learning_resource + assert resource.course.course_numbers == course_numbers + expected_data = {"_id": resource.id, **LearningResourceSerializer(resource).data} + expected_data["course"]["course_numbers"][0] = { + **expected_data["course"]["course_numbers"][0], + "primary": True, + "sort_coursenum": sort_course_num, + } + expected_data["course"]["course_numbers"][1] = { + **expected_data["course"]["course_numbers"][1], + "primary": False, + "sort_coursenum": sorted_extra_num, } + assert serializers.serialize_course_for_bulk(resource) == expected_data + @pytest.mark.django_db() def test_serialize_bulk_programs(mocker): @@ -211,7 +246,6 @@ def test_learning_resources_search_response_serializer(settings): "professional": True, "certification": "Certificates", "prices": [2250.0], - "course": {"extra_course_numbers": None}, "learning_path": None, "podcast": None, "podcast_episode": None, @@ -241,6 +275,18 @@ def test_learning_resources_search_response_serializer(settings): "checksum": None, } ], + "course": { + "course_numbers": [ + { + "value": "1.001", + "department": { + "department_id": "1", + "name": DEPARTMENTS["1"], + }, + "listing_type": "Primary", + } + ] + }, "image": { "id": 16, "url": "https://xpro-app-production.s3.amazonaws.com/original_images/MCPO-800x500.jpg", @@ -328,7 +374,6 @@ def test_learning_resources_search_response_serializer(settings): "professional": True, "certification": "Certificates", "prices": [2250.0], - "course": {"extra_course_numbers": None}, "learning_path": None, "podcast": None, "podcast_episode": None, @@ -358,6 +403,18 @@ def test_learning_resources_search_response_serializer(settings): "checksum": None, } ], + "course": { + "course_numbers": [ + { + "value": "1.001", + "department": { + "department_id": "1", + "name": DEPARTMENTS["1"], + }, + "listing_type": "Primary", + } + ] + }, "image": { "id": 16, "url": "https://xpro-app-production.s3.amazonaws.com/original_images/MCPO-800x500.jpg", diff --git a/learning_resources_search/tasks.py b/learning_resources_search/tasks.py index bcc89aa363..1ec81ec790 100644 --- a/learning_resources_search/tasks.py +++ b/learning_resources_search/tasks.py @@ -12,7 +12,6 @@ from opensearchpy.exceptions import NotFoundError, RequestError from learning_resources.models import Course, LearningResource, Program -from learning_resources.serializers import LearningResourceSerializer from learning_resources.utils import load_course_blocklist from learning_resources_search import indexing_api as api from learning_resources_search.constants import ( @@ -22,6 +21,7 @@ IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError +from learning_resources_search.serializers import serialize_learning_resource_for_update from open_discussions.celery import app from open_discussions.utils import chunks, merge_strings @@ -50,7 +50,7 @@ def deindex_document(doc_id, object_type, **kwargs): def upsert_course(course_id): """Upsert course based on stored database information""" course_obj = LearningResource.objects.get(id=course_id) - course_data = LearningResourceSerializer(course_obj).data + course_data = serialize_learning_resource_for_update(course_obj) api.upsert_document( course_id, course_data, @@ -64,7 +64,7 @@ def upsert_program(program_id): """Upsert program based on stored database information""" program_obj = Program.objects.get(learning_resource_id=program_id) - program_data = LearningResourceSerializer(program_obj.learning_resource).data + program_data = serialize_learning_resource_for_update(program_obj.learning_resource) api.upsert_document( program_obj.learning_resource.id, program_data, diff --git a/learning_resources_search/tasks_test.py b/learning_resources_search/tasks_test.py index 9ca117f066..704776ce3e 100644 --- a/learning_resources_search/tasks_test.py +++ b/learning_resources_search/tasks_test.py @@ -10,11 +10,8 @@ from learning_resources.constants import PlatformType from learning_resources.factories import ( CourseFactory, - LearningResourcePlatformFactory, ProgramFactory, ) -from learning_resources.models import LearningResourcePlatform -from learning_resources.serializers import LearningResourceSerializer from learning_resources_search import tasks from learning_resources_search.constants import ( COURSE_TYPE, @@ -23,6 +20,7 @@ IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError +from learning_resources_search.serializers import serialize_learning_resource_for_update from learning_resources_search.tasks import ( deindex_document, finish_recreate_index, @@ -59,7 +57,7 @@ def test_upsert_course_task(mocked_api): """Test that upsert_course will serialize the course data and upsert it to the ES index""" course = CourseFactory.create() upsert_course(course.learning_resource_id) - data = LearningResourceSerializer(course.learning_resource).data + data = serialize_learning_resource_for_update(course.learning_resource) mocked_api.upsert_document.assert_called_once_with( course.learning_resource.id, data, @@ -72,7 +70,7 @@ def test_upsert_program_task(mocked_api): """Test that upsert_program will serialize the video data and upsert it to the ES index""" program = ProgramFactory.create() upsert_program(program) - data = LearningResourceSerializer(program.learning_resource).data + data = serialize_learning_resource_for_update(program.learning_resource) mocked_api.upsert_document.assert_called_once_with( program.learning_resource.id, data, @@ -318,7 +316,7 @@ def test_bulk_deletion_tasks(mocker, with_error, tasks_func_name, indexing_func_ None, ), (["course"], None), - (["course"], PlatformType.xpro.value), + (["course"], PlatformType.xpro.name), ], ) def test_start_update_index( @@ -342,23 +340,14 @@ def test_start_update_index( if COURSE_TYPE in indexes: courses = sorted( - [ - CourseFactory.create( - platform=LearningResourcePlatformFactory.create( - platform=platform.value - ) - ) - for platform in platforms - ], + [CourseFactory.create(platform=platform.name) for platform in platforms], key=lambda course: course.learning_resource_id, ) unpublished_courses = sorted( [ CourseFactory.create( - platform=LearningResourcePlatform.objects.get( - platform=platform.value - ), + platform=platform.name, is_unpublished=True, ) for platform in platforms diff --git a/openapi.yaml b/openapi.yaml index 8b93c1ab0d..3c86d36674 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -5445,22 +5445,17 @@ components: type: object description: Serializer for the Course model properties: - extra_course_numbers: - type: array - items: - type: string - maxLength: 128 + course_numbers: + type: object + additionalProperties: {} nullable: true CourseRequest: type: object description: Serializer for the Course model properties: - extra_course_numbers: - type: array - items: - type: string - minLength: 1 - maxLength: 128 + course_numbers: + type: object + additionalProperties: {} nullable: true LearningPath: type: object