From 65cbf3ab1331ca678e1d374541be3444ee9bd6d3 Mon Sep 17 00:00:00 2001
From: Matt Bertrand
Date: Fri, 20 Sep 2024 07:51:27 -0400
Subject: [PATCH] Fix extract_openedx_data and backpopulate_mit_edx_data commands to work with course/program datafiles

---
 .../commands/backpopulate_mit_edx_data.py | 16 ++++++++++++----
 .../management/commands/extract_openedx_data.py | 3 ++-
 learning_resources/tasks.py | 15 +++++++++++----
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/learning_resources/management/commands/backpopulate_mit_edx_data.py b/learning_resources/management/commands/backpopulate_mit_edx_data.py
index 3886cfe64b..77e183e185 100644
--- a/learning_resources/management/commands/backpopulate_mit_edx_data.py
+++ b/learning_resources/management/commands/backpopulate_mit_edx_data.py
@@ -22,9 +22,15 @@ def add_arguments(self, parser):
             help="Delete all existing records first",
         )
         parser.add_argument(
-            "--api_datafile",
-            dest="api_datafile",
-            help="If provided, use this file as the source of API data",
+            "--api_course_datafile",
+            dest="api_course_datafile",
+            help="If provided, use this file as the source of course API data",
+            default=None,
+        )
+        parser.add_argument(
+            "--api_program_datafile",
+            dest="api_program_datafile",
+            help="If provided, use this file as the source of program API data",
             default=None,
         )
         super().add_arguments(parser)
@@ -40,7 +46,9 @@ def handle(self, *args, **options):  # noqa: ARG002
             ):
                 resource_delete_actions(learning_resource)
         else:
-            task = get_mit_edx_data.delay(options["api_datafile"])
+            task = get_mit_edx_data.delay(
+                options["api_course_datafile"], options["api_program_datafile"]
+            )
             self.stdout.write(f"Started task {task} to get MIT edX course data")
             self.stdout.write("Waiting on task...")
             start = now_in_utc()
diff --git a/learning_resources/management/commands/extract_openedx_data.py b/learning_resources/management/commands/extract_openedx_data.py
index b7f14d3fe1..84cde80f33 100644
--- a/learning_resources/management/commands/extract_openedx_data.py
+++ b/learning_resources/management/commands/extract_openedx_data.py
@@ -5,13 +5,14 @@
 
 from django.core.management import BaseCommand
 
-from learning_resources.etl import mit_edx, oll
+from learning_resources.etl import mit_edx, mit_edx_programs, oll
 from learning_resources.etl.constants import ETLSource
 from main.utils import now_in_utc
 
 EXTRACTORS = {
     ETLSource.oll.name: oll.extract,
     ETLSource.mit_edx.name: mit_edx.extract,
+    f"{ETLSource.mit_edx.name}_programs": mit_edx_programs.extract,
 }
 
 
diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py
index 3657875b89..0fd1527a3e 100644
--- a/learning_resources/tasks.py
+++ b/learning_resources/tasks.py
@@ -48,15 +48,22 @@ def get_micromasters_data():
 
 
 @app.task
-def get_mit_edx_data(api_datafile=None) -> int:
+def get_mit_edx_data(
+    api_course_datafile: str | None = None, api_program_datafile: str | None = None
+) -> int:
     """Task to sync MIT edX data with the database
 
     Args:
-        api_datafile (str): If provided, use this file as the source of API data
+        api_course_datafile (str): If provided, use file as source of course API data
             Otherwise, the API is queried directly.
+        api_program_datafile (str): If provided, use file as source of program API data.
+            Otherwise, the API is queried directly.
+
+    Returns:
+        int: The number of results that were fetched
     """
-    courses = pipelines.mit_edx_courses_etl(api_datafile)
-    programs = pipelines.mit_edx_programs_etl(api_datafile)
+    courses = pipelines.mit_edx_courses_etl(api_course_datafile)
+    programs = pipelines.mit_edx_programs_etl(api_program_datafile)
     clear_search_cache()
     return len(courses) + len(programs)
 