Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,15 @@ def add_arguments(self, parser):
help="Delete all existing records first",
)
parser.add_argument(
"--api_datafile",
dest="api_datafile",
help="If provided, use this file as the source of API data",
"--api_course_datafile",
dest="api_course_datafile",
help="If provided, use this file as the source of course API data",
default=None,
)
parser.add_argument(
"--api_program_datafile",
dest="api_program_datafile",
help="If provided, use this file as the source of program API data",
default=None,
)
super().add_arguments(parser)
Expand All @@ -40,7 +46,9 @@ def handle(self, *args, **options): # noqa: ARG002
):
resource_delete_actions(learning_resource)
else:
task = get_mit_edx_data.delay(options["api_datafile"])
task = get_mit_edx_data.delay(
options["api_course_datafile"], options["api_program_datafile"]
)
self.stdout.write(f"Started task {task} to get MIT edX course data")
self.stdout.write("Waiting on task...")
start = now_in_utc()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@

from django.core.management import BaseCommand

from learning_resources.etl import mit_edx, oll
from learning_resources.etl import mit_edx, mit_edx_programs, oll
from learning_resources.etl.constants import ETLSource
from main.utils import now_in_utc

EXTRACTORS = {
ETLSource.oll.name: oll.extract,
ETLSource.mit_edx.name: mit_edx.extract,
f"{ETLSource.mit_edx.name}_programs": mit_edx_programs.extract,
}


Expand Down
15 changes: 11 additions & 4 deletions learning_resources/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,22 @@ def get_micromasters_data():


@app.task
def get_mit_edx_data(api_datafile=None) -> int:
def get_mit_edx_data(
api_course_datafile: str | None = None, api_program_datafile: str | None = None
) -> int:
"""Task to sync MIT edX data with the database

Args:
api_datafile (str): If provided, use this file as the source of API data
api_course_datafile (str): If provided, use file as source of course API data
Otherwise, the API is queried directly.
api_program_datafile (str): If provided, use file as source of program API data.
Otherwise, the API is queried directly.

Returns:
int: The number of results that were fetched
"""
courses = pipelines.mit_edx_courses_etl(api_datafile)
programs = pipelines.mit_edx_programs_etl(api_datafile)
courses = pipelines.mit_edx_courses_etl(api_course_datafile)
programs = pipelines.mit_edx_programs_etl(api_program_datafile)
clear_search_cache()
return len(courses) + len(programs)

Expand Down