Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions app.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@
"description": "S3 prefix for MITx bucket keys",
"required": false
},
"EDX_PROGRAMS_API_URL": {
"description": "The catalog url for MITx programs",
"required": false
},
"OPENSEARCH_HTTP_AUTH": {
"description": "Basic auth settings for connecting to OpenSearch"
},
Expand Down
1 change: 1 addition & 0 deletions env/codespaces.env
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ XPRO_CATALOG_API_URL=https://xpro.mit.edu/api/programs/
XPRO_COURSES_API_URL=https://xpro.mit.edu/api/courses/
EDX_API_ACCESS_TOKEN_URL=https://api.edx.org/oauth2/v1/access_token
EDX_API_URL=https://api.edx.org/catalog/v1/catalogs/10/courses
EDX_PROGRAMS_API_URL=https://discovery.edx.org/api/v1/programs/
OCW_BASE_URL=https://ocw.mit.edu/
MICROMASTERS_CATALOG_API_URL=https://micromasters.mit.edu/api/v0/catalog/
MICROMASTERS_COURSE_URL=https://micromasters.mit.edu/api/v0/courseruns/
Expand Down
8 changes: 8 additions & 0 deletions learning_resources/etl/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Common ETL test fixtures"""

import json
from pathlib import Path

import pytest

Expand Down Expand Up @@ -31,3 +32,10 @@ def non_mitx_course_data():
"""Catalog data fixture"""
with open("./test_json/test_non_mitx_course.json") as f: # noqa: PTH123
yield json.loads(f.read())


@pytest.fixture
def mitx_programs_data():
    """
    Yield the parsed contents of the MITx programs JSON test fixture.

    The path is relative to the current working directory, so the tests
    must be run from a directory that contains ``test_json/``.
    """
    fixture_path = Path("./test_json/test_mitx_programs.json")
    # JSON is UTF-8 by specification; do not rely on the locale default encoding.
    with fixture_path.open(encoding="utf-8") as f:
        yield json.load(f)
3 changes: 2 additions & 1 deletion learning_resources/etl/mit_edx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.conf import settings
from toolz import compose, curried

from learning_resources.constants import OfferedBy, PlatformType
from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType
from learning_resources.etl.constants import ETLSource
from learning_resources.etl.openedx import (
MIT_OWNER_KEYS,
Expand Down Expand Up @@ -71,6 +71,7 @@ def get_open_edx_config():
PlatformType.edx.name,
OfferedBy.mitx.name,
ETLSource.mit_edx.name,
LearningResourceType.course.name,
)


Expand Down
73 changes: 73 additions & 0 deletions learning_resources/etl/mit_edx_programs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""MIT edX programs ETL"""

import logging

from django.conf import settings
from toolz import compose, curried

from learning_resources.constants import LearningResourceType, OfferedBy, PlatformType
from learning_resources.etl.constants import ETLSource
from learning_resources.etl.openedx import (
MIT_OWNER_KEYS,
OpenEdxConfiguration,
openedx_extract_transform_factory,
)

# Module-scoped logger so emitted records carry this module's name rather
# than being attributed to the root logger.
log = logging.getLogger(__name__)


def _is_mit_program(program: dict) -> bool:
"""
Helper function to determine if a program is an MIT program

Args:
program (dict): The JSON object representing the program with all its courses

Returns:
bool: indicates whether the program is owned by MIT
""" # noqa: D401
return (
any(
owner["key"] in MIT_OWNER_KEYS
for owner in program.get("authoring_organizations")
)
and "micromasters" not in program.get("type", "").lower()
and program.get("status") == "active"
)


def get_open_edx_config():
    """
    Build the OpenEdxConfiguration used for edX programs.

    A warning is logged for every required setting whose value is falsy;
    the configuration is still constructed from the settings afterwards.
    """
    for setting_name in (
        "EDX_API_CLIENT_ID",
        "EDX_API_CLIENT_SECRET",
        "EDX_API_ACCESS_TOKEN_URL",
        "EDX_PROGRAMS_API_URL",
        "EDX_BASE_URL",
        "EDX_ALT_URL",
    ):
        if getattr(settings, setting_name):
            continue
        log.warning("Missing required setting %s", setting_name)

    # Positional order must match the OpenEdxConfiguration definition.
    config_args = (
        settings.EDX_API_CLIENT_ID,
        settings.EDX_API_CLIENT_SECRET,
        settings.EDX_API_ACCESS_TOKEN_URL,
        settings.EDX_PROGRAMS_API_URL,
        settings.EDX_BASE_URL,
        settings.EDX_ALT_URL,
        PlatformType.edx.name,
        OfferedBy.mitx.name,
        ETLSource.mit_edx.name,
        LearningResourceType.program.name,
    )
    return OpenEdxConfiguration(*config_args)


# filter stage that keeps only the programs accepted by _is_mit_program()
_keep_mit_programs = curried.filter(_is_mit_program)

# the OpenEdx factory supplies the generic extract and transform funcs
extract, _transform = openedx_extract_transform_factory(get_open_edx_config)

# public transform: compose applies right-to-left, so the program list is
# filtered first and the remainder is passed to the factory transform
transform = compose(_transform, _keep_mit_programs)
13 changes: 13 additions & 0 deletions learning_resources/etl/mit_edx_programs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Tests for mit_edx_programs"""

import pytest

from learning_resources.etl.mit_edx_programs import transform


@pytest.mark.django_db
def test_mitx_transform_mit_owner(mitx_programs_data):
    """Only the MIT-owned, non-micromasters program should survive the transform"""
    programs = [*transform(mitx_programs_data)]
    program_count = len(programs)
    assert program_count == 1
    (only_program,) = programs
    assert only_program["title"] == "Circuits and Electronics"
Loading