Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 18 additions & 28 deletions learning_resources/etl/mitxonline.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,18 +153,23 @@ def extract_courses():
return []


def parse_prices(parent_data: dict) -> list[dict]:
    """
    Return a list of unique prices for a course/program.

    $0.00 (free) is always included for the non-certificate option.
    Other prices come from the parent course/program's min_price & max_price
    fields.

    Args:
        parent_data (dict): course or program data; only the "min_price" and
            "max_price" keys are read. A missing, None, or otherwise falsy
            value (including 0) is treated as the free price.

    Returns:
        list[dict]: one transform_price() result per distinct price, in
            ascending order of price.
    """
    free_price_str = "0.00"
    # NOTE(review): Decimal() of a float such as 99.99 yields the float's exact
    # binary value rather than the two-decimal figure - confirm that
    # transform_price (or the consumer) rounds/quantizes as needed.
    return [
        transform_price(price)
        for price in sorted(
            # A set drops duplicates, e.g. when min_price == max_price or when
            # either one falls back to the free price.
            {
                Decimal(free_price_str),
                Decimal(parent_data.get("min_price") or free_price_str),
                Decimal(parent_data.get("max_price") or free_price_str),
            }
        )
    ]


def parse_departments(departments_data: list[dict or str]) -> list[str]:
Expand Down Expand Up @@ -226,22 +231,7 @@ def _transform_run(course_run: dict, course: dict) -> dict:
),
"description": clean_data(parse_page_attribute(course_run, "description")),
"image": _transform_image(course_run),
"prices": [
transform_price(price)
for price in sorted(
{
Decimal("0.00"),
*[
Decimal(price)
for price in [
product.get("price")
for product in course_run.get("products", [])
]
if price is not None
],
}
)
],
"prices": parse_prices(course),
"instructors": [
{"full_name": instructor["name"]}
for instructor in parse_page_attribute(course, "instructors", is_list=True)
Expand Down Expand Up @@ -418,7 +408,7 @@ def transform_programs(programs: list[dict]) -> list[dict]:
"description": clean_data(
parse_page_attribute(program, "description")
),
"prices": parse_program_prices(program),
"prices": parse_prices(program),
"status": RunStatus.current.value
if parse_page_attribute(program, "page_url")
else RunStatus.archived.value,
Expand Down
64 changes: 13 additions & 51 deletions learning_resources/etl/mitxonline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

# pylint: disable=redefined-outer-name
from datetime import datetime
from decimal import Decimal
from unittest.mock import ANY
from urllib.parse import parse_qs, urljoin, urlparse

Expand All @@ -30,7 +29,7 @@
extract_programs,
parse_certificate_type,
parse_page_attribute,
parse_program_prices,
parse_prices,
transform_courses,
transform_programs,
transform_topics,
Expand Down Expand Up @@ -172,7 +171,7 @@ def test_mitxonline_transform_programs(
"published": bool(
program_data.get("page", {}).get("page_url", None) is not None
),
"prices": parse_program_prices(program_data),
"prices": parse_prices(program_data),
"image": _transform_image(program_data),
"title": program_data["title"],
"description": clean_data(
Expand Down Expand Up @@ -262,25 +261,7 @@ def test_mitxonline_transform_programs(
course_run_data["is_enrollable"]
and course_data["page"]["live"]
),
"prices": sorted(
[
{"amount": Decimal(i), "currency": CURRENCY_USD}
for i in {
0.00,
*[
price
for price in [
product.get("price")
for product in course_run_data.get(
"products", []
)
]
if price is not None
],
}
],
key=lambda x: x["amount"],
),
"prices": parse_prices(course_data),
"instructors": [
{"full_name": instructor["name"]}
for instructor in parse_page_attribute(
Expand Down Expand Up @@ -399,25 +380,7 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data):
"published": bool(
course_run_data["is_enrollable"] and course_data["page"]["live"]
),
"prices": sorted(
[
{"amount": Decimal(i), "currency": CURRENCY_USD}
for i in {
0.00,
*[
price
for price in [
product.get("price")
for product in course_run_data.get(
"products", []
)
]
if price is not None
],
}
],
key=lambda x: x["amount"],
),
"prices": parse_prices(course_data),
"instructors": [
{"full_name": instructor["name"]}
for instructor in parse_page_attribute(
Expand Down Expand Up @@ -536,19 +499,18 @@ def test_program_run_start_date_value( # noqa: PLR0913


@pytest.mark.parametrize(
    ("min_price", "max_price", "expected"),
    [
        (None, 100, [0, 100]),
        (100, 1000, [0, 100, 1000]),
        (99.99, 3000, [0.00, 99.99, 3000]),
        (9.99, None, [0, 9.99]),
        # Neither price present: only the free option remains.
        (None, None, [0]),
        # Identical min and max prices collapse into a single entry.
        (100, 100, [0, 100]),
    ],
)
def test_parse_prices(min_price, max_price, expected):
    """Test that the prices are correctly parsed from the parent data"""
    program_data = {"min_price": min_price, "max_price": max_price}
    # The expected list is sorted by amount because parse_prices sorts the
    # distinct prices before transforming them.
    assert parse_prices(program_data) == sorted(
        [{"amount": float(price), "currency": CURRENCY_USD} for price in expected],
        key=lambda x: x["amount"],
    )
Expand Down
6 changes: 6 additions & 0 deletions test_json/mitxonline_courses.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
"name": "Mathematics"
}
],
"min_price": 123,
"max_price": 456,
"certificate_type": "Certificate of Completion",
"required_prerequisites": true,
"duration": "14 weeks",
Expand Down Expand Up @@ -147,6 +149,8 @@
}
]
},
"min_price": null,
"max_price": 456,
"programs": null,
"topics": [
{
Expand Down Expand Up @@ -218,6 +222,8 @@
}
]
},
"min_price": 123,
"max_price": null,
"programs": null,
"topics": [
{
Expand Down
2 changes: 2 additions & 0 deletions test_json/mitxonline_programs.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@
"effort": "7 hrs/wk",
"price": "$175"
},
"min_price": 1230,
"max_price": 4560,
"program_type": "Series",
"certificate_type": "Certificate of Completion",
"departments": [
Expand Down
Loading