Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion learning_resources/etl/podcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from learning_resources.constants import LearningResourceType
from learning_resources.etl.constants import ETLSource
from learning_resources.etl.utils import iso8601_duration
from learning_resources.models import PodcastEpisode
from main.utils import clean_data, frontend_absolute_url, now_in_utc

Expand Down Expand Up @@ -174,7 +175,7 @@ def transform_episode(rss_data, offered_by, topics, parent_image):
"podcast_episode": {
"episode_link": rss_data.link.text if rss_data.link else None,
"duration": (
rss_data.find("itunes:duration").text
iso8601_duration(rss_data.find("itunes:duration").text)
if rss_data.find("itunes:duration")
else None
),
Expand Down
4 changes: 2 additions & 2 deletions learning_resources/etl/podcast_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_transform(mock_github_client, title, topics, offered_by):
"published": True,
"podcast_episode": {
"episode_link": "https://soundcloud.com/podcast/episode1",
"duration": "00:17:16",
"duration": "PT17M16S",
"rss": episodes_rss[0].prettify(),
},
"resource_type": LearningResourceType.podcast_episode.name,
Expand All @@ -185,7 +185,7 @@ def test_transform(mock_github_client, title, topics, offered_by):
"published": True,
"podcast_episode": {
"episode_link": "https://soundcloud.com/podcast/episode2",
"duration": "00:17:16",
"duration": "PT17M16S",
"rss": episodes_rss[1].prettify(),
},
"resource_type": LearningResourceType.podcast_episode.name,
Expand Down
29 changes: 29 additions & 0 deletions learning_resources/etl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import rapidjson
import requests
from django.conf import settings
from django.utils.dateparse import parse_duration
from django.utils.functional import SimpleLazyObject
from django.utils.text import slugify
from tika import parser as tika_parser
Expand Down Expand Up @@ -703,3 +704,31 @@ def parse_certification(offeror, runs_data):
if (availability and availability != AvailabilityType.archived.value)
]
)


def iso8601_duration(duration_str: str) -> "str | None":
    """
    Parse the duration from a string and return it in ISO-8601 format

    Surrounding whitespace is ignored and any sub-second part of the parsed
    duration is discarded, so the result only ever contains whole hours,
    minutes and seconds.

    Args:
        duration_str (str): The duration as a string in one of various formats
            (whatever ``parse_duration`` accepts, e.g. "1:30:04", "3675" or
            "PT1H30M4S")

    Returns:
        str or None: the duration in ISO-8601 format (e.g. "PT1H30M4S", or
            "PT0S" for a zero-length duration); None if the input is empty,
            cannot be parsed, or is negative
    """
    # Empty / whitespace-only input is treated as "no duration" rather than
    # being handed to the parser, and is not worth a warning.
    cleaned = duration_str.strip() if duration_str else ""
    if not cleaned:
        return None

    delta = parse_duration(cleaned)
    if delta is None:
        log.warning("Could not parse duration string %s", duration_str)
        return None

    total_seconds = delta.total_seconds()
    if total_seconds < 0:
        # divmod on a negative total yields mixed-sign components such as
        # "PT-1H59M", which is not a valid duration; reject it instead.
        log.warning("Ignoring negative duration string %s", duration_str)
        return None

    # Work in whole seconds so every component is an int and a fractional
    # remainder cannot produce a redundant "0S" suffix.
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)

    # Only non-zero components are emitted; hours are not folded into days.
    components = [
        f"{value}{designator}"
        for value, designator in ((hours, "H"), (minutes, "M"), (seconds, "S"))
        if value
    ]
    if not components:
        return "PT0S"
    return "PT" + "".join(components)
28 changes: 28 additions & 0 deletions learning_resources/etl/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,31 @@ def test_calc_checksum(previous_archive, identical):
def test_get_department_id_by_name(dept_name, dept_id):
"""Test that the correct department ID (if any) is returned"""
assert utils.get_department_id_by_name(dept_name) == dept_id


@pytest.mark.parametrize(
    ("duration_str", "expected"),
    [
        ("1:00:00", "PT1H"),
        ("1:30:04", "PT1H30M4S"),
        ("00:00", "PT0S"),
        ("00:00:00", "PT0S"),
        ("00:01:00", "PT1M"),
        ("01:00:00", "PT1H"),
        ("00:00:01", "PT1S"),
        ("02:59", "PT2M59S"),
        ("72:59", "PT1H12M59S"),
        ("3675", "PT1H1M15S"),
        ("5", "PT5S"),
        ("PT1H30M4S", "PT1H30M4S"),
        ("", None),
        (None, None),
        ("bad_duration", None),
        ("PTBarnum", None),
    ],
)
def test_parse_duration(mocker, duration_str, expected):
    """
    Test that iso8601_duration returns the expected ISO-8601 string and logs
    a warning only when a non-empty input yields no result
    """
    warning_mock = mocker.patch("learning_resources.etl.utils.log.warning")

    result = utils.iso8601_duration(duration_str)

    assert result == expected
    # A warning is expected exactly once when a non-empty string produced None;
    # empty/None inputs return None silently.
    expected_warnings = 1 if (duration_str and expected is None) else 0
    assert warning_mock.call_count == expected_warnings