From edeea73dc93ccfb7c68ada4aa318d7939ef96efa Mon Sep 17 00:00:00 2001
From: Julien Enselme
Date: Sun, 28 Apr 2024 12:41:33 +0200
Subject: [PATCH] Correct parsing of media values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- They all go into lists.
- They don’t erase the entry title or summary.

See #195 and #53
---
 feedparser/namespaces/mediarss.py             | 36 +++++++++++++------
 .../media-rss/item_media_title_type_plain.xml |  2 +-
 .../wellformed/media-rss/media_content_1.xml  | 16 +++++++++
 .../wellformed/media-rss/media_content_2.xml  | 16 +++++++++
 .../wellformed/media-rss/media_content_3.xml  | 22 ++++++++++++
 tests/wellformed/media-rss/media_group.xml    |  2 +-
 6 files changed, 82 insertions(+), 12 deletions(-)
 create mode 100644 tests/wellformed/media-rss/media_content_1.xml
 create mode 100644 tests/wellformed/media-rss/media_content_2.xml
 create mode 100644 tests/wellformed/media-rss/media_content_3.xml

diff --git a/feedparser/namespaces/mediarss.py b/feedparser/namespaces/mediarss.py
index 5ec4b67b..29a94c42 100644
--- a/feedparser/namespaces/mediarss.py
+++ b/feedparser/namespaces/mediarss.py
@@ -49,14 +49,6 @@ def _end_media_keywords(self):
             if term.strip():
                 self._add_tag(term.strip(), None, None)
 
-    def _start_media_title(self, attrs_d):
-        self._start_title(attrs_d)
-
-    def _end_media_title(self):
-        title_depth = self.title_depth
-        self._end_title()
-        self.title_depth = title_depth
-
     def _start_media_group(self, attrs_d):
         # don't do anything, but don't break the enclosed tags either
         pass
@@ -85,10 +77,34 @@ def _end_media_credit(self):
             context["media_credit"][-1]["content"] = credit
 
     def _start_media_description(self, attrs_d):
-        self._start_description(attrs_d)
+        context = self._get_context()
+        context.setdefault("media_description", [])
+        attrs = attrs_d.copy()
+        if "type" in attrs:
+            attrs["type"] = self.map_content_type(attrs["type"])
+        context["media_description"].append(attrs)
+        self.push("media_desc", 1)
 
     def _end_media_description(self):
-        self._end_description()
+        description_ = self.pop("media_desc")
+        if description_ is not None and description_.strip():
+            context = self._get_context()
+            context["media_description"][-1]["content"] = description_
+
+    def _start_media_title(self, attrs_d):
+        context = self._get_context()
+        context.setdefault("media_title", [])
+        attrs = attrs_d.copy()
+        if "type" in attrs:
+            attrs["type"] = self.map_content_type(attrs["type"])
+        context["media_title"].append(attrs)
+        self.push("title", 1)
+
+    def _end_media_title(self):
+        title_ = self.pop("title")
+        if title_ is not None and title_.strip():
+            context = self._get_context()
+            context["media_title"][-1]["content"] = title_
 
     def _start_media_restriction(self, attrs_d):
         context = self._get_context()
diff --git a/tests/wellformed/media-rss/item_media_title_type_plain.xml b/tests/wellformed/media-rss/item_media_title_type_plain.xml
index 923d1f76..7546fc47 100644
--- a/tests/wellformed/media-rss/item_media_title_type_plain.xml
+++ b/tests/wellformed/media-rss/item_media_title_type_plain.xml
@@ -1,6 +1,6 @@
diff --git a/tests/wellformed/media-rss/media_content_1.xml b/tests/wellformed/media-rss/media_content_1.xml
new file mode 100644
index 00000000..021f90fe
--- /dev/null
+++ b/tests/wellformed/media-rss/media_content_1.xml
@@ -0,0 +1,16 @@
+
+
+
+
+ Media title
+ Media description
+ Media credit
+
+ Example Atom
+ Test description
+
+
diff --git a/tests/wellformed/media-rss/media_content_2.xml b/tests/wellformed/media-rss/media_content_2.xml
new file mode 100644
index 00000000..8fc02940
--- /dev/null
+++ b/tests/wellformed/media-rss/media_content_2.xml
@@ -0,0 +1,16 @@
+
+
+
+ Example Atom
+ Test description
+
+ Media title
+ Media description
+ Media credit
+
+
+
diff --git a/tests/wellformed/media-rss/media_content_3.xml b/tests/wellformed/media-rss/media_content_3.xml
new file mode 100644
index 00000000..c381820f
--- /dev/null
+++ b/tests/wellformed/media-rss/media_content_3.xml
@@ -0,0 +1,22 @@
+
+
+
+ Example Atom
+ Test description
+
+ Media title
+ Media description
+ Media credit
+
+
+ Media title 2
+ Media description 2
+ Media credit 2
+
+
+
diff --git a/tests/wellformed/media-rss/media_group.xml b/tests/wellformed/media-rss/media_group.xml
index 360be061..b0ce7c92 100644
--- a/tests/wellformed/media-rss/media_group.xml
+++ b/tests/wellformed/media-rss/media_group.xml
@@ -1,6 +1,6 @@