In [1]:
from datetime import datetime, timedelta
from time import sleep

import feedparser

In [2]:
RSS_ENDPOINT = "http://rss.cnn.com/rss/cnn_latest.rss"

In [10]:
class Feed:
    def __init__(self, title, description, link, published, media_content) -> None:
        self.title = title
        self.description = description
        self.link = link
        self.published = published
        self.media_content = media_content

    def __str__(self) -> str:
        return f"{self.title}\n{self.description}"


class NewsRSSParser:
    def __init__(self, rss_endpoint, interval=3) -> None:
        self.rss_endpoint = rss_endpoint
        self.feeds = None
        self.interval = timedelta(seconds=interval)
        self.last_update = datetime.now() - self.interval
    
    def _time_to_update(self):
        return datetime.now() - self.last_update > self.interval

    def _parse(self):
        if self._time_to_update():
            self.feeds = feedparser.parse(self.rss_endpoint)
            self.last_update = datetime.now()
        return self.feeds

    def get_feeds(self):
        raise NotImplementedError
    
class CNNRSSParser(NewsRSSParser):
    def __init__(self, rss_endpoint, interval=3) -> None:
        super().__init__(rss_endpoint, interval)
    
    def _convert_published(self, text):
        return datetime.strptime(text, "%a, %d %b %Y %H:%M:%S GMT")
    
    def _convert_media_content(self, media_content):
        content = [content for content in media_content if content['width'] == '300' and content['height'] == '300']
        return content[0] if content else None

    def _convert_feed(self, feed):
        print(feed['id'])
        title = feed['title']
        description = feed['summary']
        link = feed['link']
        published = self._convert_published(feed['published'])

        if 'media_content' in feed:
            media_content = self._convert_media_content(feed['media_content'])
        else:
            media_content = None
        return Feed(title, description, link, published, media_content)

    def get_feeds(self):
        self._parse()

        parsed_feeds = [self._convert_feed(feed) for feed in self.feeds['entries']]
        return parsed_feeds

In [11]:
parser = CNNRSSParser(RSS_ENDPOINT, 3)

In [12]:
feeds = parser.get_feeds()
print(list(map(str, feeds))[0])

https://www.cnn.com/2023/08/20/americas/bernardo-arevalo-wins-guatemala-presidential-election-intl/index.html
https://www.cnn.com/2023/08/03/tech/china-minors-mobile-phone-limits-intl-hnk/index.html
https://www.cnn.com/2023/08/20/economy/china-economy-lpr-cuts-hnk-intl/index.html
https://www.cnn.com/2023/08/21/weather/tropical-storm-hilary-california-southwest-monday/index.html
https://www.cnn.com/us/live-news/hurricane-hilary-path-08-20-23/index.html
https://www.cnn.com/europe/live-news/russia-ukraine-war-news-08-21-23/index.html
https://www.cnn.com/2023/08/18/us/washington-medical-lake-wildfire-evacuation/index.html
https://www.cnn.com/2023/08/21/china/china-national-security-second-cia-spy-intl-hnk/index.html
https://www.cnn.com/2023/08/21/politics/trump-georgia-surrender-debate-2024/index.html
https://www.cnn.com/2023/08/14/us/maui-wildfires-victims-identified/index.html
https://www.cnn.com/2023/08/19/us/hawaii-maui-wildfires-death-toll-saturday/index.html
https://www.cnn.com/2023/