In [45]:
import copy
import re
import requests
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from datetime import datetime
from requests import Response
from typing import Callable, Optional, Tuple, cast
from xml.etree.ElementTree import Element

@dataclass(frozen=True)
class PyPiRelease:

    '''
        Immutable record for one item of a pypi.org's releases.xml file.

        Every field is optional: a field is None when the corresponding
        piece of information is not available for the item.
    '''

    # Text fields of the item.
    title: Optional[str]
    link: Optional[str]
    description: Optional[str]
    author: Optional[str]

    # Publication date, both as a datetime and as a string.
    pubdate: Optional[datetime]
    pubdate_str: Optional[str]
@dataclass(frozen=True)
class PyPiSession:

    '''
        Immutable result of one fetching session performed by PyPiReleaseManager.

        It groups the requested package name, the releases kept for it and
        the version/date of the latest one (both None when unavailable).
    '''

    package_name: str
    releases: list[PyPiRelease]

    # Summary of the latest release.
    latest_version: Optional[str]
    latest_version_date: Optional[datetime]
class PyPiReleaseManager():

    '''This is a client for PyPi release pages.'''

    __get_function : Callable[[str], Response]
    __logging_function : Callable[[str], None]

    def __init__(
            self, 
            get_function : Callable[[str], Response] = lambda url : requests.get(url),
            logging_function : Callable[[str], None] = lambda msg : print(msg)) -> None:
        
        self.__get_function = get_function
        self.__logging_function = logging_function

    def __format_url(self, package_name : str) -> str:

        '''Returns the URL for the package's releases.xml.'''

        url : str =  f"https://pypi.org/rss/project/{package_name}/releases.xml"

        return url
    def __format_optional_string(self, opt : Optional[str]) -> str:

        '''Returns "string" or "None".'''

        string : str = "None"
        if opt is not None:
            string = str(opt)

        return string
    def __format_optional_datetime(self, dt : Optional[datetime]) -> str:

        '''Returns a date string formatted as "2024-10-05" or "None".'''

        dt_str : str = "None"
        if dt is not None:
            dt_str = cast(datetime, dt).strftime('%Y-%m-%d')

        return dt_str      
    def __try_extract_text(self, element : Element, path : str) -> Optional[str]:

        '''Extracts the text from the provided element according to path or returns None.'''

        try:

            result : Optional[Element] = element.find(path = path)

            return cast(Element, result).text

        except:
            return None
    def __try_extract_title(self, element : Element) -> Optional[str]:

        '''Extracts the title from the provided element or returns None.'''

        return self.__try_extract_text(element = element, path = "title")
    def __try_extract_link(self, element : Element) -> Optional[str]:

        '''Extracts the link from the provided element or returns None.'''

        return self.__try_extract_text(element = element, path = "link")
    def __try_extract_description(self, element : Element) -> Optional[str]:

        '''Extracts the description from the provided element or returns None.'''

        return self.__try_extract_text(element = element, path = "description")
    def __try_extract_author(self, element : Element) -> Optional[str]:

        '''Extracts the author from the provided element or returns None.'''

        return self.__try_extract_text(element = element, path = "author")
    def __try_extract_pubdate_str(self, element : Element) -> Optional[str]:

        '''Extracts the pubDate from the provided element or returns None.'''

        return self.__try_extract_text(element = element, path = "pubDate")
    def __parse_pubdate_str(self, pubdate_str : Optional[str]) -> Optional[datetime]:

        '''
            This method expect a dt_str as in the following examples:

                Fri, 20 Sep 2024 13:08:42 GMT
                Wed, 10 Apr 2024 19:44:10 GMT
                Fri, 23 Feb 2024 15:30:19 GMT
                Sat, 20 Jan 2024 02:10:54 GMT
                ...
        '''

        if pubdate_str:

            format : str = "%a, %d %b %Y %H:%M:%S %Z"
            pubdate = datetime.strptime(pubdate_str, format)

            return pubdate
        
        else:
            return None
    def __parse_response(self, response: Response) -> list[PyPiRelease]:

        '''
            Parses response.text as XML and builds one PyPiRelease for every
            "item" element found under a "channel" element of the root.

            Fields that cannot be extracted from an item are set to None.
        '''

        root: Element = ET.fromstring(text = response.text)

        releases: list[PyPiRelease] = []
        for item in root.findall("channel/item"):

            # Extracted first because it is stored as-is and also parsed into pubdate.
            pubdate_str: Optional[str] = self.__try_extract_pubdate_str(element = item)

            releases.append(
                PyPiRelease(
                    title = self.__try_extract_title(element = item),
                    link = self.__try_extract_link(element = item),
                    description = self.__try_extract_description(element = item),
                    author = self.__try_extract_author(element = item),
                    pubdate_str = pubdate_str,
                    pubdate = self.__parse_pubdate_str(pubdate_str = pubdate_str)
                )
            )

        return releases
    def __has_title(self, release : PyPiRelease) -> bool:

        '''Retuns True if pypi_item.title is not None.'''

        try:

            cast(str, release.title)

            return True

        except:
            return False
    def __has_pubdate(self, release : PyPiRelease) -> bool:

        '''Retuns True if pypi_item.pubdate is not None.'''

        try:

            cast(datetime, release.pubdate)

            return True

        except:
            return False
    def __is_final_release(self, release : PyPiRelease) -> bool:

        '''
            ['2.1.2', '2.1.1', '2.0.2', '2.1.0']    => True
            ['2.1.0rc1', '7.0.0b1']                 => False
        '''

        pattern : str = r'^\d+\.\d+\.\d+$'
        status : bool = bool(re.match(pattern, str(release.title)))

        return status
    def __filter(self, releases : list[PyPiRelease], function : Callable[[PyPiRelease], bool]) -> list[PyPiRelease]:

        '''Runs function on releases.'''

        lst : list[PyPiRelease] = [release for release in releases if function(release)]

        return lst
    def __sort_by_pubdate(self, releases : list[PyPiRelease], reverse : bool = True) -> list[PyPiRelease]:

        '''
            reverse = True => Descending
            reverse = False => Ascending
        '''

        lst : list[PyPiRelease] = copy.deepcopy(releases)
        lst.sort(key = lambda x : cast(datetime, x.pubdate), reverse = reverse)

        return lst
    def __get_most_recent(self, releases : list[PyPiRelease]) -> Tuple[Optional[str], Optional[datetime]]:
        
        '''Returns (title, pubdate).'''
        
        most_recent : PyPiRelease = releases[0]
        if most_recent is None:
            return (None, None)

        return (releases[0].title, releases[0].pubdate)

    def fetch(self, package_name: str, only_final_releases: bool) -> PyPiSession:

        '''
            Downloads (through the get function provided at construction time)
            and parses the releases.xml of package_name.

            Releases without title or pubdate are discarded, the remaining ones
            are sorted from the most recent to the oldest and, when
            only_final_releases is True, only the final releases are kept.

            Returns a PyPiSession with the resulting releases and the
            title/pubdate of the first one.
        '''

        response: Response = self.__get_function(self.__format_url(package_name = package_name))
        releases: list[PyPiRelease] = self.__parse_response(response = response)

        # Incomplete releases are dropped before sorting, then the list is ordered by date.
        for predicate in (self.__has_title, self.__has_pubdate):
            releases = self.__filter(releases = releases, function = predicate)
        releases = self.__sort_by_pubdate(releases = releases)

        if only_final_releases:
            releases = self.__filter(releases = releases, function = self.__is_final_release)

        latest_version, latest_version_date = self.__get_most_recent(releases = releases)

        return PyPiSession(
            package_name = package_name,
            releases = releases,
            latest_version = latest_version,
            latest_version_date = latest_version_date
        )
    def format_session(self, session : PyPiSession) -> str:

        '''
            Formats the content of the provided session.

            Example: "('numpy', '2.1.2', '2024-10-05')"    
        '''

        latest_version : str = self.__format_optional_string(opt = session.latest_version)
        latest_version_date_str : str = self.__format_optional_datetime(dt = session.latest_version_date)

        msg : str = f"('{session.package_name}', '{latest_version}', '{latest_version_date_str}')"

        return msg       
    def format_release(self, release : PyPiRelease) -> str:

        '''
            Formats the content of the provided release.

            Example: "{ 'title': '2.1.2', 'pubdate': '2024-10-05' }"
        '''

        return str(
                "{ "
                f"'title': '{release.title}', "
                f"'pubdate': '{self.__format_optional_datetime(dt = release.pubdate)}'"
                " }"                
            ) 
    def log_session(self, session : PyPiSession) -> None:

        '''Formats the content of the provided session and logs it.'''

        msg : str = self.format_session(session = session)
        self.__logging_function(msg)
    def log_releases(self, releases : list[PyPiRelease]) -> None: 

        '''Logs releases.'''

        for release in releases:
            msg : str = self.format_release(release = release)
            self.__logging_function(msg)

# Usage example: fetches the releases of numpy with the default collaborators
# (requests.get for the download, print for the logging) and logs one line
# per final release. Running it performs a real HTTP request.
release_manager : PyPiReleaseManager = PyPiReleaseManager()
session : PyPiSession = release_manager.fetch(package_name = "numpy", only_final_releases = True)
# Alternative output, currently disabled: logs a single summary line for the
# session instead of one line per release.
# release_manager.log_session(session = session)
release_manager.log_releases(releases = session.releases)


{ 'title': '2.1.2', 'pubdate': '2024-10-05' }
{ 'title': '2.1.1', 'pubdate': '2024-09-03' }
{ 'title': '2.0.2', 'pubdate': '2024-08-26' }
{ 'title': '2.1.0', 'pubdate': '2024-08-18' }
{ 'title': '2.0.1', 'pubdate': '2024-07-21' }
{ 'title': '2.0.0', 'pubdate': '2024-06-16' }
{ 'title': '1.26.4', 'pubdate': '2024-02-05' }
{ 'title': '1.26.3', 'pubdate': '2024-01-02' }
{ 'title': '1.26.2', 'pubdate': '2023-11-12' }
{ 'title': '1.26.1', 'pubdate': '2023-10-14' }
{ 'title': '1.26.0', 'pubdate': '2023-09-16' }
{ 'title': '1.25.2', 'pubdate': '2023-07-31' }
{ 'title': '1.25.1', 'pubdate': '2023-07-08' }
{ 'title': '1.24.4', 'pubdate': '2023-06-26' }
{ 'title': '1.25.0', 'pubdate': '2023-06-17' }
{ 'title': '1.24.3', 'pubdate': '2023-04-22' }
{ 'title': '1.24.2', 'pubdate': '2023-02-05' }
{ 'title': '1.24.1', 'pubdate': '2022-12-26' }
{ 'title': '1.24.0', 'pubdate': '2022-12-18' }
{ 'title': '1.23.5', 'pubdate': '2022-11-20' }
{ 'title': '1.23.4', 'pubdate': '2022-10-12' }
{ 'title': '1.23.3'