## Get list of all packages

In [1]:
import pkgutil
from importlib.metadata import metadata
from pandas import DataFrame, Index

packages = [pkg.name for pkg in pkgutil.iter_modules() if pkg.ispkg]
packages = sorted([pkg for pkg in packages if not pkg.startswith("_")])

In [44]:
x.metadata.keys()

['Metadata-Version',
 'Name',
 'Version',
 'Summary',
 'Author',
 'Author-email',
 'License',
 'Project-URL',
 'Project-URL',
 'Project-URL',
 'Project-URL',
 'Keywords',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Classifier',
 'Requires-Python',
 'Description-Content-Type',
 'License-File',
 'Requires-Dist',
 'Provides-Extra',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Provides-Extra',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'Provides-Extra',
 'Requires-Dist',
 'Provides-Extra',
 'Requires-Dist',
 'Requires-Dist',
 'Requires-Dist',
 'R

In [126]:
import importlib.metadata

packages = {
    x.name: (
        x.version,
        x.metadata["Summary"],
        x.metadata["License"],
    )
    for x in importlib.metadata.distributions()
}
local_packages = DataFrame.from_dict(
    packages, orient="index", columns=["Version", "Summary", "License"]
)
local_packages

Unnamed: 0,Version,Summary,License
ml-env,0.8,A collection of packages for a ML environment.,MIT License\n===========\n\nCopyright (c) 2021...
debugpy,1.8.1,An implementation of the Debug Adapter Protoco...,MIT
black,24.4.2,The uncompromising code formatter.,MIT
hyperopt,0.2.7,Distributed Asynchronous Hyperparameter Optimi...,BSD
holidays,0.49,Generate and work with holidays in Python,Copyright (c) Vacanza Team and individual cont...
...,...,...,...
networkx,3.3,Python package for creating and manipulating g...,
memray,1.12.0,A memory profiler for Python applications,Apache 2.0
tensorflow-io-gcs-filesystem,0.37.0,TensorFlow IO,
chex,0.1.86,"Chex: Testing made fun, in JAX!",Apache 2.0


## Validate Version number

In [72]:
import re

VERSION_PATTERN = r"""
    v?
    (?:
        (?:(?P<epoch>[0-9]+)!)?                           # epoch
        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
        (?P<pre>                                          # pre-release
            [-_\.]?
            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
            [-_\.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post>                                         # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_\.]?
                (?P<post_l>post|rev|r)
                [-_\.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev>                                          # dev release
            [-_\.]?
            (?P<dev_l>dev)
            [-_\.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
"""

regex = re.compile(
    r"^\s*" + VERSION_PATTERN + r"\s*$",
    re.VERBOSE | re.IGNORECASE,
)


def is_canonical(version: str):
    return re.match(regex, version) is not None


for pkg in packages:
    if not is_canonical(packages[pkg][0]):
        print(pkg, packages[pkg][0])

## Get Release Date

In [73]:
import aiohttp
from tqdm.asyncio import tqdm
from typing import Any


async def get_pypi_json(pkg: str, session: aiohttp.ClientSession) -> dict[str, Any]:
    url = f"https://pypi.org/pypi/{pkg}/json"
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.json()


async def get_all_pypi_json(packages: list[str]) -> dict[str, dict[str, Any]]:
    async with aiohttp.ClientSession() as session:
        tasks = (get_pypi_json(pkg, session=session) for pkg in packages)
        responses = await tqdm.gather(*tasks)
        return {pkg: data for pkg, data in zip(packages, responses)}


# Usage example
pypi_data = await get_all_pypi_json(packages)
# print(len(pypi_data), pypi_data.keys())

100%|██████████| 511/511 [00:01<00:00, 261.69it/s]

511 dict_keys(['ml-env', 'debugpy', 'black', 'hyperopt', 'holidays', 'tensorflow-datasets', 'sphinx-math-dollar', 'google-pasta', 'filelock', 'types-decorator', 'freetype-py', 'tabulate', 'types-pyOpenSSL', 'tbparse', 'certifi', 'lsprotocol', 'nbstripout', 'threadpoolctl', 'hdmedians', 'jupyterlab', 'cycler', 'nest-asyncio', 'types-chardet', 'sphinx-copybutton', 'fastjsonschema', 'labmaze', 'pure-eval', 'pylint', 'opencensus-context', 'protobuf', 'sortedcontainers', 'tzdata', 'pexpect', 'requests', 'jupytext', 'frozenlist', 'nvidia-cuda-nvrtc-cu12', 'types-docutils', 'pycryptodome', 'pyaml', 'nfoursid', 'greenlet', 'jeepney', 'bleach', 'h11', 'oyaml', 'importlib_metadata', 'types-cffi', 'pytz', 'jupyterlab_server', 'Cython', 'fsspec', 'liac-arff', 'structlog', 'scikit-bio', 'google-api-core', 'referencing', 'Sphinx', 'jupyter_packaging', 'jupyter-resource-usage', 'pluggy', 'jsonpointer', 'argon2-cffi-bindings', 'opt-einsum', 'jupyterlab-lsp', 'anytree', 'jaxlib', 'scikit-image', 'sphin




In [None]:
import asyncio
import json
from urllib.request import urlopen


async def get_pypi_json(pkg: str) -> dict:
    url = f"https://pypi.org/pypi/{pkg}/json"
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(None, urlopen, url)
    data = response.read()
    return json.loads(data)


async def get_all_pypi_json(packages: list[str]) -> dict:
    tasks = [get_pypi_json(pkg) for pkg in packages]
    responses = await asyncio.gather(*tasks)
    return {pkg: data for pkg, data in zip(packages, responses)}


# Usage example
pypi_data = await get_all_pypi_json(packages)
# print(pypi_data)

In [130]:
## summarize the data in DataFrame
from datetime import datetime
import warnings
from typing import TypeAlias

JSON: TypeAlias = dict[str, Any]


def get_release_version(version: str) -> tuple[int, ...]:
    match = re.match(regex, version)
    if match is None:
        warnings.warn(f"Invalid version: {version}")
        return (0,)
    return tuple(int(x) for x in match.group("release").split("."))


def get_release_date(releases: list[JSON]) -> datetime | None:
    uploads = [datetime.fromisoformat(release["upload_time"]) for release in releases]
    if not uploads:
        return None
    return min(uploads)


def get_lastest_version(pkg: str) -> tuple[str, datetime | None]:
    data = pypi_data[pkg]
    releases: dict[str, list[JSON]] = data["releases"]
    sorted_releases = sorted(releases, key=get_release_version)
    latest_release = sorted_releases[-1]
    return latest_release, get_release_date(releases[latest_release])


pypi_packages = {pkg: get_lastest_version(pkg) for pkg in pypi_data}
latest = DataFrame.from_dict(
    pypi_packages, orient="index", columns=["version", "date"]
).sort_values("date")



## Check for unmaintained packages

In [146]:
deprecated = latest.loc[latest.date < "2022"]
print(deprecated)

# check pyproject.toml for deprecated packages
#  load pyproject.toml as string

with open("pyproject.toml", encoding="utf8") as file:
    pyproject = file.read()

# regex match dependencies
for name in deprecated.index:
    regex = re.compile(rf"{name}")
    # show match and line number and line
    for i, line in enumerate(pyproject.split("\n")):
        if regex.search(line):
            print(f"pyproject.toml:{i+1}: {line} ({name})")

                         version                date
wget                         3.2 2015-10-22 15:26:37
asciitree                  0.3.3 2016-09-05 19:10:42
ipython-genutils           0.2.0 2017-03-13 22:12:25
webencodings               0.5.1 2017-04-05 20:21:32
ply                         3.11 2018-02-15 19:01:27
aiohttp-cors               0.7.0 2018-03-06 15:45:42
karma_sphinx_theme         0.0.8 2018-06-20 00:03:29
pickleshare                0.7.5 2018-09-25 19:17:35
wimpy                        0.6 2018-10-09 05:59:32
docker-pycreds             0.4.0 2018-11-29 03:26:49
sphinxcontrib-jsmath       1.0.1 2019-01-21 16:10:14
text-unidecode               1.3 2019-08-30 21:36:45
colorful                 0.6.0a1 2019-09-04 18:47:15
aiohttp                  4.0.0a1 2019-10-09 11:29:30
rfc3986-validator          0.1.1 2019-10-28 16:00:13
promise                      2.3 2019-12-18 07:31:43
astunparse                 1.6.3 2019-12-22 18:12:11
odfpy                      1.4.1 2020-01-18 16