In [None]:
# Old experiment: checks whether the `prepare_metadata_for_build_wheel` hook returns the same METADATA
# as the wheel that `build_wheel` produces later.
# Result: it does return the same metadata :)

In [None]:
# Environment setup notes, kept as a bare string so nothing here is executed by the kernel.
# The commands start a `python:3.8` container with the two project directories mounted,
# create and activate a venv, install the Python build tooling, install a Rust toolchain
# under /app/sdist_check, and finally launch Jupyter.
# NOTE(review): a later cell imports `tqdm`, which is not named in the pip install lines
# below — presumably it needs to be installed as well; confirm.
"""
docker run --net=host --cpus 6 --name sdist_test -it --workdir /app -v "$(pwd)/sdist_check:/app/sdist_check" -v "$(pwd)/resolve_prototype:/app/resolve_prototype" python:3.8 bash
python -m venv sdist_check/.venv
. sdist_check/.venv/bin/activate
pip install jupyter aiofiles build httpx importlib-metadata orjson pydantic respx tomlkit
pip install wheel setuptools-rust flit hatch # what we might need
pip install cppy numpy Cython maturin setuptools_scm
apt install -y libunwind-dev
export CARGO_HOME=/app/sdist_check/cargo_home
export RUSTUP_HOME=/app/sdist_check/rustup_home
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
source "/app/sdist_check/cargo_home/env"
jupyter notebook --allow-root
"""

In [None]:
from pathlib import Path

# Working directory for this experiment (inputs, downloaded sdists, extracted metadata).
sdist_check = Path("sdist_check")

# `popular.txt` holds one `name==version` pin per line. Blank lines are skipped and
# surrounding whitespace is stripped so that every entry is a clean (name, version)
# pair; otherwise a stray empty line would only blow up later when the pairs are unpacked.
names_and_versions = [
    tuple(line.strip().split("==", 1))
    for line in sdist_check.joinpath("popular.txt").read_text().splitlines()
    if line.strip()
]
print(names_and_versions)

In [None]:
import httpx
from resolve_prototype.pypi_api_types import ProjectVersionJsonResponse
import urllib.request

# Directory (inside the experiment folder) where the source distributions are stored.
sdist_dir = sdist_check / "sdists"

In [None]:
# Download the sdist of every pinned (name, version) into `sdist_dir`.
# Archives that are already present are not fetched again.
sdist_dir.mkdir(exist_ok=True)
for name, version in names_and_versions:
    # The f"https://files.pythonhosted.org/packages/source/{name[0]}/{name}/{name}-{version}.tar.gz" url did not work due to normalization
    url = f"https://pypi.org/pypi/{name}/{version}/json"
    print(url)
    response = httpx.get(url, follow_redirects=True)
    response.raise_for_status()
    data = ProjectVersionJsonResponse(**response.json())
    # Pick the first file of this release that is a source distribution.
    for url_object in data.urls:
        if url_object.packagetype == "sdist":
            break
    else:
        raise RuntimeError(f"sdist not found {name} {version}")
    # Convert once and use the plain string everywhere: the filename is derived from
    # the string form, and `urlretrieve` expects a str (the model's URL field may not
    # be a str subclass — TODO confirm against ProjectVersionJsonResponse).
    sdist_url = str(url_object.url)
    print(sdist_url)

    filename = sdist_url.split("/")[-1]
    downloaded_file = sdist_dir.joinpath(filename)
    if not downloaded_file.exists():
        # Download to a side file and rename on success, so an interrupted transfer
        # never leaves a truncated archive that the `exists()` check would then
        # treat as a finished download.
        partial_file = sdist_dir.joinpath(filename + ".part")
        try:
            urllib.request.urlretrieve(sdist_url, partial_file)
        except BaseException:
            partial_file.unlink(missing_ok=True)
            raise
        partial_file.replace(downloaded_file)

In [None]:
# Index every file in `sdist_dir` by a normalized key: lower-cased file name with the
# archive extension removed and every "." and "_" turned into "-".
sdist_map = {}
for archive in sdist_dir.iterdir():
    key = archive.name.lower()
    # Apply the substitutions in order; the extensions must go before "." is rewritten.
    for old, new in ((".tar.gz", ""), (".zip", ""), (".", "-"), ("_", "-")):
        key = key.replace(old, new)
    sdist_map[key] = archive

In [None]:
from resolve_prototype.sdist import ProjectHooksCaptureOutput
import zipfile
import shutil
from build import ProjectBuilder, BuildBackendException
from tempfile import TemporaryDirectory
from tqdm.auto import tqdm

def _unpack_single_root(archive, target):
    """Unpack `archive` into `target` and return the single top-level entry it contains.

    Raises ValueError (from the unpacking assignment) if the archive does not
    contain exactly one top-level entry.
    """
    shutil.unpack_archive(archive, target)
    [root] = target.iterdir()
    return root


def _build_failure(stage, name, version, error, capture):
    """Create the RuntimeError for a failed build step, including the captured output."""
    return RuntimeError(
        f"Failed to build {stage} for {name} {version}: {error}\n"
        f"--- Stdout:\n{capture.stdout}\n"
        f"--- Stderr:\n{capture.stderr}\n"
        "---\n"
    )


# For every pinned project, produce the `.dist-info` directory twice — once through
# `metadata_path` and once by building the full wheel — and store both copies under
# `sdist_check/metadata/<name>/{meta_only,full_build}` for later comparison.
for name, version in tqdm(names_and_versions):
    downloaded_file = sdist_map[
        f"{name}-{version}".lower().replace(".", "-").replace("_", "-")
    ]
    metadata_store = sdist_check.joinpath("metadata").joinpath(name)
    # Two entries means both results were stored by an earlier run; skip the rebuild.
    if metadata_store.is_dir() and len(list(metadata_store.iterdir())) == 2:
        continue
    metadata_store.mkdir(parents=True, exist_ok=True)

    with TemporaryDirectory() as temp_dir:
        temp_dir = Path(temp_dir)

        # --- Step 1: metadata only ---
        extracted1 = _unpack_single_root(
            downloaded_file, temp_dir.joinpath("extracted1")
        )
        metadata_dir1 = temp_dir.joinpath("metadata_dir1")

        capture = ProjectHooksCaptureOutput()
        print(
            name,
            version,
            ProjectBuilder(
                extracted1, runner=capture.subprocess_runner
            ).get_requires_for_build("wheel"),
        )

        # Fresh capture so a failure report only contains this step's output.
        capture = ProjectHooksCaptureOutput()
        try:
            ProjectBuilder(extracted1, runner=capture.subprocess_runner).metadata_path(
                metadata_dir1
            )
        except BuildBackendException as e:
            raise _build_failure("metadata", name, version, e, capture) from e

        # Replace any stale result from an earlier, incomplete run.
        meta_only_store = metadata_store.joinpath("meta_only")
        if meta_only_store.is_dir():
            shutil.rmtree(meta_only_store)
        [metadata_dir1] = metadata_dir1.glob("*.dist-info")
        shutil.copytree(metadata_dir1, meta_only_store)

        # --- Step 2: full wheel build, from a second pristine extraction ---
        extracted2 = _unpack_single_root(
            downloaded_file, temp_dir.joinpath("extracted2")
        )
        wheel_dir2 = temp_dir.joinpath("wheel_dir2")
        extract_dir2 = temp_dir.joinpath("extract_dir2")

        capture = ProjectHooksCaptureOutput()
        try:
            ProjectBuilder(extracted2, runner=capture.subprocess_runner).build(
                "wheel", wheel_dir2
            )
        except BuildBackendException as e:
            # This is the wheel step, so the message must not say "metadata".
            raise _build_failure("wheel", name, version, e, capture) from e

        # A wheel is a zip archive; extract it to get at its `.dist-info` directory.
        [wheel_file] = wheel_dir2.iterdir()
        with zipfile.ZipFile(wheel_file, "r") as zip_ref:
            zip_ref.extractall(extract_dir2)
        [metadata_dir2] = extract_dir2.glob("*.dist-info")

        full_build_store = metadata_store.joinpath("full_build")
        if full_build_store.is_dir():
            shutil.rmtree(full_build_store)
        shutil.copytree(metadata_dir2, full_build_store)

In [None]:
# Report, per project, whether the METADATA from the metadata-only step is identical
# to the METADATA found in the fully built wheel. Projects without a stored full
# build are skipped. Iteration is sorted so the report order is reproducible.
for project in sorted(Path("sdist_check").joinpath("metadata").iterdir()):
    full_build_file = project.joinpath("full_build").joinpath("METADATA")
    if not full_build_file.exists():
        continue
    # Decode explicitly as UTF-8 instead of relying on the locale's default encoding.
    full_build = full_build_file.read_text(encoding="utf-8")
    meta_only = (
        project.joinpath("meta_only").joinpath("METADATA").read_text(encoding="utf-8")
    )

    print(project.name, full_build == meta_only)

In [None]:
# Fetch the 2023.01.25 pypi-data snapshot and unpack it to sethmlarson_pypi_data/pypi.db.
# Each `!` line runs in its own subshell, so a `!cd` would not carry over to the next
# line; create the target directory instead so the output redirection cannot fail.
!mkdir -p sethmlarson_pypi_data
!curl -L -s https://github.com/sethmlarson/pypi-data/releases/download/2023.01.25/pypi.db.gz | gunzip -c > sethmlarson_pypi_data/pypi.db