Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions scripts/codex_package/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ one grouped `cargo build` command per package:
The default cargo profile is `dev-small` because local iteration should favor
fast, small builds. Release jobs should pass `--cargo-profile release`.

`rg` is not built from this repository, so it remains an input. If `--rg-bin` is
omitted, the builder looks in the existing `codex-cli/vendor/<target>/path/`
location.
`rg` is not built from this repository, so the builder fetches it from the
DotSlash manifest at `codex-cli/bin/rg`. Downloaded archives are cached under
`<system temp dir>/codex-package/<target>-rg` (the system temp dir honors
`$TMPDIR` when it is set) and are reused only after the recorded size and
SHA-256 digest have been verified. Pass `--rg-bin` to use a local ripgrep
executable instead.
7 changes: 5 additions & 2 deletions scripts/codex_package/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from .layout import build_package_dir
from .layout import prepare_package_dir
from .layout import validate_package_dir
from .ripgrep import resolve_rg_bin
from .targets import TARGET_SPECS
from .targets import PackageInputs
from .targets import resolve_rg_bin


def parse_args() -> argparse.Namespace:
Expand Down Expand Up @@ -69,7 +69,10 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--rg-bin",
type=Path,
help="Path to the ripgrep executable to place in codex-path/.",
help=(
"Optional local ripgrep executable override instead of fetching from "
"codex-cli/bin/rg."
),
)
return parser.parse_args()

Expand Down
195 changes: 195 additions & 0 deletions scripts/codex_package/ripgrep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""Fetch ripgrep from the DotSlash manifest used by the npm package."""

import hashlib
import json
import shutil
import stat
import tarfile
import tempfile
import zipfile
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import urlopen

from .targets import REPO_ROOT
from .targets import TargetSpec
from .targets import resolve_input_path


# DotSlash manifest (also used by the npm package) that describes the ripgrep download.
RG_MANIFEST = REPO_ROOT.joinpath("codex-cli", "bin", "rg")
# Timeout, in seconds, handed to urlopen() when downloading the archive.
DOWNLOAD_TIMEOUT_SECS = 60


@dataclass(frozen=True)
class RgArtifact:
    """Immutable description of the ripgrep download for one platform.

    The values are taken from a single platform entry of the DotSlash manifest.
    """

    # Expected archive size in bytes; checked against the file size on disk.
    size: int
    # Expected SHA-256 hex digest of the archive.
    digest: str
    # Archive container type; extraction supports "tar.gz" and "zip".
    archive_format: str
    # Name of the ripgrep executable entry inside the archive.
    archive_member: str
    # URL the archive is downloaded from.
    url: str


def resolve_rg_bin(spec: TargetSpec, rg_bin: Path | None) -> Path:
    """Return the ripgrep executable to package for *spec*.

    When *rg_bin* is ``None`` the binary is obtained through ``fetch_rg``;
    otherwise the explicit ``--rg-bin`` path is resolved through
    ``resolve_input_path``.
    """
    if rg_bin is None:
        return fetch_rg(spec)
    return resolve_input_path(rg_bin, "ripgrep executable", "--rg-bin")


def fetch_rg(
    spec: TargetSpec,
    *,
    manifest_path: Path = RG_MANIFEST,
    cache_root: Path | None = None,
) -> Path:
    """Download (or reuse) the ripgrep archive for *spec* and extract the binary.

    Args:
        spec: Target whose manifest entry, cache directory name and
            executable name are used.
        manifest_path: DotSlash manifest describing the download.
        cache_root: Directory holding per-target caches; falls back to
            ``default_cache_root()`` when not given.

    Returns:
        Path of the extracted ripgrep executable inside the cache directory.

    Raises:
        RuntimeError: If a freshly downloaded archive fails verification.
    """
    artifact = artifact_for_target(spec, manifest_path)

    base_dir = cache_root or default_cache_root()
    cache_dir = base_dir / f"{spec.target}-rg"
    archive_path = cache_dir / archive_filename(artifact.url)

    cached_ok = archive_is_valid(archive_path, artifact)
    if not cached_ok:
        download_archive(artifact.url, archive_path)
        try:
            verify_archive(archive_path, artifact)
        except RuntimeError:
            # Never leave an unverified download behind in the cache.
            archive_path.unlink(missing_ok=True)
            raise

    rg_path = cache_dir / spec.rg_name
    extract_rg(archive_path, artifact, rg_path)
    if spec.is_windows:
        return rg_path

    # Extraction writes plain bytes, so the execute bits are added explicitly.
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    rg_path.chmod(rg_path.stat().st_mode | exec_bits)
    return rg_path


def artifact_for_target(spec: TargetSpec, manifest_path: Path) -> RgArtifact:
    """Build the ``RgArtifact`` for *spec* from the DotSlash manifest.

    Args:
        spec: Target whose ``dotslash_platform`` selects the manifest entry.
        manifest_path: Path to the DotSlash manifest file.

    Returns:
        The size, digest, archive layout and download URL for the platform.

    Raises:
        RuntimeError: If the platform entry is absent, lists no providers,
            uses a hash other than sha256, has no provider with a ``url``,
            or is missing (or has a malformed) required field.
    """
    platform = spec.dotslash_platform
    manifest = load_manifest(manifest_path)
    try:
        platform_info = manifest["platforms"][platform]
    except KeyError as exc:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} is missing platform {platform!r}"
        ) from exc

    providers = platform_info.get("providers")
    if not providers:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} has no providers for {platform!r}"
        )

    hash_name = platform_info.get("hash")
    if hash_name != "sha256":
        raise RuntimeError(
            f"Unsupported ripgrep hash {hash_name!r} for "
            f"{platform!r}; expected sha256"
        )

    # A provider entry is not guaranteed to carry a plain URL, so take the
    # first one that does instead of assuming it is always providers[0].
    url = next(
        (
            provider["url"]
            for provider in providers
            if isinstance(provider, dict) and "url" in provider
        ),
        None,
    )
    if url is None:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} has no URL provider for {platform!r}"
        )

    # Report malformed entries as RuntimeError, like every other manifest
    # problem in this function, rather than leaking a bare KeyError.
    try:
        return RgArtifact(
            size=int(platform_info["size"]),
            digest=str(platform_info["digest"]),
            archive_format=str(platform_info["format"]),
            archive_member=str(platform_info["path"]),
            url=str(url),
        )
    except KeyError as exc:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} is missing field "
            f"{exc.args[0]!r} for {platform!r}"
        ) from exc
    except (TypeError, ValueError) as exc:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} has an invalid size for {platform!r}"
        ) from exc


def load_manifest(manifest_path: Path) -> dict:
    """Parse the DotSlash manifest at *manifest_path* as JSON.

    A leading ``#!`` line, when present, is dropped before parsing because it
    is not part of the JSON document.
    """
    raw = manifest_path.read_text(encoding="utf-8")
    if not raw.startswith("#!"):
        return json.loads(raw)

    _shebang, *body_lines = raw.splitlines()
    return json.loads("\n".join(body_lines))


def default_cache_root() -> Path:
    """Return the ``codex-package`` directory under the system temp directory."""
    return Path(tempfile.gettempdir(), "codex-package")


def archive_filename(url: str) -> str:
    """Return the final path component of *url* to use as the archive's file name.

    Raises:
        RuntimeError: If the URL path has no final component.
    """
    url_path = urlparse(url).path
    if name := Path(url_path).name:
        return name
    raise RuntimeError(f"Unable to determine archive filename from {url}")


def archive_is_valid(archive_path: Path, artifact: RgArtifact) -> bool:
    """Report whether a cached archive exists and passes verification.

    A file that fails verification is deleted, so the caller can download a
    fresh copy to the same path.
    """
    if archive_path.is_file():
        try:
            verify_archive(archive_path, artifact)
        except RuntimeError:
            # Stale or corrupt cache entry: remove it.
            archive_path.unlink(missing_ok=True)
        else:
            return True
    return False


def verify_archive(archive_path: Path, artifact: RgArtifact) -> None:
    """Check *archive_path* against the size and SHA-256 digest in *artifact*.

    The size is compared first, so a mismatch is reported without hashing
    the file.

    Raises:
        RuntimeError: If the size or the digest does not match.
    """
    size_on_disk = archive_path.stat().st_size
    if size_on_disk != artifact.size:
        raise RuntimeError(
            f"ripgrep archive {archive_path} has size {size_on_disk}, expected {artifact.size}"
        )

    hasher = hashlib.sha256()
    with open(archive_path, "rb") as stream:
        # Hash in 1 MiB blocks so the archive is never held fully in memory.
        while block := stream.read(1024 * 1024):
            hasher.update(block)

    computed = hasher.hexdigest()
    if computed != artifact.digest:
        raise RuntimeError(
            f"ripgrep archive {archive_path} has sha256 {computed}, "
            f"expected {artifact.digest}"
        )


def download_archive(url: str, archive_path: Path) -> None:
    """Download *url* to *archive_path* via a sibling ``.tmp`` file.

    The data is written to ``<archive name>.tmp`` and renamed into place only
    after the copy finishes, so a failed download never leaves a partial file
    at *archive_path*.
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    partial = archive_path.with_name(f"{archive_path.name}.tmp")
    # Discard leftovers from an earlier interrupted download.
    partial.unlink(missing_ok=True)
    try:
        with (
            urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECS) as response,
            open(partial, "wb") as sink,
        ):
            shutil.copyfileobj(response, sink)
        partial.replace(archive_path)
    finally:
        # No-op after a successful rename; removes the partial file on failure.
        partial.unlink(missing_ok=True)


def extract_rg(archive_path: Path, artifact: RgArtifact, dest: Path) -> None:
    """Extract the ripgrep executable named by *artifact* from the archive to *dest*.

    Any existing file at *dest* is removed first. ``tar.gz`` and ``zip``
    archives are supported.

    Raises:
        RuntimeError: If the archive format is unsupported, the member is
            missing from the archive, or (tar only) the member is not a file.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.unlink(missing_ok=True)

    fmt = artifact.archive_format
    member_name = artifact.archive_member

    if fmt == "zip":
        with zipfile.ZipFile(archive_path) as bundle:
            try:
                with bundle.open(member_name) as source, open(dest, "wb") as sink:
                    shutil.copyfileobj(source, sink)
            except KeyError as exc:
                raise RuntimeError(
                    f"ripgrep archive {archive_path} is missing {member_name!r}"
                ) from exc
        return

    if fmt != "tar.gz":
        raise RuntimeError(
            f"Unsupported ripgrep archive format {fmt!r}; expected tar.gz or zip"
        )

    with tarfile.open(archive_path, "r:gz") as bundle:
        try:
            entry = bundle.getmember(member_name)
        except KeyError as exc:
            raise RuntimeError(
                f"ripgrep archive {archive_path} is missing {member_name!r}"
            ) from exc

        # extractfile() returns None for members with no file data (e.g. directories).
        source = bundle.extractfile(entry)
        if source is None:
            raise RuntimeError(
                f"ripgrep archive member {member_name!r} is not a file"
            )
        with source, open(dest, "wb") as sink:
            shutil.copyfileobj(source, sink)
32 changes: 8 additions & 24 deletions scripts/codex_package/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class TargetSpec:
target: str
is_windows: bool
is_linux: bool
dotslash_platform: str

@property
def exe_suffix(self) -> str:
Expand Down Expand Up @@ -42,53 +43,43 @@ class PackageInputs:
target="x86_64-unknown-linux-musl",
is_windows=False,
is_linux=True,
dotslash_platform="linux-x86_64",
),
"aarch64-unknown-linux-musl": TargetSpec(
target="aarch64-unknown-linux-musl",
is_windows=False,
is_linux=True,
dotslash_platform="linux-aarch64",
),
"x86_64-apple-darwin": TargetSpec(
target="x86_64-apple-darwin",
is_windows=False,
is_linux=False,
dotslash_platform="macos-x86_64",
),
"aarch64-apple-darwin": TargetSpec(
target="aarch64-apple-darwin",
is_windows=False,
is_linux=False,
dotslash_platform="macos-aarch64",
),
"x86_64-pc-windows-msvc": TargetSpec(
target="x86_64-pc-windows-msvc",
is_windows=True,
is_linux=False,
dotslash_platform="windows-x86_64",
),
"aarch64-pc-windows-msvc": TargetSpec(
target="aarch64-pc-windows-msvc",
is_windows=True,
is_linux=False,
dotslash_platform="windows-aarch64",
),
}


def resolve_rg_bin(spec: TargetSpec, rg_bin: Path | None) -> Path:
return resolve_input_path(
rg_bin,
default_rg_candidates(spec),
"ripgrep executable",
"--rg-bin",
)


def default_rg_candidates(spec: TargetSpec) -> list[Path]:
return [
REPO_ROOT / "codex-cli" / "vendor" / spec.target / "path" / spec.rg_name,
]


def resolve_input_path(
explicit_path: Path | None,
default_candidates: list[Path],
description: str,
flag_name: str,
) -> Path:
Expand All @@ -100,14 +91,7 @@ def resolve_input_path(
raise RuntimeError(f"{description} is not executable: {path}")
return path

for candidate in default_candidates:
if candidate.is_file():
return candidate.resolve()

candidates = "\n".join(f" - {candidate}" for candidate in default_candidates)
raise RuntimeError(
f"Could not find {description}. Pass {flag_name}, or create one of:\n{candidates}"
)
raise RuntimeError(f"Must specify {flag_name} for {description}.")


def is_executable(path: Path) -> bool:
Expand Down
Loading