diff --git a/backend/.env.dist.local b/backend/.env.dist.local
index 14c770b4dc..64365ea36a 100755
--- a/backend/.env.dist.local
+++ b/backend/.env.dist.local
@@ -183,3 +183,14 @@ ENRICHER_GITHUB_TOKENS=
ENRICHER_BATCH_SIZE=100
ENRICHER_REPO_UPDATE_INTERVAL_HOURS=24
ENRICHER_IDLE_SLEEP_SEC=60
+
+# osv-sync (Temporal-scheduled; see services/apps/packages_worker/src/osv/schedule.ts)
+# OSV_ECOSYSTEMS uses OSV's canonical bucket case (npm lowercase, Maven titlecase) because
+# the bucket URL //all.zip is case-sensitive (Maven/all.zip exists,
+# maven/all.zip 404s). The allowlist check and DB storage normalize to lowercase
+# internally per ADR-0001 §OSV "Ecosystem normalization", so downstream stays lowercase.
+OSV_BULK_BASE_URL=https://osv-vulnerabilities.storage.googleapis.com
+OSV_ECOSYSTEMS=npm,Maven
+OSV_TMP_DIR=/tmp/osv
+OSV_BATCH_SIZE=500
+OSV_DERIVE_BATCH_SIZE=1000
diff --git a/backend/src/osspckgs/migrations/V1779710880__initial_schema.sql b/backend/src/osspckgs/migrations/V1779710880__initial_schema.sql
index 396012d6ee..9c61a03e25 100644
--- a/backend/src/osspckgs/migrations/V1779710880__initial_schema.sql
+++ b/backend/src/osspckgs/migrations/V1779710880__initial_schema.sql
@@ -58,10 +58,12 @@ CREATE TABLE packages (
latest_release_at timestamptz,
dependent_packages_count int,
dependent_repos_count int,
- -- has_critical_vulnerability bool NOT NULL DEFAULT FALSE,
- -- Deferred: semantics undecided between (a) any advisory with no fixed_version vs
- -- (b) latest_version falls inside an affected semver range. Lateral join against
- -- advisory_packages used in queries until this is resolved.
+ -- has_critical_vulnerability: TRUE iff latest_version is inside an active
+ -- affected range of a critical advisory (CVSS >= 7.0) OR a MAL-* malicious-
+ -- package advisory matches the package. Maintained by the deriveCriticalFlag
+ -- activity in packages_worker/src/osv/. See ADR-0001 §`has_critical_vulnerability`
+ -- semantics for the option-b + MAL- override rationale.
+ has_critical_vulnerability bool NOT NULL DEFAULT FALSE,
criticality_score numeric(10, 4),
-- is_critical and last_rank_pass_at are not in the original pckgs.md spec; added so
-- the packages table can answer "is this package critical?" without joining packages_universe,
@@ -82,8 +84,12 @@ CREATE INDEX ON packages (ecosystem, name);
CREATE INDEX ON packages USING gin (keywords);
--- INDEX on has_critical_vulnerability removed — column is commented out above.
--- Uncomment both when semantics are decided.
+-- Partial index on has_critical_vulnerability TRUE rows only — that's the bucket
+-- the security overlay query needs ("list all packages with a known critical
+-- vuln"). The FALSE rows dominate the table and don't need an index.
+CREATE INDEX ON packages (has_critical_vulnerability)
+WHERE
+ has_critical_vulnerability;
CREATE INDEX ON packages (criticality_score DESC)
WHERE
@@ -569,6 +575,15 @@ CREATE TABLE advisories (
aliases text[], -- CVE-XXXX, GHSA-...
severity text, -- 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL'
cvss numeric(3, 1),
+ -- Provenance of the cvss value above. Lets downstream consumers distinguish
+ -- a real vendor-supplied vector from a synthesized qualitative fallback.
+ -- See ADR-0001 §CVSS scoring strategy. Allowed values:
+ -- 'osv_cvss_v3' numeric score from a CVSS_V3 vector
+ -- 'osv_cvss_v4' reserved; v4 numeric scoring deferred
+ -- 'osv_qualitative_fallback' synthesized from database_specific.severity
+ -- 'osv_malicious_package' MAL-* id with no CVSS vector
+ -- Extensible to 'ghsa' | 'nvd' as additional sources come online.
+ cvss_source text,
-- >= 7.0 intentional: treat HIGH + CRITICAL both as actionable
is_critical bool GENERATED ALWAYS AS (cvss >= 7.0) STORED,
summary text,
@@ -599,10 +614,27 @@ CREATE INDEX ON advisory_packages (package_id)
WHERE
package_id IS NOT NULL;
--- Version ranges affected by an advisory per package.
+-- Drives the resolveMissingPackageIds catch-up UPDATE in deriveCriticalFlag:
+-- the query filters WHERE package_id IS NULL and joins on (ecosystem,
+-- package_name). The non-partial (ecosystem, package_name) index above is
+-- usable here too (the planner just adds a Filter on package_id IS NULL), but
+-- as the table grows the vast majority of rows have package_id IS NOT NULL,
+-- so the non-partial scan ends up filtering out most of what it reads. This
+-- partial index only contains the still-unresolved rows, keeping it tiny
+-- regardless of total table size and making the daily catch-up O(unresolved)
+-- instead of O(total).
+CREATE INDEX ON advisory_packages (ecosystem, package_name)
+WHERE
+ package_id IS NULL;
+
+-- Version ranges affected by an advisory per package. Populated by the OSV
+-- ingest worker (packages_worker/src/osv) using introduced_version /
+-- fixed_version / last_affected. range_raw / unaffected_raw are reserved
+-- for the deps.dev BQ ingest worker (future): that worker writes the raw
+-- range strings without parsing into structured boundaries. The OSV upsert
+-- path only deletes rows where range_raw / unaffected_raw are both NULL,
+-- so deps.dev rows are not clobbered when OSV re-syncs.
-- COALESCE prevents silent duplicates when introduced_version is NULL.
--- BQ-sourced rows populate range_raw / unaffected_raw only; introduced/fixed/last_affected
--- are populated by a future range-parsing workstream.
CREATE TABLE advisory_affected_ranges (
id bigserial PRIMARY KEY,
advisory_package_id bigint NOT NULL REFERENCES advisory_packages (id),
@@ -613,7 +645,18 @@ CREATE TABLE advisory_affected_ranges (
unaffected_raw text -- raw UnaffectedVersions string from deps.dev BQ
);
-CREATE UNIQUE INDEX ON advisory_affected_ranges (advisory_package_id, COALESCE(introduced_version, ''));
+-- Full-tuple uniqueness so two ranges sharing introduced_version but differing
+-- in fixed_version or last_affected (cross-distro patches, partial fixes in a
+-- single advisory) both survive insertion. The narrower (advisory_package_id,
+-- introduced_version) form silently collapsed those cases to one row, dropping
+-- the wider range and under-reporting vulnerable windows in the derive step.
+-- See ADR-0001 §`advisory_affected_ranges` uniqueness scope.
+CREATE UNIQUE INDEX ON advisory_affected_ranges (
+ advisory_package_id,
+ COALESCE(introduced_version, ''),
+ COALESCE(fixed_version, ''),
+ COALESCE(last_affected, '')
+);
CREATE INDEX ON advisory_affected_ranges (advisory_package_id);
diff --git a/docs/adr/0001-oss-packages-design-decisions.md b/docs/adr/0001-oss-packages-design-decisions.md
index 3c1c528465..46daeddf89 100644
--- a/docs/adr/0001-oss-packages-design-decisions.md
+++ b/docs/adr/0001-oss-packages-design-decisions.md
@@ -10,17 +10,20 @@ The oss-packages domain is being built inside CDP as a new, independent capabili
## Scope and current status
-| Decision area | Status |
-| --- | --- |
-| Database placement | decided |
-| Worker architecture | decided |
-| Universe source and critical-package selection | decided (formula is a placeholder) |
-| Write semantics across sub-workers | decided |
-| Package → repository provenance | decided |
-| OSV as canonical security source | decided (`has_critical_vulnerability` flag deferred) |
-| Per-source ingestion strategies | decided (Sonatype API access pending) |
-| deps.dev coverage and gaps | decided |
-| Downloads timeline by tier | decided |
+| Decision area | Status |
+| ---------------------------------------------- | ------------------------------------- |
+| Database placement | decided |
+| Worker architecture | decided |
+| Universe source and critical-package selection | decided (formula is a placeholder) |
+| Write semantics across sub-workers | decided |
+| Package → repository provenance | decided |
+| OSV as canonical security source | decided |
+| CVSS scoring strategy | decided (v4 numeric scoring deferred) |
+| `has_critical_vulnerability` semantics | decided |
+| `advisory_affected_ranges` uniqueness scope | decided |
+| Per-source ingestion strategies | decided (Sonatype API access pending) |
+| deps.dev coverage and gaps | decided |
+| Downloads timeline by tier | decided |
---
@@ -34,11 +37,11 @@ We store all packages-domain data in a dedicated physical Postgres instance (`pa
**Partitioning rationale:**
-| Table | Strategy | Buckets | Hot query shape |
-| --- | --- | --- | --- |
-| `versions` | HASH(`package_id`) | 32 | Lookup by package — lands in one partition; ~2.8M rows each at 90M total |
-| `package_dependencies` | HASH(`depends_on_id`) | 64 | "Who depends on vulnerable package X?" — lands in one partition; ~18M rows each at 1.15B total |
-| `downloads_daily` | RANGE(`date`) via `pg_partman` | automatic | Time-series; pruning old partitions is straightforward |
+| Table | Strategy | Buckets | Hot query shape |
+| ---------------------- | ------------------------------ | --------- | ---------------------------------------------------------------------------------------------- |
+| `versions` | HASH(`package_id`) | 32 | Lookup by package — lands in one partition; ~2.8M rows each at 90M total |
+| `package_dependencies` | HASH(`depends_on_id`) | 64 | "Who depends on vulnerable package X?" — lands in one partition; ~18M rows each at 1.15B total |
+| `downloads_daily` | RANGE(`date`) via `pg_partman` | automatic | Time-series; pruning old partitions is straightforward |
**`package_dependencies` query trade-off**: partitioning on `depends_on_id` makes upstream queries fast — "which packages depend on X?" lands in one partition. The inverse — "what does package Y depend on?" (lookup by `package_id`, not `depends_on_id`) — is a cross-partition scan. The upstream direction is the security-critical hot path (vulnerability blast-radius analysis), so this trade-off is intentional.
@@ -90,17 +93,17 @@ The BigQuery free tier is approximately 1 TiB/month. Column projection and `Syst
Five sub-workers run concurrently (npm, Maven, OSV, GitHub, Docker Hub), all writing to the same `packages-db` schema. We define per-table write rules that allow concurrent writes without distributed locking:
-| Table | Rule |
-| --- | --- |
-| `packages` | Upsert on `purl`. Each worker only writes columns it owns; ecosystem isolation means column-level conflicts cannot occur in practice. |
-| `packages_universe` | Full truncate-and-replace on the weekly rank job. No other worker writes to this table. The truncate + bulk-insert must be wrapped in a single transaction or shadow-table swap to avoid a window of emptiness visible to the promotion query. |
-| `versions` | Append-only via `INSERT … ON CONFLICT DO NOTHING`. Yanked/deprecated status is a separate targeted `UPDATE (is_yanked = true) WHERE …`. |
-| `repos` | Registry workers (npm, Maven) do **not** write directly to `repos`. They write `package_repos` rows. The GitHub enricher — triggered when `repos.last_synced_at IS NULL` — upserts `repos` with metadata. Docker Hub worker adds `docker_*` columns on top. |
-| `package_repos` | Composite PK `(package_id, repo_url)`. Each `source` value ('declared', 'deps_dev', 'heuristic', 'manual') is a separate row — sources do not overwrite each other. |
-| `advisories` | Upsert on `osv_id`. OSV is the single source of truth; no other worker writes to this table. |
-| `maintainers` / `package_maintainers` | Upsert on `(ecosystem, username)`. Never delete — history is preserved. |
-| `downloads_daily` | Append-only time-series. Each `(package_id, date)` row is written once. npm and Maven workers own disjoint rows by ecosystem. Historical timelines are preserved — workers do not overwrite past dates. |
-| `downloads_last_30d` | Upsert on `(purl, end_date)`. Written by the weekly ranking worker only. The cached `packages_universe.downloads_last_30d` column must be updated in the same pass. |
+| Table | Rule |
+| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `packages` | Upsert on `purl`. Each worker only writes columns it owns; ecosystem isolation means column-level conflicts cannot occur in practice. |
+| `packages_universe` | Full truncate-and-replace on the weekly rank job. No other worker writes to this table. The truncate + bulk-insert must be wrapped in a single transaction or shadow-table swap to avoid a window of emptiness visible to the promotion query. |
+| `versions` | Append-only via `INSERT … ON CONFLICT DO NOTHING`. Yanked/deprecated status is a separate targeted `UPDATE (is_yanked = true) WHERE …`. |
+| `repos` | Registry workers (npm, Maven) do **not** write directly to `repos`. They write `package_repos` rows. The GitHub enricher — triggered when `repos.last_synced_at IS NULL` — upserts `repos` with metadata. Docker Hub worker adds `docker_*` columns on top. |
+| `package_repos` | Composite PK `(package_id, repo_url)`. Each `source` value ('declared', 'deps_dev', 'heuristic', 'manual') is a separate row — sources do not overwrite each other. |
+| `advisories` | Upsert on `osv_id`. OSV is the single source of truth; no other worker writes to this table. |
+| `maintainers` / `package_maintainers` | Upsert on `(ecosystem, username)`. Never delete — history is preserved. |
+| `downloads_daily` | Append-only time-series. Each `(package_id, date)` row is written once. npm and Maven workers own disjoint rows by ecosystem. Historical timelines are preserved — workers do not overwrite past dates. |
+| `downloads_last_30d` | Upsert on `(purl, end_date)`. Written by the weekly ranking worker only. The cached `packages_universe.downloads_last_30d` column must be updated in the same pass. |
The column-ownership rule is a social contract, not enforced by Postgres. Code review must catch cross-ecosystem or cross-source column writes using this table as the reference.
@@ -122,6 +125,7 @@ All repo URLs are **canonicalized** before insertion: scheme normalized to `http
The `packages` table retains `declared_repository_url` (raw) and `repository_url` (canonical highest-confidence match) as denormalized copies for quick access.
**Population order**:
+
1. Registry workers (npm, Maven) write `packages` and `package_repos` rows.
2. The GitHub enricher polls `repos` for rows where `last_synced_at IS NULL` (never enriched) or `last_synced_at < NOW() - INTERVAL ''` (stale). The re-sync interval is controlled via `ENRICHER_REPO_UPDATE_INTERVAL_HOURS`.
3. The enricher updates those rows with full metadata and sets `last_synced_at`.
@@ -146,24 +150,24 @@ We ingest the OSV bulk ZIP — full download daily, not incremental — into `ad
**Severity fallback** (many OSV records carry no CVSS vector):
| Qualitative severity | Synthesized cvss |
-| --- | --- |
-| `CRITICAL` | 9.5 |
-| `HIGH` | 7.5 |
-| `MEDIUM` | 5.0 |
-| `LOW` | 3.0 |
+| -------------------- | ---------------- |
+| `CRITICAL` | 9.5 |
+| `HIGH` | 7.5 |
+| `MEDIUM` | 5.0 |
+| `LOW` | 3.0 |
The qualitative `severity` tag is stored alongside the synthesized value so consumers can distinguish real CVSS from approximations.
**Ecosystem normalization**:
| OSV raw value | Stored as |
-| --- | --- |
-| `npm` | `npm` |
-| `Maven` | `maven` |
+| ------------- | --------- |
+| `npm` | `npm` |
+| `Maven` | `maven` |
**Derivation cadence**: `deriveCriticalFlag` runs at the end of each OSV sync pass in the same worker loop — no separate scheduler needed.
-**`has_critical_vulnerability` flag**: deferred — semantics not yet decided (see Open Questions). Callers join `advisory_packages` directly in the interim.
+**`has_critical_vulnerability` flag**: see the [§`has_critical_vulnerability` semantics](#has_critical_vulnerability-semantics) section below.
The ingest worker must stream-parse the bulk ZIP rather than loading it into memory.
@@ -171,6 +175,72 @@ The ingest worker must stream-parse the bulk ZIP rather than loading it into mem
---
+### CVSS scoring strategy
+
+OSV records carry severity as a `severity[]` array of `{type, score}` entries, where `type` is `CVSS_V2 | CVSS_V3 | CVSS_V4 | …` and `score` is the **vector string** (e.g. `CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H`), not a numeric base score. We compute the numeric base score inline from the FIRST v3.1 specification (~80 LOC in `services/apps/packages_worker/src/osv/cvssScoring.ts`), without a third-party CVSS dependency.
+
+Scoring fallback chain (in `extractSeverity.ts`):
+
+1. `MAL-*` id → `cvss = NULL`, `cvss_source = 'osv_malicious_package'`.
+2. CVSS_V3 vector → compute inline, `cvss_source = 'osv_cvss_v3'`.
+3. `database_specific.severity` qualitative tag → synthesized per the severity-fallback table in §OSV, `cvss_source = 'osv_qualitative_fallback'`.
+4. Nothing → `cvss = NULL` and `cvss_source = NULL`.
+
+Scope (`S`) metric is validated against `{U, C}` up front; a missing or invalid `S` returns `null` from the score function rather than silently falling through to the Scope:Unchanged formula.
+
+**CVSS v4 is deferred.** Computing v4 base scores requires the ~270-entry macro-vector lookup table from the FIRST v4.0 spec; the validation effort to verify it against reference vectors is its own slice of work. V4-only OSV records (no V3 sibling vector and no qualitative tag — ~1.1% of advisories as of 2026-05-27) land with `cvss = NULL`. The `cvss_source` column allows downstream consumers to distinguish synthesized-from-qualitative scores (`osv_qualitative_fallback`) from real V3 base scores (`osv_cvss_v3`), and the V4-NULL bucket is queryable for follow-up sizing (`SELECT COUNT(*) FROM advisories WHERE cvss_source IS NULL`).
+
+The inline implementation is unit-tested against six FIRST-published reference vectors (log4shell 10.0, shellshock 9.8, heartbleed 7.5, ChangeCipherSpec 4.8, a low-end vector at 3.3, and an all-None at 0.0), plus regression guards for missing-`S` and `S:X`.
+
+**Trade-off considered:** adopting a third-party CVSS package (e.g. npm `cvss` or a v4-capable alternative) was rejected — `cvss` covers v2/v3 only, so we'd still own the v4 problem; v4-capable libraries are recent and require validating against reference vectors anyway. Inline code we can unit-test against FIRST-published scores is the lower-risk path for a security-critical formula.
+
+**Decided**: 2026-05-27
+
+---
+
+### `has_critical_vulnerability` semantics
+
+`packages.has_critical_vulnerability = TRUE` iff there exists an advisory `a` such that:
+
+- `a.is_critical = TRUE` (CVSS ≥ 7.0) OR `a.osv_id LIKE 'MAL-%'` (malicious-package report), AND
+- The package's current `latest_version` falls inside one of `a`'s affected ranges per the ecosystem comparator (semver for npm, a Maven `ComparableVersion`-style comparator for Maven).
+
+A range `(introduced, fixed, last_affected)` matches `latest_version` when:
+
+- `introduced IS NULL OR introduced = '0' OR latest_version >= introduced`, AND
+- `fixed IS NULL OR latest_version < fixed`, AND
+- `last_affected IS NULL OR latest_version <= last_affected`.
+
+This is **option (b)** (latest_version inside an active range), plus a **MAL- override** so malicious-package reports flip the flag regardless of CVSS — the XZ-style maintainer-compromise case from the Osprey memo. ~213k of 220k npm OSV records are `MAL-*` with `cvss = NULL`, so option (b) on its own would miss the dominant security signal.
+
+**Why not option (a)** (any critical advisory exists for the package name, regardless of version): option (a) over-reports — a CVE patched in v1.0 flags a package now on v9.0 — and under-reports when an advisory has multiple `affected[]` ranges where only some are patched. The actionable consumer question is "is the version I'd install today vulnerable?", and that's option (b).
+
+**Why not SQL-only derivation:** Postgres has no native semver or Maven `ComparableVersion` comparator. Implementing either as a `plpgsql` function is a significantly larger maintenance surface than the ~120 LOC TypeScript equivalent and would need re-validation against every Postgres minor-version upgrade.
+
+**Maintenance.** `deriveCriticalFlag` (the second activity in the `osvSync` workflow) recomputes the flag for every package whose `latest_version` is non-null. Both the FALSE → TRUE and TRUE → FALSE transitions are handled idempotently. A catch-up resolver populates `advisory_packages.package_id` for advisories that arrived before the matching package was ingested — at most one OSV cycle lag (~24h) for late-arriving packages.
+
+The ecosystem-specific comparators are TypeScript (`services/apps/packages_worker/src/osv/versionCompare.ts`), unit-tested with 30 cases covering Maven qualifier ranks (alpha < beta < milestone < rc < snapshot < ga/final < sp), qualifier aliases (`final` / `release` / `ga` / empty all equal), cross-ecosystem null returns, and the `1.0-final == 1.0` edge case.
+
+**Known gap.** Packages without `latest_version` are skipped and remain FALSE regardless of matching advisories. V4-only advisories without a V3 sibling or qualitative tag (`cvss_source IS NULL`) do not contribute to the flag — see the CVSS scoring strategy section.
+
+**Decided**: 2026-05-28 (resolves the prior open question on this flag)
+
+---
+
+### `advisory_affected_ranges` uniqueness scope
+
+`advisory_affected_ranges` uses a full-tuple unique index `(advisory_package_id, COALESCE(introduced_version, ''), COALESCE(fixed_version, ''), COALESCE(last_affected, ''))` rather than the narrower `(advisory_package_id, introduced_version)`.
+
+The narrower form forces two ranges that share an `introduced_version` but differ in `fixed_version` or `last_affected` to collapse into one row — a real OSV case (cross-distro patches, partial fixes within a single advisory) — and silently drops the wider range. When the surviving range is the narrower one, the package's actual vulnerable window is under-reported and `has_critical_vulnerability` returns FALSE for versions inside the wider range. The full-tuple key preserves the §Write semantics principle "one package has many version ranges; no denormalization."
+
+The application-side `dedupeRanges` in `upsertAdvisory.ts` keys on the same full tuple so the pre-flight matches the database constraint exactly. Truly identical tuples (the original "OSV emits a redundant event on the same line" case) still collapse; ranges that differ in any component are all preserved.
+
+Local verification against the live OSV dataset (2026-05-28) showed the multi-range advisory_packages count was unchanged from the narrow-index baseline — current OSV data doesn't exercise the cross-distro multi-range path in practice. The fix is correctness-only on today's dataset; the bug it prevents is real but unreached.
+
+**Decided**: 2026-05-28
+
+---
+
### Per-source ingestion strategies
#### npm
@@ -224,67 +294,67 @@ deps.dev is the primary source for package identity, dependents, advisories, and
#### packages
-| Column | From deps.dev? | Source if not |
-| --- | --- | --- |
-| `purl`, `ecosystem`, `namespace`, `name` | yes | — |
-| `description`, `licenses`, `latest_version` | yes | — |
-| `declared_repository_url`, `homepage` | yes | — |
-| `first_release_at`, `latest_release_at`, `versions_count` | yes | — |
-| `dependent_packages_count` | yes (via `DependentsLatest`) | — |
-| `registry_url` | no | npm registry / crates.io / PyPI |
-| `status` | no | npm registry (deprecated/unpublished flag; no deps.dev equivalent) |
-| `licenses_raw` | no | npm registry / crates.io (raw SPDX before normalization) |
-| `keywords` | no | npm registry / PyPI / crates.io |
-| `dist_tags_latest`, `_next`, `_beta` | no | npm registry only |
-| `dependent_repos_count` | no | derived in Postgres: `COUNT(DISTINCT repo_id)` via `package_repos` |
-| `criticality_score`, `is_critical` | no | internal — `rank_packages_universe()` |
+| Column | From deps.dev? | Source if not |
+| --------------------------------------------------------- | ---------------------------- | ------------------------------------------------------------------ |
+| `purl`, `ecosystem`, `namespace`, `name` | yes | — |
+| `description`, `licenses`, `latest_version` | yes | — |
+| `declared_repository_url`, `homepage` | yes | — |
+| `first_release_at`, `latest_release_at`, `versions_count` | yes | — |
+| `dependent_packages_count` | yes (via `DependentsLatest`) | — |
+| `registry_url` | no | npm registry / crates.io / PyPI |
+| `status` | no | npm registry (deprecated/unpublished flag; no deps.dev equivalent) |
+| `licenses_raw` | no | npm registry / crates.io (raw SPDX before normalization) |
+| `keywords` | no | npm registry / PyPI / crates.io |
+| `dist_tags_latest`, `_next`, `_beta` | no | npm registry only |
+| `dependent_repos_count` | no | derived in Postgres: `COUNT(DISTINCT repo_id)` via `package_repos` |
+| `criticality_score`, `is_critical` | no | internal — `rank_packages_universe()` |
#### versions
-| Column | From deps.dev? | Source if not |
-| --- | --- | --- |
-| `number`, `published_at` | yes | — |
-| `is_prerelease` | yes (derived: `NOT VersionInfo.IsRelease`) | — |
-| `license` | yes (per-version, `PackageVersionsLatest`) | — |
-| `is_latest` | no | derived: `number = packages.latest_version` |
-| `is_yanked` | no | npm registry (deprecated flag per version); crates.io API |
+| Column | From deps.dev? | Source if not |
+| ------------------------ | ------------------------------------------ | --------------------------------------------------------- |
+| `number`, `published_at` | yes | — |
+| `is_prerelease` | yes (derived: `NOT VersionInfo.IsRelease`) | — |
+| `license` | yes (per-version, `PackageVersionsLatest`) | — |
+| `is_latest` | no | derived: `number = packages.latest_version` |
+| `is_yanked` | no | npm registry (deprecated flag per version); crates.io API |
#### repos
-| Column | From deps.dev? | Source if not |
-| --- | --- | --- |
-| `url`, `host`, `owner`, `name` | yes (`ProjectsLatest`) | — |
-| `description`, `homepage`, `stars`, `forks`, `open_issues` | yes | — |
-| `raw_project_type`, `raw_project_name` | yes | — |
-| `primary_language`, `topics`, `watchers` | no | GitHub API (`github-repos-enricher`) |
-| `last_commit_at`, `archived`, `disabled`, `is_fork`, `created_at` | no | GitHub API |
-| `scorecard_score`, `scorecard_last_run_at` | no | `bigquery-public-data.openssf.scorecardcron_v2_latest` |
+| Column | From deps.dev? | Source if not |
+| ----------------------------------------------------------------- | ---------------------- | ------------------------------------------------------ |
+| `url`, `host`, `owner`, `name` | yes (`ProjectsLatest`) | — |
+| `description`, `homepage`, `stars`, `forks`, `open_issues` | yes | — |
+| `raw_project_type`, `raw_project_name` | yes | — |
+| `primary_language`, `topics`, `watchers` | no | GitHub API (`github-repos-enricher`) |
+| `last_commit_at`, `archived`, `disabled`, `is_fork`, `created_at` | no | GitHub API |
+| `scorecard_score`, `scorecard_last_run_at` | no | `bigquery-public-data.openssf.scorecardcron_v2_latest` |
#### advisories
-| Column | From deps.dev? | Source if not |
-| --- | --- | --- |
-| `osv_id`, `source`, `source_url`, `summary`, `details` | yes (`AdvisoriesLatest`) | — |
-| `cvss`, `severity`, `aliases`, `published_at` | yes | — |
-| `modified_at` | no | tracked in-house on each re-sync |
+| Column | From deps.dev? | Source if not |
+| ------------------------------------------------------ | ------------------------ | -------------------------------- |
+| `osv_id`, `source`, `source_url`, `summary`, `details` | yes (`AdvisoriesLatest`) | — |
+| `cvss`, `severity`, `aliases`, `published_at` | yes | — |
+| `modified_at` | no | tracked in-house on each re-sync |
#### advisory_affected_ranges
-| Column | From deps.dev? | Source if not |
-| --- | --- | --- |
-| `range_raw`, `unaffected_raw` | yes | — |
-| `introduced_version`, `fixed_version`, `last_affected` | no | future range-parsing workstream (parses `range_raw`) |
+| Column | From deps.dev? | Source if not |
+| ------------------------------------------------------ | -------------- | ---------------------------------------------------- |
+| `range_raw`, `unaffected_raw` | yes | — |
+| `introduced_version`, `fixed_version`, `last_affected` | no | future range-parsing workstream (parses `range_raw`) |
#### Tables with zero deps.dev coverage
-| Table | Owning source |
-| --- | --- |
-| `repo_scorecard_checks` | `bigquery-public-data.openssf.scorecardcron_v2_latest` |
-| `repo_docker` | Docker Hub API / GHCR |
-| `maintainers`, `package_maintainers` | npm registry, crates.io, PyPI |
-| `package_funding_links` | npm registry (`funding` field) |
-| `package_name_history` | internal CDP tracking only |
-| `downloads_daily`, `downloads_last_30d` | registry APIs / BigQuery — see Downloads section |
+| Table | Owning source |
+| --------------------------------------- | ------------------------------------------------------ |
+| `repo_scorecard_checks` | `bigquery-public-data.openssf.scorecardcron_v2_latest` |
+| `repo_docker` | Docker Hub API / GHCR |
+| `maintainers`, `package_maintainers` | npm registry, crates.io, PyPI |
+| `package_funding_links` | npm registry (`funding` field) |
+| `package_name_history` | internal CDP tracking only |
+| `downloads_daily`, `downloads_last_30d` | registry APIs / BigQuery — see Downloads section |
Any new column added to the schema or new deps.dev table exposed requires an amendment to this section. If a sub-worker discovers coverage drift (deps.dev adds or removes fields), the change in ownership must be made explicit here — do not silently fill what was previously listed as a gap.
@@ -300,10 +370,10 @@ Tier 2 (`packages`) downloads are stored in `downloads_daily` — one row per `(
`downloads_last_30d` is keyed by `purl` (not `packages_universe.id`) so rows survive the weekly truncation of `packages_universe`.
-| Table | Tier | Grain | Unique key | PK | Partitioning |
-| --- | --- | --- | --- | --- | --- |
-| `downloads_daily` | 2 (`packages`) | one row per package per day | `(package_id, date)` | `(id, date)` | RANGE on `date` via pg_partman |
-| `downloads_last_30d` | 3 (`packages_universe`) | one row per purl per rolling 30-day window | `(purl, end_date)` | `(id, end_date)` | RANGE on `end_date` |
+| Table | Tier | Grain | Unique key | PK | Partitioning |
+| -------------------- | ----------------------- | ------------------------------------------ | -------------------- | ---------------- | ------------------------------ |
+| `downloads_daily` | 2 (`packages`) | one row per package per day | `(package_id, date)` | `(id, date)` | RANGE on `date` via pg_partman |
+| `downloads_last_30d` | 3 (`packages_universe`) | one row per purl per rolling 30-day window | `(purl, end_date)` | `(id, end_date)` | RANGE on `end_date` |
Upsert pattern for `downloads_last_30d`:
@@ -326,7 +396,6 @@ A package promoted from Tier 3 to Tier 2 (becomes critical) will have rolling-wi
## Open questions / in-flight
- **Sonatype Central Stats API access** — not confirmed as of 2026-05-27. If unavailable by day 5, Maven download counts will be absent from the week-2 demo (`downloads_last_month` NULL for Maven rows; disclose to stakeholders).
-- **`has_critical_vulnerability` flag** — currently deferred; the column is commented out in the schema. The open question is what it should mean: (a) any critical advisory exists for this package name regardless of version, or (b) the package's latest version actually falls inside an active affected range. Option (b) requires evaluating OSV's `introduced` / `fixed` / `last_affected` range fields using semver (npm) or Maven's own version comparator — non-trivial work. Until resolved, callers join `advisory_packages` directly instead of reading a flag.
- **criticality_score formula** — the placeholder formula (`X * downloadsCount + Y * dependentCount`) has not been validated against known critical packages. Final formula is yet to be defined.
- **pg_partman + pg_cron setup** — must be confirmed active in the OCI environment before download workers start; `downloads_daily` and `downloads_last_30d` inserts will fail if monthly partitions are not pre-created.
@@ -335,12 +404,14 @@ A package promoted from Tier 3 to Tier 2 (becomes critical) will have rolling-wi
## Changelog
- 2026-05-27 — initial record
+- 2026-05-28 — folded standalone ADR-0003 (`has_critical_vulnerability` semantics), ADR-0005 (CVSS scoring strategy), and ADR-0006 (`advisory_affected_ranges` uniqueness scope) into this living record; standalone files removed. Resolved the prior open question on `has_critical_vulnerability` (option b + MAL- override). ADR-0004 (standalone-bin vs Temporal) was removed before merging — the worker architecture decision in this ADR supersedes it.
---
## Note on promotion to production
At first production release of oss-packages, the team decides whether to:
+
- seal ADR-0001 as `accepted` (frozen) and write new ADRs for any post-release changes, or
- split it into per-area ADRs for clearer long-term ownership.
diff --git a/docs/adr/README.md b/docs/adr/README.md
index 4511900ec3..6b0395b10d 100644
--- a/docs/adr/README.md
+++ b/docs/adr/README.md
@@ -6,13 +6,14 @@ Use the `/adr` skill in Claude Code to record new ADRs or query past decisions.
## Index
-| ADR | Title | Status | Date |
-| --- | ----- | ------ | ---- |
+| ADR | Title | Status | Date |
+| --------------------------------------------------- | ---------------------------------------- | ------ | ---------- |
| [ADR-0001](./0001-oss-packages-design-decisions.md) | OSS packages — design decisions (living) | living | 2026-05-27 |
## Why ADRs?
The codebase is in active transition across several axes (see `CLAUDE.md`). ADRs provide a durable record of:
+
- Why old patterns are being replaced (e.g. Sequelize → pg-promise)
- What alternatives were considered before choosing the current approach
- What trade-offs were accepted
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 56850aa78a..6b8822f0f4 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1312,25 +1312,43 @@ importers:
'@crowd/logging':
specifier: workspace:*
version: link:../../libs/logging
+ '@temporalio/activity':
+ specifier: ~1.11.8
+ version: 1.11.8
'@temporalio/client':
specifier: ~1.11.8
version: 1.11.8
'@temporalio/workflow':
specifier: ~1.11.8
version: 1.11.8
+ semver:
+ specifier: ^7.6.0
+ version: 7.6.0
tsx:
specifier: ^4.7.1
version: 4.7.3
typescript:
specifier: ^5.6.3
version: 5.6.3
+ unzipper:
+ specifier: ^0.12.3
+ version: 0.12.3
devDependencies:
'@types/node':
specifier: ^20.8.2
version: 20.12.7
+ '@types/semver':
+ specifier: ^7.5.8
+ version: 7.5.8
+ '@types/unzipper':
+ specifier: ^0.10.10
+ version: 0.10.11
nodemon:
specifier: ^3.0.1
version: 3.1.0
+ vitest:
+ specifier: ^3.2.4
+ version: 3.2.4(@types/debug@4.1.12)(@types/node@20.12.7)(terser@5.43.1)
services/apps/pcc_sync_worker:
dependencies:
@@ -4836,6 +4854,9 @@ packages:
'@types/triple-beam@1.3.5':
resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==}
+ '@types/unzipper@0.10.11':
+ resolution: {integrity: sha512-D25im2zjyMCcgL9ag6N46+wbtJBnXIr7SI4zHf9eJD2Dw2tEB5e+p5MYkrxKIVRscs5QV0EhtU9rgXSPx90oJg==}
+
'@types/uuid@9.0.8':
resolution: {integrity: sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==}
@@ -4964,6 +4985,7 @@ packages:
'@ungap/structured-clone@1.2.0':
resolution: {integrity: sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==}
+ deprecated: Potential CWE-502 - Update to 1.3.1 or higher
'@vercel/ncc@0.38.1':
resolution: {integrity: sha512-IBBb+iI2NLu4VQn3Vwldyi2QwaXt5+hTyh58ggAMoCGE6DJmPvwL3KPBWcJl1m9LYPChBLE980Jw+CS4Wokqxw==}
@@ -6125,6 +6147,9 @@ packages:
resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
engines: {node: '>= 0.4'}
+ duplexer2@0.1.4:
+ resolution: {integrity: sha512-asLFVfWWtJ90ZyOUHMqk7/S2w2guQKxUI2itj3d92ADHhxUSbCMGi1f1cBcJ7xM1To+pE/Khbwo1yuNbMEPKeA==}
+
duplexer3@0.1.5:
resolution: {integrity: sha512-1A8za6ws41LQgv9HrE/66jyC5yuSjQ3L/KOpFtoBilsAK2iA2wuS5rTt1OCzIvtS2V7nVmedsUU+DGRcjBmOYA==}
@@ -6731,6 +6756,10 @@ packages:
resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==}
engines: {node: '>= 0.6'}
+ fs-extra@11.3.5:
+ resolution: {integrity: sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg==}
+ engines: {node: '>=14.14'}
+
fs-extra@4.0.3:
resolution: {integrity: sha512-q6rbdDd1o2mAnQreO7YADIxf/Whx4AHBiRf6d+/cVT8h44ss+lHgxf1FemcqDnQt9X3ct4McHr+JMGlYSsK7Cg==}
@@ -6871,11 +6900,11 @@ packages:
glob@6.0.4:
resolution: {integrity: sha512-MKZeRNyYZAVVVG1oZeLaWie1uweH40m9AZwIwxyPbTSX4hHrVYSzLg0Ro5Z5R7XKkIX+Cc6oD1rqeDJnwsB8/A==}
- deprecated: Glob versions prior to v9 are no longer supported
+ deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me
glob@7.2.3:
resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==}
- deprecated: Glob versions prior to v9 are no longer supported
+ deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me
global-directory@4.0.1:
resolution: {integrity: sha512-wHTUcDUoZ1H5/0iVqEudYW4/kAlN5cZ3j/bXn0Dpbizl9iaUVeWSHqiOjsgk6OW2bkLclbBjzewBz6weQ1zA2Q==}
@@ -7546,6 +7575,9 @@ packages:
jsonfile@4.0.0:
resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==}
+ jsonfile@6.2.1:
+ resolution: {integrity: sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==}
+
jsonparse@1.3.1:
resolution: {integrity: sha512-POQXvpdL69+CluYsillJ7SUhKvytYjW9vG/GKpnf+xP8UWgYEM/RaMzHHofbALDiKbbP1W8UEYmgGl39WkPZsg==}
engines: {'0': node >= 0.2.0}
@@ -9784,6 +9816,10 @@ packages:
resolution: {integrity: sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==}
engines: {node: '>= 4.0.0'}
+ universalify@2.0.1:
+ resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==}
+ engines: {node: '>= 10.0.0'}
+
unpipe@1.0.0:
resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==}
engines: {node: '>= 0.8'}
@@ -9792,6 +9828,9 @@ packages:
resolution: {integrity: sha512-KK8xQ1mkzZeg9inewmFVDNkg3l5LUhoq9kN6iWYB/CC9YMG8HA+c1Q8HwDe6dEX7kErrEVNVBO3fWsVq5iDgtw==}
engines: {node: '>=8'}
+ unzipper@0.12.3:
+ resolution: {integrity: sha512-PZ8hTS+AqcGxsaQntl3IRBw65QrBI6lxzqDEL7IAo/XCEqRTKGfOX56Vea5TH9SZczRVxuzk1re04z/YjuYCJA==}
+
update-browserslist-db@1.1.3:
resolution: {integrity: sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==}
hasBin: true
@@ -9842,6 +9881,7 @@ packages:
uuid@9.0.1:
resolution: {integrity: sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==}
+ deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028).
hasBin: true
v8-compile-cache-lib@3.0.1:
@@ -13822,6 +13862,10 @@ snapshots:
'@types/triple-beam@1.3.5': {}
+ '@types/unzipper@0.10.11':
+ dependencies:
+ '@types/node': 20.12.7
+
'@types/uuid@9.0.8': {}
'@types/validator@13.11.9': {}
@@ -14024,6 +14068,14 @@ snapshots:
optionalDependencies:
vite: 5.4.21(@types/node@18.19.31)(terser@5.43.1)
+ '@vitest/mocker@3.2.4(vite@5.4.21(@types/node@20.12.7)(terser@5.43.1))':
+ dependencies:
+ '@vitest/spy': 3.2.4
+ estree-walker: 3.0.3
+ magic-string: 0.30.21
+ optionalDependencies:
+ vite: 5.4.21(@types/node@20.12.7)(terser@5.43.1)
+
'@vitest/pretty-format@3.2.4':
dependencies:
tinyrainbow: 2.0.0
@@ -15312,6 +15364,10 @@ snapshots:
es-errors: 1.3.0
gopd: 1.2.0
+ duplexer2@0.1.4:
+ dependencies:
+ readable-stream: 2.3.8
+
duplexer3@0.1.5: {}
duplexify@3.7.1:
@@ -16171,6 +16227,12 @@ snapshots:
fresh@0.5.2: {}
+ fs-extra@11.3.5:
+ dependencies:
+ graceful-fs: 4.2.11
+ jsonfile: 6.2.1
+ universalify: 2.0.1
+
fs-extra@4.0.3:
dependencies:
graceful-fs: 4.2.11
@@ -17020,6 +17082,12 @@ snapshots:
optionalDependencies:
graceful-fs: 4.2.11
+ jsonfile@6.2.1:
+ dependencies:
+ universalify: 2.0.1
+ optionalDependencies:
+ graceful-fs: 4.2.11
+
jsonparse@1.3.1: {}
jsonpointer@5.0.1: {}
@@ -19476,10 +19544,20 @@ snapshots:
universalify@0.1.2: {}
+ universalify@2.0.1: {}
+
unpipe@1.0.0: {}
untildify@4.0.0: {}
+ unzipper@0.12.3:
+ dependencies:
+ bluebird: 3.7.2
+ duplexer2: 0.1.4
+ fs-extra: 11.3.5
+ graceful-fs: 4.2.11
+ node-int64: 0.4.0
+
update-browserslist-db@1.1.3(browserslist@4.25.1):
dependencies:
browserslist: 4.25.1
@@ -19585,6 +19663,24 @@ snapshots:
- supports-color
- terser
+ vite-node@3.2.4(@types/node@20.12.7)(terser@5.43.1):
+ dependencies:
+ cac: 6.7.14
+ debug: 4.4.3
+ es-module-lexer: 1.7.0
+ pathe: 2.0.3
+ vite: 5.4.21(@types/node@20.12.7)(terser@5.43.1)
+ transitivePeerDependencies:
+ - '@types/node'
+ - less
+ - lightningcss
+ - sass
+ - sass-embedded
+ - stylus
+ - sugarss
+ - supports-color
+ - terser
+
vite@5.4.21(@types/node@18.19.31)(terser@5.43.1):
dependencies:
esbuild: 0.21.5
@@ -19595,6 +19691,16 @@ snapshots:
fsevents: 2.3.3
terser: 5.43.1
+ vite@5.4.21(@types/node@20.12.7)(terser@5.43.1):
+ dependencies:
+ esbuild: 0.21.5
+ postcss: 8.5.8
+ rollup: 4.60.1
+ optionalDependencies:
+ '@types/node': 20.12.7
+ fsevents: 2.3.3
+ terser: 5.43.1
+
vitest@3.2.4(@types/debug@4.1.12)(@types/node@18.19.31)(terser@5.43.1):
dependencies:
'@types/chai': 5.2.3
@@ -19634,6 +19740,45 @@ snapshots:
- supports-color
- terser
+ vitest@3.2.4(@types/debug@4.1.12)(@types/node@20.12.7)(terser@5.43.1):
+ dependencies:
+ '@types/chai': 5.2.3
+ '@vitest/expect': 3.2.4
+ '@vitest/mocker': 3.2.4(vite@5.4.21(@types/node@20.12.7)(terser@5.43.1))
+ '@vitest/pretty-format': 3.2.4
+ '@vitest/runner': 3.2.4
+ '@vitest/snapshot': 3.2.4
+ '@vitest/spy': 3.2.4
+ '@vitest/utils': 3.2.4
+ chai: 5.3.3
+ debug: 4.4.3
+ expect-type: 1.3.0
+ magic-string: 0.30.21
+ pathe: 2.0.3
+ picomatch: 4.0.4
+ std-env: 3.10.0
+ tinybench: 2.9.0
+ tinyexec: 0.3.2
+ tinyglobby: 0.2.15
+ tinypool: 1.1.1
+ tinyrainbow: 2.0.0
+ vite: 5.4.21(@types/node@20.12.7)(terser@5.43.1)
+ vite-node: 3.2.4(@types/node@20.12.7)(terser@5.43.1)
+ why-is-node-running: 2.3.0
+ optionalDependencies:
+ '@types/debug': 4.1.12
+ '@types/node': 20.12.7
+ transitivePeerDependencies:
+ - less
+ - lightningcss
+ - msw
+ - sass
+ - sass-embedded
+ - stylus
+ - sugarss
+ - supports-color
+ - terser
+
watchpack@2.4.4:
dependencies:
glob-to-regexp: 0.4.1
diff --git a/services/apps/packages_worker/package.json b/services/apps/packages_worker/package.json
index 8c822bd7b1..af8f62dde0 100644
--- a/services/apps/packages_worker/package.json
+++ b/services/apps/packages_worker/package.json
@@ -11,7 +11,8 @@
"lint": "npx eslint --ext .ts src --max-warnings=0",
"format": "npx prettier --write \"src/**/*.ts\"",
"format-check": "npx prettier --check .",
- "tsc-check": "tsc --noEmit"
+ "tsc-check": "tsc --noEmit",
+ "test": "vitest run"
},
"dependencies": {
"@crowd/archetype-standard": "workspace:*",
@@ -20,13 +21,19 @@
"@crowd/data-access-layer": "workspace:*",
"@crowd/database": "workspace:*",
"@crowd/logging": "workspace:*",
+ "@temporalio/activity": "~1.11.8",
"@temporalio/client": "~1.11.8",
"@temporalio/workflow": "~1.11.8",
+ "semver": "^7.6.0",
"tsx": "^4.7.1",
- "typescript": "^5.6.3"
+ "typescript": "^5.6.3",
+ "unzipper": "^0.12.3"
},
"devDependencies": {
"@types/node": "^20.8.2",
- "nodemon": "^3.0.1"
+ "@types/semver": "^7.5.8",
+ "@types/unzipper": "^0.10.10",
+ "nodemon": "^3.0.1",
+ "vitest": "^3.2.4"
}
}
diff --git a/services/apps/packages_worker/src/activities.ts b/services/apps/packages_worker/src/activities.ts
index b86e9ae448..4c6ec88060 100644
--- a/services/apps/packages_worker/src/activities.ts
+++ b/services/apps/packages_worker/src/activities.ts
@@ -1 +1,2 @@
export { sayHiNpm } from './npm/activities'
+export { osvSyncEcosystem, osvDeriveCriticalFlag } from './osv/activities'
diff --git a/services/apps/packages_worker/src/bin/packages-worker.ts b/services/apps/packages_worker/src/bin/packages-worker.ts
index 7715c934f7..21dee223f4 100644
--- a/services/apps/packages_worker/src/bin/packages-worker.ts
+++ b/services/apps/packages_worker/src/bin/packages-worker.ts
@@ -1,8 +1,10 @@
import { scheduleNpmIngest } from '../npm/schedule'
+import { scheduleOsvSync } from '../osv/schedule'
import { svc } from '../service'
setImmediate(async () => {
await svc.init()
await scheduleNpmIngest()
+ await scheduleOsvSync()
await svc.start()
})
diff --git a/services/apps/packages_worker/src/osv/__tests__/cvssScoring.test.ts b/services/apps/packages_worker/src/osv/__tests__/cvssScoring.test.ts
new file mode 100644
index 0000000000..f2d43de87f
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/__tests__/cvssScoring.test.ts
@@ -0,0 +1,63 @@
+import { describe, expect, it } from 'vitest'
+
+import { computeV3Score } from '../cvssScoring'
+
+// Reference vectors with FIRST-published base scores. These pin the inline
+// implementation against the official spec — a regression here means the
+// scoring formula itself drifted.
+describe('computeV3Score', () => {
+ it.each([
+ // CVE-2021-44228 (log4shell) — scope change, max impact
+ ['CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H', 10.0],
+ // CVE-2014-6271 (shellshock) — no scope change, max impact
+ ['CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H', 9.8],
+ // CVE-2014-0160 (heartbleed) — confidentiality-only impact
+ ['CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N', 7.5],
+ // CVE-2014-0224 (ChangeCipherSpec) — high complexity, partial impact
+ ['CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:L/I:L/A:N', 4.8],
+ // Local attack, low privileges, integrity-only, low — sanity-check the low end
+ ['CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:L/A:N', 3.3],
+ // All-None impact — must yield exactly 0
+ ['CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:N', 0],
+ ])('scores %s as %s', (vector, expected) => {
+ expect(computeV3Score(vector)).toBe(expected)
+ })
+
+ it('accepts CVSS:3.0 vectors as well as 3.1', () => {
+ // 3.0 and 3.1 share the same formula for these base metrics.
+ expect(computeV3Score('CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H')).toBe(10.0)
+ })
+
+ it('returns null for non-v3 prefixes', () => {
+ expect(computeV3Score('CVSS:2.0/AV:N/AC:L/Au:N/C:P/I:P/A:P')).toBeNull()
+ expect(
+ computeV3Score('CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:N/VC:H/VI:H/VA:H/SC:N/SI:N/SA:N'),
+ ).toBeNull()
+ })
+
+ it('returns null for malformed input', () => {
+ expect(computeV3Score('')).toBeNull()
+ expect(computeV3Score('not-a-vector')).toBeNull()
+ expect(computeV3Score('CVSS:3.1/AV:N')).toBeNull() // missing required metrics
+ expect(computeV3Score('CVSS:3.1/AV:Z/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H')).toBeNull() // bad enum
+ })
+
+ // Regression guards for the unvalidated-Scope bug: missing or invalid S used
+ // to silently produce a Scope:Unchanged score instead of returning null.
+ it('returns null when the Scope metric is missing', () => {
+ expect(computeV3Score('CVSS:3.1/AV:N/AC:L/PR:N/UI:N/C:H/I:H/A:H')).toBeNull()
+ })
+
+ it('returns null when the Scope metric is invalid', () => {
+ expect(computeV3Score('CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:X/C:H/I:H/A:H')).toBeNull()
+ })
+
+ it('handles scope-changed PR remapping', () => {
+ // PR:L scores higher under Scope:Changed than under Scope:Unchanged.
+ const unchanged = computeV3Score('CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H')
+ const changed = computeV3Score('CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:C/C:H/I:H/A:H')
+ expect(unchanged).not.toBeNull()
+ expect(changed).not.toBeNull()
+ expect(changed).toBeGreaterThan(unchanged as number)
+ })
+})
diff --git a/services/apps/packages_worker/src/osv/__tests__/deriveCriticalFlag.integration.test.ts b/services/apps/packages_worker/src/osv/__tests__/deriveCriticalFlag.integration.test.ts
new file mode 100644
index 0000000000..d4e7778be5
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/__tests__/deriveCriticalFlag.integration.test.ts
@@ -0,0 +1,181 @@
+import { afterAll, beforeAll, describe, expect, it } from 'vitest'
+
+import { QueryExecutor, pgpQx } from '@crowd/data-access-layer/src/queryExecutor'
+import { getDbConnection } from '@crowd/database'
+
+import { deriveCriticalFlag } from '../deriveCriticalFlag'
+
+// Integration test: hits the running packages-db. Skipped automatically when
+// any of the DB env vars are missing so unit-test runs in CI stay green and
+// a half-set env (host/port but no database) doesn't fail with a confusing
+// connection error inside beforeAll.
+const HAVE_DB =
+ !!process.env.CROWD_PACKAGES_DB_WRITE_HOST &&
+ !!process.env.CROWD_PACKAGES_DB_PORT &&
+ !!process.env.CROWD_PACKAGES_DB_USERNAME &&
+ !!process.env.CROWD_PACKAGES_DB_DATABASE &&
+ !!process.env.CROWD_PACKAGES_DB_PASSWORD
+
+const FIXTURE_TAG = 'osv-test-fixture'
+
+interface FixturePackage {
+ ecosystem: string
+ namespace: string | null
+ name: string
+ latest_version: string
+}
+
+// One row per real package — we UPDATE latest_version between assertions so the
+// derive function sees both the "in-range" and "above-the-fix" cases without
+// violating the (ecosystem, namespace, name) unique index. This also exercises
+// the FALSE-clearing path: the second derive must un-flag a row the first
+// derive flagged.
+const FIXTURES: Record = {
+ // lodash has multiple critical OSV advisories. The widest one (GHSA-r5fr-...)
+ // covers [4.0.0, 4.18.0). We test the upper boundary by setting latest_version
+ // above 4.18.0 in the cleared-case test — anything below is still vulnerable
+ // to at least one of the five critical advisories on lodash.
+ lodash: { ecosystem: 'npm', namespace: null, name: 'lodash', latest_version: '4.17.20' },
+ // log4shell ranges include [2.13.0, 2.15.0). 2.14.1 in, 2.17.0 above the
+ // related CVE-2021-45046/45105 chain too.
+ log4j: {
+ ecosystem: 'maven',
+ namespace: 'org.apache.logging.log4j',
+ name: 'log4j-core',
+ latest_version: '2.14.1',
+ },
+ // MAL- target — flag should flip via the osv_id LIKE 'MAL-%' branch
+ // regardless of CVSS being NULL.
+ cxpJquery: { ecosystem: 'npm', namespace: null, name: 'cxp-jquery', latest_version: '1.0.0' },
+ // Regression guard for the implicit-empty-qualifier Maven bug. There's no
+ // critical advisory for fixture.test:final-quirk, so the flag must be FALSE.
+ finalQuirk: {
+ ecosystem: 'maven',
+ namespace: 'fixture.test',
+ name: 'final-quirk',
+ latest_version: '1.0-final',
+ },
+}
+
+async function cleanupFixtures(qx: QueryExecutor): Promise {
+ // advisory_packages may have populated package_id via the catch-up step on a
+ // prior run; null them out before we delete the package rows.
+ await qx.result(
+ `UPDATE advisory_packages SET package_id = NULL
+ WHERE package_id IN (SELECT id FROM packages WHERE ingestion_source = $(tag))`,
+ { tag: FIXTURE_TAG },
+ )
+ await qx.result(`DELETE FROM packages WHERE ingestion_source = $(tag)`, { tag: FIXTURE_TAG })
+}
+
+async function insertFixture(qx: QueryExecutor, p: FixturePackage, key: string): Promise {
+ const ns = p.namespace ?? ''
+ const purl = `pkg:test-fixture/${key}/${p.ecosystem}/${ns}/${p.name}`
+ const row = await qx.selectOne(
+ `
+ INSERT INTO packages
+ (purl, ecosystem, namespace, name, latest_version, ingestion_source, status)
+ VALUES
+ ($(purl), $(ecosystem), $(namespace), $(name), $(latest_version), $(tag), 'active')
+ RETURNING id
+ `,
+ { ...p, purl, tag: FIXTURE_TAG },
+ )
+ return row.id as number
+}
+
+async function setVersion(qx: QueryExecutor, id: number, version: string): Promise {
+ await qx.result(`UPDATE packages SET latest_version = $(version) WHERE id = $(id)`, {
+ id,
+ version,
+ })
+}
+
+async function flag(qx: QueryExecutor, id: number): Promise {
+ const row = await qx.selectOne(
+ `SELECT has_critical_vulnerability FROM packages WHERE id = $(id)`,
+ { id },
+ )
+ return row.has_critical_vulnerability as boolean
+}
+
+describe.skipIf(!HAVE_DB)('deriveCriticalFlag — real packages-db', () => {
+ let qx: QueryExecutor
+ const ids: Record = {}
+
+ beforeAll(async () => {
+ // HAVE_DB above already guarantees these env vars exist; fall through to
+ // defensive defaults so the type is `string` without non-null assertions.
+ const conn = await getDbConnection({
+ host: process.env.CROWD_PACKAGES_DB_WRITE_HOST ?? '',
+ port: parseInt(process.env.CROWD_PACKAGES_DB_PORT ?? '0', 10),
+ database: process.env.CROWD_PACKAGES_DB_DATABASE ?? '',
+ user: process.env.CROWD_PACKAGES_DB_USERNAME ?? '',
+ password: process.env.CROWD_PACKAGES_DB_PASSWORD ?? '',
+ })
+ qx = pgpQx(conn)
+ await cleanupFixtures(qx)
+ for (const [key, p] of Object.entries(FIXTURES)) {
+ ids[key] = await insertFixture(qx, p, key)
+ }
+ }, 30_000)
+
+ afterAll(async () => {
+ if (qx) await cleanupFixtures(qx)
+ })
+
+ it('flips lodash 4.17.20 (inside the CVE range) to TRUE', async () => {
+ await setVersion(qx, ids.lodash, '4.17.20')
+ await deriveCriticalFlag(qx, 1000)
+ expect(await flag(qx, ids.lodash)).toBe(true)
+ })
+
+ it('clears the lodash flag at 5.0.0 (above every critical fix)', async () => {
+ // Five critical OSV advisories touch lodash, the widest fixed at 4.18.0.
+ // Setting the package above every fix exercises the FALSE-clearing path:
+ // the previous test left this row flagged TRUE; derive must un-flag it.
+ await setVersion(qx, ids.lodash, '5.0.0')
+ await deriveCriticalFlag(qx, 1000)
+ expect(await flag(qx, ids.lodash)).toBe(false)
+ })
+
+ it('flips log4j-core 2.14.1 (inside log4shell range) to TRUE', async () => {
+ await setVersion(qx, ids.log4j, '2.14.1')
+ await deriveCriticalFlag(qx, 1000)
+ expect(await flag(qx, ids.log4j)).toBe(true)
+ })
+
+ it('clears the log4j-core flag at 2.17.0 (above the patched range)', async () => {
+ await setVersion(qx, ids.log4j, '2.17.0')
+ await deriveCriticalFlag(qx, 1000)
+ expect(await flag(qx, ids.log4j)).toBe(false)
+ })
+
+ it('flips a MAL- target via the osv_id LIKE prefix override', async () => {
+ // cxp-jquery has cvss=NULL so this exercises the MAL- branch of the
+ // derive predicate, not the cvss>=7.0 branch.
+ await deriveCriticalFlag(qx, 1000)
+ expect(await flag(qx, ids.cxpJquery)).toBe(true)
+ })
+
+ it('leaves a Maven 1.0-final package unflagged when no advisory matches', async () => {
+ // Regression guard for the implicit-empty-qualifier bug in compareMaven —
+ // if 1.0-final were treated as < 1.0, a range [0, 1.0) on any unrelated
+ // advisory would spuriously flip the flag.
+ expect(await flag(qx, ids.finalQuirk)).toBe(false)
+ })
+
+ it('catches up advisory_packages.package_id for late-arriving packages', async () => {
+ // The MAL- advisories for cxp-jquery were ingested before the fixture
+ // package existed; derive runs a catch-up UPDATE that resolves package_id.
+ const row = await qx.selectOne(
+ `
+ SELECT COUNT(*) AS resolved
+ FROM advisory_packages
+ WHERE ecosystem = 'npm' AND package_name = 'cxp-jquery' AND package_id = $(id)
+ `,
+ { id: ids.cxpJquery },
+ )
+ expect(Number(row.resolved)).toBeGreaterThan(0)
+ })
+})
diff --git a/services/apps/packages_worker/src/osv/__tests__/extractSeverity.test.ts b/services/apps/packages_worker/src/osv/__tests__/extractSeverity.test.ts
new file mode 100644
index 0000000000..e61d5327cc
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/__tests__/extractSeverity.test.ts
@@ -0,0 +1,106 @@
+import { describe, expect, it } from 'vitest'
+
+import { extractSeverity } from '../extractSeverity'
+import { OsvRecord } from '../types'
+
+function record(partial: Partial): OsvRecord {
+ return { id: 'GHSA-test', ...partial }
+}
+
+describe('extractSeverity', () => {
+ it('short-circuits MAL- ids before checking severity[]', () => {
+ // Even when a real V3 vector is present (it shouldn't be, but defend the path)
+ // a MAL- id always classifies as a malicious-package report.
+ const r = record({
+ id: 'MAL-2024-12345',
+ severity: [{ type: 'CVSS_V3', score: 'CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H' }],
+ database_specific: { severity: 'CRITICAL' },
+ })
+ expect(extractSeverity(r)).toEqual({
+ severity: null,
+ cvss: null,
+ cvssSource: 'osv_malicious_package',
+ })
+ })
+
+ it('parses a CVSS_V3 vector to numeric and surfaces qualitative tag', () => {
+ const r = record({
+ severity: [{ type: 'CVSS_V3', score: 'CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H' }],
+ database_specific: { severity: 'critical' }, // lowercase tolerated
+ })
+ expect(extractSeverity(r)).toEqual({
+ severity: 'CRITICAL',
+ cvss: 10.0,
+ cvssSource: 'osv_cvss_v3',
+ })
+ })
+
+ it('falls back to qualitative when only V4 is present (V4 numeric not implemented)', () => {
+ // Documented v1 limitation in ADR-0001 §CVSS scoring strategy + cvssScoring.ts.
+ const r = record({
+ severity: [
+ {
+ type: 'CVSS_V4',
+ score: 'CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:N/VC:H/VI:H/VA:H/SC:N/SI:N/SA:N',
+ },
+ ],
+ database_specific: { severity: 'HIGH' },
+ })
+ expect(extractSeverity(r)).toEqual({
+ severity: 'HIGH',
+ cvss: 7.5,
+ cvssSource: 'osv_qualitative_fallback',
+ })
+ })
+
+ it('prefers V3 over qualitative when both present', () => {
+ // V3 numeric is more precise than the qualitative bucket.
+ const r = record({
+ severity: [{ type: 'CVSS_V3', score: 'CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H' }],
+ database_specific: { severity: 'MEDIUM' }, // would map to 5.0 if used
+ })
+ const result = extractSeverity(r)
+ expect(result.cvss).toBe(9.8)
+ expect(result.cvssSource).toBe('osv_cvss_v3')
+ })
+
+ it('falls back to qualitative when no vector is parseable', () => {
+ const r = record({ database_specific: { severity: 'MEDIUM' } })
+ expect(extractSeverity(r)).toEqual({
+ severity: 'MEDIUM',
+ cvss: 5.0,
+ cvssSource: 'osv_qualitative_fallback',
+ })
+ })
+
+ it('returns all nulls when nothing usable is present', () => {
+ const r = record({})
+ expect(extractSeverity(r)).toEqual({
+ severity: null,
+ cvss: null,
+ cvssSource: null,
+ })
+ })
+
+ it('returns all nulls when qualitative tag is an unrecognized string', () => {
+ const r = record({ database_specific: { severity: 'unknown-severity' } })
+ expect(extractSeverity(r)).toEqual({
+ severity: null,
+ cvss: null,
+ cvssSource: null,
+ })
+ })
+
+ it('falls back through an unparseable V3 vector to qualitative', () => {
+ // A bad vector is treated as if V3 weren't there at all.
+ const r = record({
+ severity: [{ type: 'CVSS_V3', score: 'CVSS:3.1/junk' }],
+ database_specific: { severity: 'LOW' },
+ })
+ expect(extractSeverity(r)).toEqual({
+ severity: 'LOW',
+ cvss: 3.0,
+ cvssSource: 'osv_qualitative_fallback',
+ })
+ })
+})
diff --git a/services/apps/packages_worker/src/osv/__tests__/parseOsvRecord.test.ts b/services/apps/packages_worker/src/osv/__tests__/parseOsvRecord.test.ts
new file mode 100644
index 0000000000..e9dd55135e
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/__tests__/parseOsvRecord.test.ts
@@ -0,0 +1,295 @@
+import { describe, expect, it } from 'vitest'
+
+import { parseOsvRecord } from '../parseOsvRecord'
+import { OsvRecord } from '../types'
+
+const ALLOW = new Set(['npm', 'maven'])
+
+const baseRecord = (overrides: Partial): OsvRecord => ({
+ id: 'GHSA-test',
+ ...overrides,
+})
+
+describe('parseOsvRecord — name splitting', () => {
+ it('splits Maven groupId:artifactId', () => {
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'Maven', name: 'org.apache.logging.log4j:log4j-core' },
+ ranges: [{ type: 'ECOSYSTEM', events: [{ introduced: '2.0' }, { fixed: '2.15.0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages).toHaveLength(1)
+ expect(out.packages[0].pkg).toMatchObject({
+ ecosystem: 'maven',
+ packageName: 'org.apache.logging.log4j:log4j-core',
+ namespace: 'org.apache.logging.log4j',
+ name: 'log4j-core',
+ })
+ })
+
+ it('strips the leading @ from npm scoped packages', () => {
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: '@types/node' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '0' }, { fixed: '1.0.0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].pkg).toMatchObject({
+ packageName: '@types/node',
+ namespace: 'types',
+ name: 'node',
+ })
+ })
+
+ it('leaves bare npm packages with namespace=null', () => {
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'lodash' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '0' }, { fixed: '4.17.21' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].pkg).toMatchObject({ namespace: null, name: 'lodash' })
+ })
+
+ it('handles a Maven name without colon by falling back to namespace=null', () => {
+ // Real-world Maven names always have the colon, but the parser shouldn't crash
+ // on malformed input.
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'Maven', name: 'log4j-core' },
+ ranges: [{ type: 'ECOSYSTEM', events: [{ introduced: '0' }, { fixed: '2.15.0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].pkg).toMatchObject({ namespace: null, name: 'log4j-core' })
+ })
+})
+
+describe('parseOsvRecord — ecosystem allowlist', () => {
+ it('drops affected[] entries for ecosystems outside the allowlist', () => {
+ // Cross-ecosystem advisory (e.g. lodash GHSA hits npm + RubyGems): only npm survives.
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'lodash' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '0' }, { fixed: '4.17.21' }] }],
+ },
+ {
+ package: { ecosystem: 'RubyGems', name: 'lodash' },
+ ranges: [{ type: 'ECOSYSTEM', events: [{ introduced: '0' }, { fixed: '4.17.21' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages).toHaveLength(1)
+ expect(out.packages[0].pkg.ecosystem).toBe('npm')
+ })
+
+ it('returns packages: [] when no affected entry survives the filter', () => {
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'PyPI', name: 'requests' },
+ ranges: [{ type: 'ECOSYSTEM', events: [{ introduced: '0' }, { fixed: '2.31.0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages).toEqual([])
+ // Advisory metadata is still populated so callers can choose to log/skip.
+ expect(out.advisory.osvId).toBe('GHSA-test')
+ })
+})
+
+describe('parseOsvRecord — range flattening', () => {
+ it('flattens introduced → fixed pairs', () => {
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'lodash' },
+ ranges: [
+ {
+ type: 'SEMVER',
+ events: [
+ { introduced: '1.0.0' },
+ { fixed: '1.2.0' },
+ { introduced: '2.0.0' },
+ { fixed: '2.5.0' },
+ ],
+ },
+ ],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: '1.2.0', lastAffected: null },
+ { introducedVersion: '2.0.0', fixedVersion: '2.5.0', lastAffected: null },
+ ])
+ })
+
+ it('flattens introduced → last_affected', () => {
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ ranges: [
+ {
+ type: 'SEMVER',
+ events: [{ introduced: '1.0.0' }, { last_affected: '1.4.9' }],
+ },
+ ],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: null, lastAffected: '1.4.9' },
+ ])
+ })
+
+ it('handles MAL- style ranges (introduced=0, no fixed) as always-vulnerable rows', () => {
+ const r = baseRecord({
+ id: 'MAL-2024-1',
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'evil-pkg' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '0', fixedVersion: null, lastAffected: null },
+ ])
+ })
+
+ it('skips GIT ranges entirely', () => {
+ // Commit-hash ranges aren't useful for version-based matching.
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ ranges: [
+ { type: 'GIT', events: [{ introduced: 'abc123' }, { fixed: 'def456' }] },
+ { type: 'SEMVER', events: [{ introduced: '1.0.0' }, { fixed: '1.1.0' }] },
+ ],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: '1.1.0', lastAffected: null },
+ ])
+ })
+
+ it('skips an affected[] entry with neither ranges nor versions', () => {
+ const r = baseRecord({
+ affected: [{ package: { ecosystem: 'npm', name: 'pkg' } }],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages).toEqual([])
+ })
+
+ it('converts versions[] to discrete ranges when ranges[] is empty', () => {
+ // Some OSV records list exact vulnerable versions without a structured
+ // range. Without the conversion the advisory_package would land in the DB
+ // with zero ranges and deriveCriticalFlag would never flag the package.
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ versions: ['1.0.0', '1.0.1', '1.0.2'],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages).toHaveLength(1)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: null, lastAffected: '1.0.0' },
+ { introducedVersion: '1.0.1', fixedVersion: null, lastAffected: '1.0.1' },
+ { introducedVersion: '1.0.2', fixedVersion: null, lastAffected: '1.0.2' },
+ ])
+ })
+
+ it('ignores versions[] when ranges[] is non-empty (avoids redundant rows)', () => {
+ // If OSV provides both, ranges[] is the source of truth — versions[] is
+ // typically a subset enumeration. Avoid writing the same vulnerability
+ // window twice (once as a structured range, once as a per-version range).
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '1.0.0' }, { fixed: '2.0.0' }] }],
+ versions: ['1.5.0'],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: '2.0.0', lastAffected: null },
+ ])
+ })
+
+ it('merges multiple affected[] entries for the same package', () => {
+ // Some OSV records list the same (ecosystem, name) multiple times to express
+ // disjoint range sets. We collapse them under one advisory_package row.
+ const r = baseRecord({
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '1.0.0' }, { fixed: '1.2.0' }] }],
+ },
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '2.0.0' }, { fixed: '2.5.0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.packages).toHaveLength(1)
+ expect(out.packages[0].ranges).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: '1.2.0', lastAffected: null },
+ { introducedVersion: '2.0.0', fixedVersion: '2.5.0', lastAffected: null },
+ ])
+ })
+})
+
+describe('parseOsvRecord — advisory metadata', () => {
+ it('carries through aliases, summary, details, dates', () => {
+ const r = baseRecord({
+ id: 'GHSA-abc-def-ghi',
+ aliases: ['CVE-2021-99999', 'CVE-2022-00001'],
+ summary: 'short summary',
+ details: 'long details',
+ published: '2021-01-01T00:00:00Z',
+ modified: '2021-06-01T00:00:00Z',
+ affected: [
+ {
+ package: { ecosystem: 'npm', name: 'pkg' },
+ ranges: [{ type: 'SEMVER', events: [{ introduced: '0' }, { fixed: '1.0.0' }] }],
+ },
+ ],
+ })
+ const out = parseOsvRecord(r, ALLOW)
+ expect(out.advisory).toMatchObject({
+ osvId: 'GHSA-abc-def-ghi',
+ aliases: ['CVE-2021-99999', 'CVE-2022-00001'],
+ summary: 'short summary',
+ details: 'long details',
+ publishedAt: '2021-01-01T00:00:00Z',
+ modifiedAt: '2021-06-01T00:00:00Z',
+ })
+ })
+})
diff --git a/services/apps/packages_worker/src/osv/__tests__/upsertAdvisory.test.ts b/services/apps/packages_worker/src/osv/__tests__/upsertAdvisory.test.ts
new file mode 100644
index 0000000000..e784902a3e
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/__tests__/upsertAdvisory.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from 'vitest'
+
+import { NormalizedRange } from '../types'
+import { dedupeRanges } from '../upsertAdvisory'
+
+// dedupeRanges is the pre-flight pass that keeps inserts from colliding on the
+// (advisory_package_id, introduced, fixed, last_affected) unique index. The
+// scope of the dedup key is the load-bearing detail — keying only on
+// introduced_version (the historical bug guarded by ADR-0001
+// §`advisory_affected_ranges` uniqueness scope) silently dropped
+// ranges that share an introduced_version but differ in fixed_version or
+// last_affected, which OSV emits for cross-distro patches and partial-fix
+// scenarios.
+
+describe('dedupeRanges', () => {
+ it('preserves two ranges sharing introduced but with different fixed_version', () => {
+ // Realistic OSV scenario: two affected[] blocks for the same package
+ // patched at different upstream commits across distros.
+ const ranges: NormalizedRange[] = [
+ { introducedVersion: '1.0.0', fixedVersion: '1.5.0', lastAffected: null },
+ { introducedVersion: '1.0.0', fixedVersion: '2.0.0', lastAffected: null },
+ ]
+ expect(dedupeRanges(ranges)).toEqual(ranges)
+ })
+
+ it('preserves two ranges sharing introduced but with different last_affected', () => {
+ const ranges: NormalizedRange[] = [
+ { introducedVersion: '1.0.0', fixedVersion: null, lastAffected: '1.4.9' },
+ { introducedVersion: '1.0.0', fixedVersion: null, lastAffected: '1.9.9' },
+ ]
+ expect(dedupeRanges(ranges)).toEqual(ranges)
+ })
+
+ it('collapses two truly identical tuples to one', () => {
+ // Same OSV record occasionally emits redundant events for the same line —
+ // the Set should still fold those.
+ const ranges: NormalizedRange[] = [
+ { introducedVersion: '1.0.0', fixedVersion: '2.0.0', lastAffected: null },
+ { introducedVersion: '1.0.0', fixedVersion: '2.0.0', lastAffected: null },
+ ]
+ expect(dedupeRanges(ranges)).toEqual([
+ { introducedVersion: '1.0.0', fixedVersion: '2.0.0', lastAffected: null },
+ ])
+ })
+
+ it('treats null introducedVersion the same as the empty string for keying', () => {
+ // Two MAL-style "always vulnerable" entries with different fix lines
+ // (rare, but the keying still has to disambiguate them).
+ const ranges: NormalizedRange[] = [
+ { introducedVersion: null, fixedVersion: null, lastAffected: null },
+ { introducedVersion: null, fixedVersion: '1.0.0', lastAffected: null },
+ ]
+ expect(dedupeRanges(ranges)).toEqual(ranges)
+ })
+
+ it('keeps first occurrence when a tuple is repeated', () => {
+ const first: NormalizedRange = {
+ introducedVersion: '1.0.0',
+ fixedVersion: '2.0.0',
+ lastAffected: null,
+ }
+ const ranges: NormalizedRange[] = [
+ first,
+ { introducedVersion: '1.0.0', fixedVersion: '2.0.0', lastAffected: null },
+ ]
+ const out = dedupeRanges(ranges)
+ expect(out).toHaveLength(1)
+ expect(out[0]).toBe(first)
+ })
+})
diff --git a/services/apps/packages_worker/src/osv/__tests__/versionCompare.test.ts b/services/apps/packages_worker/src/osv/__tests__/versionCompare.test.ts
new file mode 100644
index 0000000000..1394ea5533
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/__tests__/versionCompare.test.ts
@@ -0,0 +1,90 @@
+import { describe, expect, it } from 'vitest'
+
+import { compareVersion } from '../versionCompare'
+
+// Use sign() so tests assert ordering without hard-coding the exact value;
+// the contract is -1/0/1, but we only care about the sign.
+const sign = (n: number | null) => (n === null ? null : Math.sign(n))
+
+describe('compareVersion — npm (semver)', () => {
+ it.each([
+ ['1.2.3', '1.2.4', -1],
+ ['1.2.4', '1.2.3', 1],
+ ['1.2.3', '1.2.3', 0],
+ ['1.10.0', '1.9.0', 1], // numeric, not lex
+ ['1.0.0-alpha', '1.0.0', -1], // prerelease < release
+ ['1.0.0-alpha', '1.0.0-beta', -1],
+ ['1.0.0+meta', '1.0.0', 0], // build metadata ignored
+ ['4.17.20', '4.17.21', -1], // lodash CVE-2021-23337 boundary
+ ])('compareVersion("npm", %s, %s) sign = %s', (a, b, expected) => {
+ expect(sign(compareVersion('npm', a, b))).toBe(expected)
+ })
+
+ it('returns null for unparseable npm versions', () => {
+ expect(compareVersion('npm', 'not-a-version-at-all', '1.0.0')).toBeNull()
+ })
+
+ it('returns null for short / lossy npm versions instead of coercing', () => {
+ // Under-flag over mis-flag: semver.coerce would map "1.2" → "1.2.0" and
+ // "1.2-junk-3" → "1.2.3", which can flip has_critical_vulnerability on
+ // a malformed introduced/fixed boundary. We prefer null (no match).
+ expect(compareVersion('npm', '1.2', '1.3')).toBeNull()
+ expect(compareVersion('npm', '1.2-junk-3', '1.2.4')).toBeNull()
+ expect(compareVersion('npm', 'v1', '1.0.0')).toBeNull()
+ })
+})
+
+describe('compareVersion — maven (ComparableVersion-style)', () => {
+ it.each([
+ // Basic numeric ordering
+ ['1.0', '1.1', -1],
+ ['1.1', '1.0', 1],
+ ['1.0', '1.0', 0],
+ ['1.0', '1.0.0', 0], // implicit zero padding
+ ['1.0', '1.10', -1], // numeric, not lex
+ // log4shell range boundaries
+ ['2.14.1', '2.15.0', -1],
+ ['2.15.0', '2.15.0', 0],
+ ['2.16.0', '2.15.0', 1],
+ // Qualifier ranks
+ ['1.0-alpha', '1.0-beta', -1],
+ ['1.0-beta', '1.0-rc', -1],
+ ['1.0-rc', '1.0', -1], // rc < ga
+ ['1.0-snapshot', '1.0', -1],
+ ['1.0', '1.0-sp1', -1], // sp > ga
+ ['1.0-final', '1.0', 0], // final == ga
+ ['1.0-ga', '1.0', 0],
+ // Qualifier aliases (single-letter forms)
+ ['1.0-a', '1.0-alpha', 0],
+ ['1.0-b', '1.0-beta', 0],
+ ['1.0-m1', '1.0-milestone1', 0],
+ // Numeric beats alpha at same depth
+ ['1.0-1', '1.0-alpha', 1],
+ ])('compareVersion("maven", %s, %s) sign = %s', (a, b, expected) => {
+ expect(sign(compareVersion('maven', a, b))).toBe(expected)
+ })
+
+ it('returns null for unparseable maven versions', () => {
+ // Empty / punctuation-only strings tokenize to [] and used to silently
+ // compare as version 0. Per the comparator contract, return null instead.
+ expect(compareVersion('maven', '', '1.0')).toBeNull()
+ expect(compareVersion('maven', '1.0', '')).toBeNull()
+ expect(compareVersion('maven', '...', '1.0')).toBeNull()
+ })
+})
+
+describe('compareVersion — unsupported ecosystems', () => {
+ it('returns null for ecosystems we have no comparator for', () => {
+ expect(compareVersion('PyPI', '1.0.0', '2.0.0')).toBeNull()
+ expect(compareVersion('crates.io', '0.1', '0.2')).toBeNull()
+ })
+
+ it('rejects titlecase "Maven" — production storage is always lowercase', () => {
+ // Regression guard for the casing bug Fix 1 missed: deriveCriticalFlag
+ // reads `ecosystem` from packages-db where it's lowercase. The comparator
+ // is keyed on the same lowercase form per ADR-0001 §OSV "Ecosystem
+ // normalization". A titlecase 'Maven' call indicates the caller forgot
+ // to normalize.
+ expect(compareVersion('Maven', '1.0', '2.0')).toBeNull()
+ })
+})
diff --git a/services/apps/packages_worker/src/osv/activities.ts b/services/apps/packages_worker/src/osv/activities.ts
new file mode 100644
index 0000000000..f62adbfe1e
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/activities.ts
@@ -0,0 +1,176 @@
+import { ApplicationFailure, Context } from '@temporalio/activity'
+import { rm } from 'node:fs/promises'
+import * as path from 'node:path'
+
+import { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor'
+import { getServiceChildLogger } from '@crowd/logging'
+
+import { getPackagesDb } from '../db'
+
+import { deriveCriticalFlag } from './deriveCriticalFlag'
+import { fetchEcosystemZip } from './fetchEcosystemZip'
+import { parseOsvRecord } from './parseOsvRecord'
+import { FetchError, NormalizedRecord } from './types'
+import { upsertAdvisoryBatch } from './upsertAdvisory'
+
+const log = getServiceChildLogger('osv-sync')
+
+// Per-activity env readers. Each activity reads only the env vars it actually
+// needs at invocation time, so e.g. running the derive activity in isolation
+// for testing doesn't require the sync-only OSV_BULK_BASE_URL / OSV_TMP_DIR /
+// OSV_BATCH_SIZE to be set. Read at invocation time (not module load) so the
+// sandboxed workflow bundle that imports this file for type discovery doesn't
+// require any env to be set.
+
+function getSyncConfig() {
+ return {
+ bulkBaseUrl: required('OSV_BULK_BASE_URL'),
+ tmpDir: required('OSV_TMP_DIR'),
+ upsertBatchSize: requirePositiveInt('OSV_BATCH_SIZE'),
+ }
+}
+
+function getDeriveConfig() {
+ return { deriveBatchSize: requirePositiveInt('OSV_DERIVE_BATCH_SIZE') }
+}
+
+function required(name: string): string {
+ const value = process.env[name]
+ if (!value) throw new Error(`Missing required environment variable: ${name}`)
+ return value
+}
+
+// requirePositiveInt fails fast on non-numeric or zero/negative values so an
+// upstream env-config typo can't turn into NaN propagating through the upsert
+// buffer flush threshold (`buffer.length >= NaN` is always false → unbounded
+// growth) or SQL `LIMIT NaN` errors in the derive loop.
+function requirePositiveInt(name: string): number {
+ const raw = required(name)
+ const parsed = parseInt(raw, 10)
+ if (!Number.isFinite(parsed) || parsed <= 0) {
+ throw new Error(`Env ${name} must be a positive integer, got: ${raw}`)
+ }
+ return parsed
+}
+
+export interface OsvSyncEcosystemInput {
+ ecosystem: string
+ allowedEcosystems: string[]
+}
+
+export interface OsvSyncEcosystemResult {
+ ecosystem: string
+ read: number
+ kept: number
+ skipped: number
+ flushed: number
+ durationMs: number
+}
+
+// osvSyncEcosystem downloads //all.zip, normalizes every
+// record, and upserts the surviving advisories into packages-db. The activity
+// is idempotent on osv_id, so a Temporal retry that re-runs the activity is
+// safe — already-flushed batches simply re-UPSERT with the same values.
+//
+// Heartbeats fire every 1000 records so Temporal sees the activity is alive
+// even on the npm ecosystem (~226k records, ~1 hour with the current N+1
+// upsert path tracked under Copilot's deferred review comment).
+//
+// NOT_FOUND and PARSE FetchErrors are remapped to non-retryable
+// ApplicationFailures — the bucket URL is misconfigured or the OSV dataset
+// itself is malformed; retrying the same payload won't help. Other FetchError
+// kinds (NETWORK, TRANSIENT) propagate so Temporal's RetryPolicy can back off
+// and retry.
+export async function osvSyncEcosystem(
+ input: OsvSyncEcosystemInput,
+): Promise {
+ const { ecosystem, allowedEcosystems } = input
+ const config = getSyncConfig()
+ // OSV's bucket uses case-sensitive paths (Maven/all.zip, not maven/all.zip),
+ // so `ecosystem` (used for the URL) keeps OSV's canonical case. The allowlist
+ // is matched in parseOsvRecord after lowercasing each record's ecosystem per
+ // ADR-0001 §OSV "Ecosystem normalization", so the set is built lowercase here.
+ const allowed = new Set(allowedEcosystems.map((e) => e.toLowerCase()))
+ const start = Date.now()
+
+ const ecoDir = path.join(config.tmpDir, ecosystem)
+ await rm(ecoDir, { recursive: true, force: true }).catch(() => {
+ /* best-effort cleanup; ignore failure */
+ })
+
+ let read = 0
+ let kept = 0
+ let skipped = 0
+ let flushed = 0
+ let buffer: NormalizedRecord[] = []
+
+ const qx = await getPackagesDb()
+
+ const flush = async () => {
+ if (buffer.length === 0) return
+ const batch = buffer
+ buffer = []
+ await upsertAdvisoryBatch(qx, batch)
+ flushed += batch.length
+ }
+
+ try {
+ for await (const entry of fetchEcosystemZip(config.bulkBaseUrl, ecosystem, config.tmpDir)) {
+ read++
+ const normalized = parseOsvRecord(entry.json, allowed)
+ if (normalized.packages.length === 0) {
+ skipped++
+ } else {
+ kept++
+ buffer.push(normalized)
+ if (buffer.length >= config.upsertBatchSize) {
+ await flush()
+ }
+ }
+ if (read % 1000 === 0) {
+ Context.current().heartbeat({ ecosystem, read, kept, skipped, flushed })
+ }
+ }
+
+ await flush()
+ } catch (err) {
+ if (err instanceof FetchError && (err.kind === 'NOT_FOUND' || err.kind === 'PARSE')) {
+ throw ApplicationFailure.nonRetryable(
+ `OSV sync failed for ${ecosystem}: ${err.message}`,
+ err.kind,
+ )
+ }
+ throw err
+ }
+
+ const result: OsvSyncEcosystemResult = {
+ ecosystem,
+ read,
+ kept,
+ skipped,
+ flushed,
+ durationMs: Date.now() - start,
+ }
+ log.info(result, `osvSyncEcosystem done for ${ecosystem}`)
+ return result
+}
+
+export interface OsvDeriveCriticalFlagResult {
+ flipped: number
+ cleared: number
+ durationMs: number
+}
+
+// osvDeriveCriticalFlag recomputes packages.has_critical_vulnerability for
+// every package whose latest_version is set. Idempotent — Temporal retry is
+// safe; re-running clears stale FALSE→TRUE and TRUE→FALSE transitions
+// identically.
+export async function osvDeriveCriticalFlag(): Promise {
+ const config = getDeriveConfig()
+ const start = Date.now()
+ const qx: QueryExecutor = await getPackagesDb()
+ const { flipped, cleared } = await deriveCriticalFlag(qx, config.deriveBatchSize)
+ const result = { flipped, cleared, durationMs: Date.now() - start }
+ log.info(result, 'osvDeriveCriticalFlag done')
+ return result
+}
diff --git a/services/apps/packages_worker/src/osv/cvssScoring.ts b/services/apps/packages_worker/src/osv/cvssScoring.ts
new file mode 100644
index 0000000000..4e5e919935
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/cvssScoring.ts
@@ -0,0 +1,66 @@
+// CVSS v3.1 base score, computed from a CVSS:3.x/... vector string per the FIRST
+// specification at https://www.first.org/cvss/v3.1/specification-document.
+//
+// CVSS v4.0 is not implemented here. v4 scoring requires the macro-vector lookup
+// table from the FIRST v4.0 spec (~270 entries). For v1 we treat V4-only records
+// as missing a numeric vector and let extractSeverity fall back to the qualitative
+// tag; in practice OSV records that include V4 almost always also include V3 (per
+// the 2026-05-27 spike). A proper v4 scorer is tracked as a follow-up.
+
+const AV: Record = { N: 0.85, A: 0.62, L: 0.55, P: 0.2 }
+const AC: Record = { L: 0.77, H: 0.44 }
+const PR_U: Record = { N: 0.85, L: 0.62, H: 0.27 }
+const PR_C: Record = { N: 0.85, L: 0.68, H: 0.5 }
+const UI: Record = { N: 0.85, R: 0.62 }
+const CIA: Record = { N: 0, L: 0.22, H: 0.56 }
+const S_VALUES = new Set(['U', 'C'])
+
+function parseVector(vector: string): Record | null {
+ const parts = vector.split('/').filter(Boolean)
+ if (parts.length === 0) return null
+ const out: Record = {}
+ for (const part of parts) {
+ const eq = part.indexOf(':')
+ if (eq < 0) return null
+ out[part.slice(0, eq)] = part.slice(eq + 1)
+ }
+ return out
+}
+
+// CVSS spec roundUp1: ceil to one decimal place, with tolerance for FP drift.
+function roundUp(x: number): number {
+ const scaled = Math.round(x * 100000)
+ if (scaled % 10000 === 0) return scaled / 100000
+ return (Math.floor(scaled / 10000) + 1) / 10
+}
+
+export function computeV3Score(vector: string): number | null {
+ const v = parseVector(vector)
+ if (!v) return null
+ if (!v.CVSS || !v.CVSS.startsWith('3.')) return null
+
+ const av = AV[v.AV]
+ const ac = AC[v.AC]
+ const ui = UI[v.UI]
+ // Scope is read directly because the metric is qualitative, not numeric, so
+ // it does not slot into the undefined-check below. Validate it explicitly
+ // here — an invalid or missing S would otherwise silently fall through to
+ // the Scope:Unchanged formula and produce a wrong numeric score instead of
+ // null, which is the headline risk flagged in ADR-0001 §CVSS scoring strategy.
+ const s = v.S
+ if (!S_VALUES.has(s)) return null
+ const pr = s === 'C' ? PR_C[v.PR] : PR_U[v.PR]
+ const c = CIA[v.C]
+ const i = CIA[v.I]
+ const a = CIA[v.A]
+ if ([av, ac, ui, pr, c, i, a].some((x) => x === undefined)) return null
+
+ const iss = 1 - (1 - c) * (1 - i) * (1 - a)
+ const impact = s === 'C' ? 7.52 * (iss - 0.029) - 3.25 * Math.pow(iss - 0.02, 15) : 6.42 * iss
+
+ if (impact <= 0) return 0
+
+ const exploitability = 8.22 * av * ac * pr * ui
+ const raw = s === 'C' ? 1.08 * (impact + exploitability) : impact + exploitability
+ return roundUp(Math.min(raw, 10))
+}
diff --git a/services/apps/packages_worker/src/osv/deriveCriticalFlag.ts b/services/apps/packages_worker/src/osv/deriveCriticalFlag.ts
new file mode 100644
index 0000000000..d571e4c7cb
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/deriveCriticalFlag.ts
@@ -0,0 +1,89 @@
+import {
+ RangeRow,
+ clearSafeFlags,
+ flipVulnerableFlags,
+ getPackagePage,
+ getRangesForPackages,
+ resolveMissingPackageIds,
+} from '@crowd/data-access-layer/src/packages/osv'
+import { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor'
+import { getServiceChildLogger } from '@crowd/logging'
+
+import { compareVersion } from './versionCompare'
+
+const log = getServiceChildLogger('osv-sync:derive')
+
+// True iff `version` falls inside the OSV range, per the OSV schema semantics:
+// introduced=null|'0' -> "from the beginning"
+// fixed=null -> "no fix yet" (vulnerable forever from introduced)
+// last_affected=null -> "no upper bound"
+// We treat a comparator return of null (unparseable) as "no match" so a single
+// odd version string never flips the flag without evidence.
+function isInRange(ecosystem: string, version: string, range: RangeRow): boolean {
+ const introduced = range.introduced_version
+ if (introduced && introduced !== '0') {
+ const c = compareVersion(ecosystem, version, introduced)
+ if (c === null || c < 0) return false
+ }
+
+ if (range.fixed_version) {
+ const c = compareVersion(ecosystem, version, range.fixed_version)
+ if (c === null || c >= 0) return false
+ }
+
+ if (range.last_affected) {
+ const c = compareVersion(ecosystem, version, range.last_affected)
+ if (c === null || c > 0) return false
+ }
+
+ // MAL- ranges often have introduced=null/0 with both fixed and last_affected
+ // null. That collapses to "always vulnerable" — the early returns above never
+ // fire, so we fall through to true here.
+ return true
+}
+
+export async function deriveCriticalFlag(
+ qx: QueryExecutor,
+ batchSize: number,
+): Promise<{ flipped: number; cleared: number }> {
+ const resolved = await resolveMissingPackageIds(qx)
+ if (resolved > 0) log.info({ resolved }, 'Resolved advisory_packages.package_id rows')
+
+ let cursor = 0
+ let flipped = 0
+ let cleared = 0
+
+ // Page through packages keyed by id. We compute the vulnerability decision in
+ // TypeScript because the comparator is ecosystem-specific (semver / Maven) and
+ // not expressible in SQL.
+ /* eslint-disable no-constant-condition */
+ while (true) {
+ const pageRows = await getPackagePage(qx, cursor, batchSize)
+ if (pageRows.length === 0) break
+
+ const ids = pageRows.map((r) => r.id)
+ const rangeRows = await getRangesForPackages(qx, ids)
+
+ const rangesByPkg = new Map()
+ for (const r of rangeRows) {
+ const list = rangesByPkg.get(r.pkg_id) ?? []
+ list.push(r)
+ rangesByPkg.set(r.pkg_id, list)
+ }
+
+ const vulnerable: number[] = []
+ const safe: number[] = []
+ for (const row of pageRows) {
+ const ranges = rangesByPkg.get(row.id) ?? []
+ const isVuln = ranges.some((rng) => isInRange(row.ecosystem, row.latest_version, rng))
+ ;(isVuln ? vulnerable : safe).push(row.id)
+ }
+
+ flipped += await flipVulnerableFlags(qx, vulnerable)
+ cleared += await clearSafeFlags(qx, safe)
+
+ cursor = pageRows[pageRows.length - 1].id
+ }
+
+ return { flipped, cleared }
+}
diff --git a/services/apps/packages_worker/src/osv/extractSeverity.ts b/services/apps/packages_worker/src/osv/extractSeverity.ts
new file mode 100644
index 0000000000..3d7f610787
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/extractSeverity.ts
@@ -0,0 +1,60 @@
+import { computeV3Score } from './cvssScoring'
+import { CvssSource, OsvRecord } from './types'
+
+const QUALITATIVE_TO_CVSS: Record = {
+ CRITICAL: 9.5,
+ HIGH: 7.5,
+ MEDIUM: 5.0,
+ LOW: 3.0,
+}
+
+export interface SeverityResult {
+ severity: string | null
+ cvss: number | null
+ cvssSource: CvssSource | null
+}
+
+// Plan §4 / decision #6: MAL- ids are malicious-package reports with no CVSS
+// vector; we mark them with a dedicated cvss_source so deriveCriticalFlag can
+// flip the package flag via id-prefix match without conflating with CVSS≥7.0.
+function isMalicious(id: string): boolean {
+ return id.startsWith('MAL-')
+}
+
+// extractSeverity is pure. The intended order per ADR-0001 §CVSS scoring
+// strategy is V4 → V3 →
+// qualitative tag from database_specific.severity, but v4 numeric scoring is
+// deferred (see cvssScoring.ts), so v1 skips V4 entirely: V3 first, then the
+// qualitative tag. V4-only records fall through to the qualitative fallback
+// and record cvssSource as 'osv_qualitative_fallback'.
+export function extractSeverity(record: OsvRecord): SeverityResult {
+ if (isMalicious(record.id)) {
+ return { severity: null, cvss: null, cvssSource: 'osv_malicious_package' }
+ }
+
+ const qualitativeRaw = record.database_specific?.severity ?? null
+ const qualitative = qualitativeRaw ? qualitativeRaw.toUpperCase() : null
+
+ const severityList = record.severity ?? []
+ const v3 = severityList.find((s) => s.type === 'CVSS_V3')
+
+ // V4 vector parsing is not implemented for v1. If V3 is present alongside V4
+ // (the common case per the spike), we score V3. If V4 is the only vector,
+ // we drop through to qualitative below.
+ if (v3) {
+ const score = computeV3Score(v3.score)
+ if (score !== null) {
+ return { severity: qualitative, cvss: score, cvssSource: 'osv_cvss_v3' }
+ }
+ }
+
+ if (qualitative && qualitative in QUALITATIVE_TO_CVSS) {
+ return {
+ severity: qualitative,
+ cvss: QUALITATIVE_TO_CVSS[qualitative],
+ cvssSource: 'osv_qualitative_fallback',
+ }
+ }
+
+ return { severity: null, cvss: null, cvssSource: null }
+}
diff --git a/services/apps/packages_worker/src/osv/fetchEcosystemZip.ts b/services/apps/packages_worker/src/osv/fetchEcosystemZip.ts
new file mode 100644
index 0000000000..46a8fe6871
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/fetchEcosystemZip.ts
@@ -0,0 +1,128 @@
+import { createWriteStream } from 'node:fs'
+import { mkdir, rm } from 'node:fs/promises'
+import * as path from 'node:path'
+import { Readable } from 'node:stream'
+import { pipeline } from 'node:stream/promises'
+import unzipper from 'unzipper'
+
+import { FetchError, OsvRecord } from './types'
+
+const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000
+const MAX_ENTRY_BYTES = 10 * 1024 * 1024 // 10 MB; real OSV entries are < 100 KB
+
+async function downloadZip(url: string, target: string): Promise {
+ const ac = new AbortController()
+ const timer = setTimeout(() => ac.abort(), DOWNLOAD_TIMEOUT_MS)
+
+ // The timer covers headers + body. fetch() only awaits headers, so the
+ // pipeline that streams the body to disk must run inside the same try/finally
+ // — otherwise a stalled CDN connection mid-transfer would hang past the
+ // daily window with no timeout protection.
+ try {
+ let response: Response
+ try {
+ response = await fetch(url, { signal: ac.signal })
+ } catch (err) {
+ throw new FetchError('NETWORK', `Failed to GET ${url}: ${(err as Error).message}`)
+ }
+
+ if (response.status === 404) {
+ throw new FetchError('NOT_FOUND', `404 fetching ${url}`)
+ }
+ if (response.status >= 500) {
+ throw new FetchError('TRANSIENT', `${response.status} fetching ${url}`)
+ }
+ if (response.status !== 200) {
+ throw new FetchError('NETWORK', `${response.status} fetching ${url}`)
+ }
+ if (!response.body) {
+ throw new FetchError('NETWORK', `Empty body for ${url}`)
+ }
+
+ try {
+ await pipeline(Readable.fromWeb(response.body), createWriteStream(target))
+ } catch (err) {
+ // pipeline rejects with an AbortError when the timer fires mid-stream;
+ // surface that as a transient network failure so withRetry will retry.
+ throw new FetchError('NETWORK', `Stream failed for ${url}: ${(err as Error).message}`)
+ }
+ } finally {
+ clearTimeout(timer)
+ }
+}
+
+export interface OsvZipEntry {
+ filename: string
+ json: OsvRecord
+}
+
+// fetchEcosystemZip downloads //all.zip into tmpDir//
+// and yields each contained OSV record. Memory stays bounded: we stream the
+// download to disk, then iterate the central directory and decompress one
+// entry at a time. The full zip is held on disk for the duration of the sync
+// pass; callers are expected to clear tmpDir at the start of each pass.
+export async function* fetchEcosystemZip(
+ baseUrl: string,
+ ecosystem: string,
+ tmpDir: string,
+): AsyncIterable {
+ const ecoDir = path.join(tmpDir, ecosystem)
+ await mkdir(ecoDir, { recursive: true })
+ const zipPath = path.join(ecoDir, 'all.zip')
+
+ const url = `${baseUrl.replace(/\/$/, '')}/${ecosystem}/all.zip`
+
+ try {
+ await downloadZip(url, zipPath)
+
+ let directory: unzipper.CentralDirectory
+ try {
+ directory = await unzipper.Open.file(zipPath)
+ } catch (err) {
+ throw new FetchError('PARSE', `Malformed zip from ${url}: ${(err as Error).message}`)
+ }
+
+ for (const file of directory.files) {
+ if (file.type !== 'File') continue
+ if (!file.path.toLowerCase().endsWith('.json')) continue
+
+ // Real OSV entries are well under 100 KB. Check the central-directory
+ // uncompressedSize BEFORE calling file.buffer(), because file.buffer()
+ // decompresses the entry into memory first — a zip-bomb-style payload
+ // (small compressedSize, huge uncompressedSize) would OOM the worker
+ // before any post-decompress length check could fire. Surfaced as PARSE
+ // so withRetry gives up immediately; retrying the same payload won't help.
+ if (file.uncompressedSize > MAX_ENTRY_BYTES) {
+ throw new FetchError(
+ 'PARSE',
+ `Entry ${ecosystem}/${file.path} declares uncompressedSize ${file.uncompressedSize}, exceeds ${MAX_ENTRY_BYTES} bytes`,
+ )
+ }
+ const buffer = await file.buffer()
+ // Defense in depth: a malformed central directory could lie about
+ // uncompressedSize. Re-check the actual decompressed length.
+ if (buffer.length > MAX_ENTRY_BYTES) {
+ throw new FetchError(
+ 'PARSE',
+ `Entry ${ecosystem}/${file.path} exceeds ${MAX_ENTRY_BYTES} bytes (got ${buffer.length})`,
+ )
+ }
+ let json: OsvRecord
+ try {
+ json = JSON.parse(buffer.toString('utf8')) as OsvRecord
+ } catch (err) {
+ throw new FetchError(
+ 'PARSE',
+ `Bad JSON in ${ecosystem}/${file.path}: ${(err as Error).message}`,
+ )
+ }
+ yield { filename: file.path, json }
+ }
+ } finally {
+ // Best-effort cleanup; leave the directory if it fails so the next pass
+ // can investigate.
+ await rm(zipPath, { force: true }).catch(() => {
+ /* best-effort cleanup; ignore failure */
+ })
+ }
+}
diff --git a/services/apps/packages_worker/src/osv/parseOsvRecord.ts b/services/apps/packages_worker/src/osv/parseOsvRecord.ts
new file mode 100644
index 0000000000..057a34b9fc
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/parseOsvRecord.ts
@@ -0,0 +1,166 @@
+import { extractSeverity } from './extractSeverity'
+import {
+ NormalizedAdvisoryPackage,
+ NormalizedPackageEntry,
+ NormalizedRange,
+ NormalizedRecord,
+ OsvAffected,
+ OsvRecord,
+} from './types'
+
+// Splits an OSV `package.name` into the (namespace, name) pair that matches
+// the packages table's unique index on (ecosystem, COALESCE(namespace,''), name).
+// - npm: '@scope/pkg' -> namespace='scope', name='pkg'; bare 'pkg' -> (null,'pkg')
+// - maven: 'groupId:artifactId' -> namespace='groupId', name='artifactId'
+// - Other ecosystems: (null, name); we never reach this in v1 due to the allowlist.
+//
+// Ecosystem strings here are already lowercased (see parseOsvRecord) per the
+// ADR-0001 §OSV "Ecosystem normalization" rule.
+function splitName(ecosystem: string, rawName: string): { namespace: string | null; name: string } {
+ if (ecosystem === 'maven') {
+ const colon = rawName.indexOf(':')
+ if (colon > 0) {
+ return { namespace: rawName.slice(0, colon), name: rawName.slice(colon + 1) }
+ }
+ return { namespace: null, name: rawName }
+ }
+
+ if (ecosystem === 'npm' && rawName.startsWith('@')) {
+ const slash = rawName.indexOf('/')
+ if (slash > 1) {
+ return { namespace: rawName.slice(1, slash), name: rawName.slice(slash + 1) }
+ }
+ }
+
+ return { namespace: null, name: rawName }
+}
+
+// Builds the canonical OSV advisory URL for the source_url column. Deterministic
+// per ADR-0001 §OSV (`source` and `source_url` columns are part of the
+// advisories contract); the granular GHSA/NVD/NSWG attribution that BQ
+// ingestion will populate comes through a different worker.
+function osvSourceUrl(id: string): string {
+ return `https://osv.dev/vulnerability/${id}`
+}
+
+// Reduces an OSV `affected[i].ranges[]` block into the flat shape we store.
+// Each OSV range is a sequence of events along a single line; we collapse
+// each contiguous (introduced -> fixed | last_affected) span into one row.
+// Events for unrelated ranges live in separate `ranges[]` entries, so we
+// process them independently.
+function flattenRanges(affected: OsvAffected): NormalizedRange[] {
+ const out: NormalizedRange[] = []
+
+ for (const range of affected.ranges ?? []) {
+ // GIT ranges are commit-hash bounds and not useful for version-based
+ // vulnerability matching; skip them.
+ if (range.type === 'GIT') continue
+
+ let introduced: string | null = null
+ for (const event of range.events ?? []) {
+ if (event.introduced !== undefined) {
+ // Flush a prior open introduced= row that never saw a fixed/last_affected.
+ if (introduced !== null) {
+ out.push({ introducedVersion: introduced, fixedVersion: null, lastAffected: null })
+ }
+ // OSV uses '0' to mean "from the beginning"; preserve it as-is so the
+ // comparator can treat it as "always vulnerable" (decision #6 / MAL- case).
+ introduced = event.introduced
+ } else if (event.fixed !== undefined) {
+ out.push({
+ introducedVersion: introduced,
+ fixedVersion: event.fixed,
+ lastAffected: null,
+ })
+ introduced = null
+ } else if (event.last_affected !== undefined) {
+ out.push({
+ introducedVersion: introduced,
+ fixedVersion: null,
+ lastAffected: event.last_affected,
+ })
+ introduced = null
+ }
+ // `limit` events bound the range scan but do not themselves define a
+ // vulnerable window; ignore.
+ }
+ if (introduced !== null) {
+ out.push({ introducedVersion: introduced, fixedVersion: null, lastAffected: null })
+ }
+ }
+
+ return out
+}
+
+// parseOsvRecord normalizes a single OSV JSON record into the shape we write.
+// - applies the ecosystem allowlist (records whose only affected[] entries are
+// outside the allowlist return packages: []; upsertAdvisory skips writing them)
+// - splits names per-ecosystem so package_id resolution can join via
+// (ecosystem, COALESCE(namespace,''), name)
+// - preserves the advisory metadata even when no affected packages survive the
+// filter, so callers can decide to skip vs. log
+export function parseOsvRecord(
+ record: OsvRecord,
+ allowedEcosystems: Set,
+): NormalizedRecord {
+ const severity = extractSeverity(record)
+
+ // Group affected[] entries by (ecosystem, package_name) — OSV may list the
+ // same package multiple times when there are disjoint affected ranges.
+ const seen = new Map()
+ for (const affected of record.affected ?? []) {
+ const pkg = affected.package
+ if (!pkg) continue
+ const ecosystem = pkg.ecosystem.toLowerCase()
+ if (!allowedEcosystems.has(ecosystem)) continue
+
+ const ranges = flattenRanges(affected)
+ // If OSV only provides an enumerated `versions[]` list (no `ranges[]`),
+ // convert each exact version into a degenerate range (introduced=v,
+ // last_affected=v) so isInRange matches when `latest_version === v`.
+ // Without this we'd record the advisory_package but no range rows, and
+ // deriveCriticalFlag would silently fail to flag the package.
+ if (ranges.length === 0 && (affected.versions ?? []).length > 0) {
+ for (const v of affected.versions ?? []) {
+ ranges.push({ introducedVersion: v, fixedVersion: null, lastAffected: v })
+ }
+ }
+ if (ranges.length === 0) {
+ // No usable affected range at all; skip this package entry but keep going.
+ continue
+ }
+
+ const key = `${ecosystem} ${pkg.name}`
+ const split = splitName(ecosystem, pkg.name)
+ const pkgRow: NormalizedAdvisoryPackage = {
+ ecosystem,
+ packageName: pkg.name,
+ namespace: split.namespace,
+ name: split.name,
+ }
+
+ const existing = seen.get(key)
+ if (existing) {
+ existing.ranges.push(...ranges)
+ } else {
+ seen.set(key, { pkg: pkgRow, ranges })
+ }
+ }
+
+ return {
+ advisory: {
+ osvId: record.id,
+ source: 'OSV',
+ sourceUrl: osvSourceUrl(record.id),
+ aliases: record.aliases ?? [],
+ severity: severity.severity,
+ cvss: severity.cvss,
+ cvssSource: severity.cvssSource,
+ summary: record.summary ?? null,
+ details: record.details ?? null,
+ publishedAt: record.published ?? null,
+ modifiedAt: record.modified ?? null,
+ },
+ packages: [...seen.values()],
+ }
+}
diff --git a/services/apps/packages_worker/src/osv/schedule.ts b/services/apps/packages_worker/src/osv/schedule.ts
new file mode 100644
index 0000000000..0be43c2832
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/schedule.ts
@@ -0,0 +1,81 @@
+import { ScheduleAlreadyRunning, ScheduleOverlapPolicy } from '@temporalio/client'
+
+import { svc } from '../service'
+import { osvSync } from '../workflows'
+
+const SCHEDULE_ID = 'osv-advisories-sync'
+
+// Ecosystems we support today. The value here doubles as the case-sensitive
+// path segment in OSV's bucket URL (//all.zip), so a typo
+// like `OSV_ECOSYSTEMS=maven` (lowercase) would 404 silently every day. We
+// validate the env input against this list and refuse to register the
+// schedule on a mismatch — better a loud startup error than a silent miss.
+// Add new entries here when v1 expands beyond npm + Maven.
+const VALID_ECOSYSTEMS = ['npm', 'Maven'] as const
+
+function getEcosystems(): string[] {
+ const raw = process.env.OSV_ECOSYSTEMS
+ if (!raw) throw new Error('Missing required environment variable: OSV_ECOSYSTEMS')
+ const list = raw
+ .split(',')
+ .map((s) => s.trim())
+ .filter(Boolean)
+ const deduped = [...new Set(list)]
+ if (deduped.length === 0) throw new Error('OSV_ECOSYSTEMS resolved to an empty list')
+ for (const eco of deduped) {
+ if (!(VALID_ECOSYSTEMS as readonly string[]).includes(eco)) {
+ const hint = VALID_ECOSYSTEMS.find((v) => v.toLowerCase() === eco.toLowerCase())
+ const supported = VALID_ECOSYSTEMS.join(', ')
+ const msg = hint
+ ? `OSV_ECOSYSTEMS contains "${eco}" — did you mean "${hint}"? OSV's bucket paths are case-sensitive.`
+ : `OSV_ECOSYSTEMS contains unsupported ecosystem "${eco}". Supported: ${supported}.`
+ throw new Error(msg)
+ }
+ }
+ return deduped
+}
+
+// Registers the daily OSV advisory sync schedule if it doesn't already exist.
+// If the schedule already exists in Temporal, we log and leave it unchanged (no update).
+// Cron is offset from npm-registry-ingest (`15 3 * * *`) so the two large daily ingest jobs don't fight for the same DB at the same minute.
+export async function scheduleOsvSync(): Promise {
+ const { temporal } = svc
+ if (!temporal) throw new Error('Temporal client not initialized')
+
+ try {
+ await temporal.schedule.create({
+ scheduleId: SCHEDULE_ID,
+ spec: {
+ cronExpressions: ['30 3 * * *'],
+ },
+ policies: {
+ // SKIP per ADR-0001 §Worker architecture: if a slow run is still
+ // executing when the next fire time comes around, don't queue a
+ // second concurrent run.
+ overlap: ScheduleOverlapPolicy.SKIP,
+ catchupWindow: '1 hour',
+ },
+ action: {
+ type: 'startWorkflow',
+ workflowType: osvSync,
+ taskQueue: 'packages-worker',
+ // Headroom for npm (~1 hour today) + Maven (~5 minutes) + derive
+ // (~5 minutes for 600-700k packages); 4 hours leaves space for the
+ // upsertOne N+1 deferred fix being slower than expected.
+ workflowExecutionTimeout: '4 hours',
+ retry: {
+ initialInterval: '30 seconds',
+ backoffCoefficient: 2,
+ maximumAttempts: 3,
+ },
+ args: [{ ecosystems: getEcosystems() }],
+ },
+ })
+ } catch (err) {
+ if (err instanceof ScheduleAlreadyRunning) {
+ svc.log.info(`Schedule ${SCHEDULE_ID} already registered.`)
+ } else {
+ throw err
+ }
+ }
+}
diff --git a/services/apps/packages_worker/src/osv/types.ts b/services/apps/packages_worker/src/osv/types.ts
new file mode 100644
index 0000000000..aec356afc6
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/types.ts
@@ -0,0 +1,115 @@
+// ---- Raw OSV JSON (only the fields we read) ----
+// Reference: https://ossf.github.io/osv-schema/
+
+export interface OsvSeverity {
+ // 'CVSS_V2' | 'CVSS_V3' | 'CVSS_V4' | other future types
+ type: string
+ // The CVSS vector string, not a numeric score.
+ score: string
+}
+
+export interface OsvRangeEvent {
+ introduced?: string
+ fixed?: string
+ last_affected?: string
+ limit?: string
+}
+
+export interface OsvRange {
+ // 'SEMVER' | 'ECOSYSTEM' | 'GIT'
+ type: string
+ events: OsvRangeEvent[]
+}
+
+export interface OsvAffected {
+ package?: {
+ ecosystem: string
+ name: string
+ purl?: string
+ }
+ ranges?: OsvRange[]
+ versions?: string[]
+ database_specific?: Record
+}
+
+export interface OsvRecord {
+ id: string
+ aliases?: string[]
+ summary?: string
+ details?: string
+ published?: string
+ modified?: string
+ affected?: OsvAffected[]
+ severity?: OsvSeverity[]
+ database_specific?: {
+ severity?: string
+ [k: string]: unknown
+ }
+ references?: Array<{ type?: string; url?: string }>
+ schema_version?: string
+}
+
+// ---- Normalized rows we write to packages-db ----
+
+export type CvssSource =
+ | 'osv_cvss_v3'
+ | 'osv_cvss_v4'
+ | 'osv_qualitative_fallback'
+ | 'osv_malicious_package'
+
+export interface NormalizedAdvisory {
+ osvId: string
+ // ADR-0001 §OSV / advisories.source — 'OSV' for everything ingested by this
+ // worker. The granular 'GHSA' | 'NVD' | 'NSWG' attribution is the deps.dev
+ // BQ ingestion worker's responsibility, not this one.
+ source: string
+ sourceUrl: string | null
+ aliases: string[]
+ severity: string | null
+ cvss: number | null
+ cvssSource: CvssSource | null
+ summary: string | null
+ details: string | null
+ publishedAt: string | null
+ modifiedAt: string | null
+}
+
+export interface NormalizedAdvisoryPackage {
+ ecosystem: string
+ packageName: string
+ // namespace/name pair used to resolve packages.id; mirrors the COALESCE-aware
+ // unique index on packages(ecosystem, COALESCE(namespace,''), name).
+ namespace: string | null
+ name: string
+}
+
+export interface NormalizedRange {
+ introducedVersion: string | null
+ fixedVersion: string | null
+ lastAffected: string | null
+}
+
+export interface NormalizedPackageEntry {
+ pkg: NormalizedAdvisoryPackage
+ ranges: NormalizedRange[]
+}
+
+export interface NormalizedRecord {
+ advisory: NormalizedAdvisory
+ packages: NormalizedPackageEntry[]
+}
+
+// ---- Error type, mirrors enricher/types.ts ----
+
+export type FetchErrorKind = 'NETWORK' | 'NOT_FOUND' | 'PARSE' | 'TRANSIENT'
+
+export class FetchError extends Error {
+ constructor(
+ public readonly kind: FetchErrorKind,
+ message: string,
+ public readonly resetAt?: number,
+ ) {
+ super(message)
+ this.name = 'FetchError'
+ }
+}
diff --git a/services/apps/packages_worker/src/osv/upsertAdvisory.ts b/services/apps/packages_worker/src/osv/upsertAdvisory.ts
new file mode 100644
index 0000000000..bcede60f37
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/upsertAdvisory.ts
@@ -0,0 +1,78 @@
+import {
+ deleteOsvOnlyRanges,
+ findPackageId,
+ insertAdvisoryRange,
+ upsertAdvisory,
+ upsertAdvisoryPackage,
+} from '@crowd/data-access-layer/src/packages/osv'
+import { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor'
+
+import { NormalizedRange, NormalizedRecord } from './types'
+
+// Drop duplicate ranges that would collide on the unique index over
+// (advisory_package_id, COALESCE(introduced_version,''),
+// COALESCE(fixed_version,''), COALESCE(last_affected,'')).
+// We key on the full range tuple so two ranges sharing an introduced_version
+// but differing in fixed_version or last_affected both survive — per
+// osv-plan §2 decision #1 ("one package has many version ranges; no
+// denormalization") and ADR-0001 §`advisory_affected_ranges` uniqueness scope.
+// OSV still occasionally emits redundant
+// events for the exact same tuple, which the Set collapses to one row.
+export function dedupeRanges(ranges: NormalizedRange[]): NormalizedRange[] {
+ const seen = new Set()
+ const out: NormalizedRange[] = []
+ for (const r of ranges) {
+ const key = `${r.introducedVersion ?? ''}|${r.fixedVersion ?? ''}|${r.lastAffected ?? ''}`
+ if (seen.has(key)) continue
+ seen.add(key)
+ out.push(r)
+ }
+ return out
+}
+
+async function upsertOne(qx: QueryExecutor, record: NormalizedRecord): Promise {
+ const { advisory, packages } = record
+ if (packages.length === 0) return
+
+ const advisoryId = await upsertAdvisory(qx, advisory)
+
+ for (const entry of packages) {
+ const packageId = await findPackageId(qx, entry.pkg)
+
+ const advisoryPackageId = await upsertAdvisoryPackage(qx, {
+ advisoryId,
+ packageId,
+ ecosystem: entry.pkg.ecosystem,
+ packageName: entry.pkg.packageName,
+ })
+
+ await deleteOsvOnlyRanges(qx, advisoryPackageId)
+
+ for (const range of dedupeRanges(entry.ranges)) {
+ await insertAdvisoryRange(qx, {
+ advisoryPackageId,
+ introducedVersion: range.introducedVersion,
+ fixedVersion: range.fixedVersion,
+ lastAffected: range.lastAffected,
+ })
+ }
+ }
+}
+
+// upsertAdvisoryBatch writes a batch of normalized OSV records, one record per
+// transaction. Per-record scope keeps advisory_packages row locks short (a few
+// statements instead of ~OSV_BATCH_SIZE × N+1) so concurrent writers aren't
+// blocked, and a Temporal activity cancel mid-batch only loses the in-flight
+// record instead of forcing the whole batch to re-do. upsertOne is idempotent
+// on osv_id, so a Temporal retry that re-runs the activity simply re-UPSERTs
+// the already-committed records with the same values.
+export async function upsertAdvisoryBatch(
+ qx: QueryExecutor,
+ batch: NormalizedRecord[],
+): Promise {
+ for (const record of batch) {
+ await qx.tx(async (tx) => {
+ await upsertOne(tx, record)
+ })
+ }
+}
diff --git a/services/apps/packages_worker/src/osv/versionCompare.ts b/services/apps/packages_worker/src/osv/versionCompare.ts
new file mode 100644
index 0000000000..631f2d3aea
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/versionCompare.ts
@@ -0,0 +1,136 @@
+import * as semver from 'semver'
+
+// Ecosystem-specific version ordering. Returns -1, 0, 1 like Array#sort,
+// or null when either operand cannot be parsed (we treat that as "do not match").
+
+// We deliberately do NOT fall back to semver.coerce. Coerce is lossy:
+// "1.2-junk-3" becomes "1.2.3" and "v1" becomes "1.0.0", which can mint a
+// false-positive range match that flips has_critical_vulnerability without
+// evidence. semver.parse with { loose: true } already accepts the common
+// shapes OSV emits (leading "v", prerelease tags); anything it rejects we
+// treat as unparseable and isInRange interprets as "no match". Under-flag
+// over mis-flag is the trade-off we want here.
+function parseLoose(v: string): semver.SemVer | null {
+ return semver.parse(v, { loose: true })
+}
+
+function compareNpm(a: string, b: string): number | null {
+ const pa = parseLoose(a)
+ const pb = parseLoose(b)
+ if (!pa || !pb) return null
+ return semver.compare(pa, pb)
+}
+
+// Minimal Maven ComparableVersion. Tokenizes on '.', '-', and on the boundary
+// between numeric and alpha runs, then compares token-by-token with the standard
+// Maven qualifier ranks. Handles the bulk of real-world versions (numeric dotted
+// + common qualifiers); intentionally not bug-for-bug compatible with Maven's
+// org.apache.maven.artifact.versioning.ComparableVersion.
+type Token = { kind: 'num'; value: number } | { kind: 'str'; value: string }
+
+function tokenizeMaven(version: string): Token[] {
+ const out: Token[] = []
+ const s = version.toLowerCase()
+ let i = 0
+ while (i < s.length) {
+ const c = s[i]
+ if (c === '.' || c === '-') {
+ i++
+ continue
+ }
+ if (c >= '0' && c <= '9') {
+ let j = i
+ while (j < s.length && s[j] >= '0' && s[j] <= '9') j++
+ out.push({ kind: 'num', value: parseInt(s.slice(i, j), 10) })
+ i = j
+ } else {
+ let j = i
+ while (j < s.length && s[j] !== '.' && s[j] !== '-' && !(s[j] >= '0' && s[j] <= '9')) {
+ j++
+ }
+ out.push({ kind: 'str', value: s.slice(i, j) })
+ i = j
+ }
+ }
+ return out
+}
+
+const MAVEN_QUALIFIER_RANK: Record = {
+ alpha: 0,
+ a: 0,
+ beta: 1,
+ b: 1,
+ milestone: 2,
+ m: 2,
+ rc: 3,
+ cr: 3,
+ snapshot: 4,
+ '': 5,
+ ga: 5,
+ final: 5,
+ release: 5,
+ sp: 6,
+}
+
+function cmpToken(a: Token, b: Token): number {
+ if (a.kind === 'num' && b.kind === 'num') {
+ return a.value < b.value ? -1 : a.value > b.value ? 1 : 0
+ }
+ // Maven: numeric tokens rank above qualifier tokens at the same depth.
+ if (a.kind === 'num') return 1
+ if (b.kind === 'num') return -1
+ const ar = MAVEN_QUALIFIER_RANK[a.value]
+ const br = MAVEN_QUALIFIER_RANK[b.value]
+ if (ar !== undefined && br !== undefined) {
+ return ar < br ? -1 : ar > br ? 1 : 0
+ }
+ // Known qualifier ranks below unknown alpha tokens.
+ if (ar !== undefined) return -1
+ if (br !== undefined) return 1
+ return a.value < b.value ? -1 : a.value > b.value ? 1 : 0
+}
+
+// When one side runs out of tokens, Maven's ComparableVersion substitutes a
+// "null" token whose kind matches the *other* side at that position: num→0,
+// str→'' (empty qualifier, ranked equal to 'ga'/'final'/'release'). Picking the
+// kind unconditionally (e.g. always num:0) breaks comparisons like
+// `1.0-final == 1.0` and `1.0 < 1.0-sp1`.
+function padFor(other: Token): Token {
+ return other.kind === 'num' ? { kind: 'num', value: 0 } : { kind: 'str', value: '' }
+}
+
+function compareMaven(a: string, b: string): number | null {
+ const ta = tokenizeMaven(a)
+ const tb = tokenizeMaven(b)
+ // Empty or punctuation-only inputs (e.g. '', '...', '---') tokenize to []
+ // and would otherwise be treated as version 0. Per the compareVersion
+ // contract ("returns null when either operand cannot be parsed"), reject
+ // them here so isInRange treats them as "no match" — safer than silently
+ // ordering garbage as 0.
+ if (ta.length === 0 || tb.length === 0) return null
+ const max = Math.max(ta.length, tb.length)
+ for (let i = 0; i < max; i++) {
+ if (i >= ta.length) {
+ const c = cmpToken(padFor(tb[i]), tb[i])
+ if (c !== 0) return c
+ continue
+ }
+ if (i >= tb.length) {
+ const c = cmpToken(ta[i], padFor(ta[i]))
+ if (c !== 0) return c
+ continue
+ }
+ const c = cmpToken(ta[i], tb[i])
+ if (c !== 0) return c
+ }
+ return 0
+}
+
+// Ecosystem names are stored lowercase in packages-db per ADR-0001 §OSV
+// "Ecosystem normalization" — 'npm' and 'maven'. Callers (deriveCriticalFlag)
+// pull the value straight from the DB so the literals here must match.
+export function compareVersion(ecosystem: string, a: string, b: string): number | null {
+ if (ecosystem === 'npm') return compareNpm(a, b)
+ if (ecosystem === 'maven') return compareMaven(a, b)
+ return null
+}
diff --git a/services/apps/packages_worker/src/osv/workflows.ts b/services/apps/packages_worker/src/osv/workflows.ts
new file mode 100644
index 0000000000..24334f6bd2
--- /dev/null
+++ b/services/apps/packages_worker/src/osv/workflows.ts
@@ -0,0 +1,94 @@
+import { log, proxyActivities } from '@temporalio/workflow'
+
+import type * as activities from './activities'
+
+// Per ADR-0001 §CVSS scoring strategy the v3.1 inline scorer + qualitative
+// fallback are mature; v4 is deferred to a follow-up. Per ADR-0001
+// §`advisory_affected_ranges` uniqueness scope the dedup key matches the
+// widened unique index so multi-distro ranges are preserved. Per ADR-0001
+// §`has_critical_vulnerability` semantics the derive step runs after every
+// ingest pass so packages added between schedule firings are at most one
+// cycle stale and self-heal on the next run.
+//
+// Sync and derive use separate proxyActivities configs because their
+// heartbeat shape differs: sync emits one heartbeat per ~1000 records (see
+// activities.ts) so a 5-minute heartbeatTimeout is the right liveness signal;
+// derive is a single tight loop over packages with no per-page heartbeat,
+// so it relies on startToCloseTimeout only — sharing the sync heartbeat
+// config would silently cancel the derive activity at Tier 2 scale.
+const { osvSyncEcosystem } = proxyActivities({
+ // npm sync alone is ~1 hour today (N+1 upsert path, see deferred review
+ // comment on upsertAdvisory.ts). Maven is ~5 minutes. We give each
+ // per-ecosystem activity a generous 2-hour ceiling.
+ startToCloseTimeout: '2 hours',
+ // Activity heartbeats every 1000 records (see activities.ts), but the FIRST
+ // heartbeat only fires after the full ecosystem zip has been downloaded
+ // (no heartbeat during downloadZip). DOWNLOAD_TIMEOUT_MS in
+ // fetchEcosystemZip.ts is 10 minutes, so the heartbeatTimeout must exceed
+ // that — otherwise Temporal kills the activity as unresponsive on a slow
+ // CDN even though the download is progressing. 15 minutes leaves 5 minutes
+ // of headroom past the download cap before the next heartbeat is expected.
+ heartbeatTimeout: '15 minutes',
+ retry: {
+ initialInterval: '30 seconds',
+ backoffCoefficient: 2,
+ maximumAttempts: 3,
+ // Activities translate FetchError(NOT_FOUND|PARSE) to ApplicationFailure
+ // with these type names — Temporal short-circuits the retry.
+ nonRetryableErrorTypes: ['NOT_FOUND', 'PARSE'],
+ },
+})
+
+const { osvDeriveCriticalFlag } = proxyActivities({
+ // Paged scan over packages (~600-700k at Tier 2 scale). The whole derive
+ // pass runs in ~5 minutes on the current dataset; we give it 1 hour of
+ // headroom for the table to grow. No heartbeatTimeout — the activity does
+ // not heartbeat, so adding one would cancel the activity silently before
+ // startToCloseTimeout fires.
+ startToCloseTimeout: '1 hour',
+ retry: {
+ initialInterval: '30 seconds',
+ backoffCoefficient: 2,
+ maximumAttempts: 3,
+ },
+})
+
+export interface OsvSyncWorkflowInput {
+ // OSV ecosystem labels in OSV's canonical case ('npm', 'Maven', 'PyPI', …).
+ // Same list is used by the per-ecosystem download loop, where OSV's bucket
+ // is case-sensitive (Maven/all.zip), and as the allowlist source for
+ // parseOsvRecord. The activity lowercases the allowlist internally before
+ // matching parsed records, since storage normalizes to lowercase per
+ // ADR-0001 §OSV "Ecosystem normalization".
+ ecosystems: string[]
+}
+
+// osvSync replaces the standalone-bin runOsvSync loop. The schedule (see
+// schedule.ts) fires this workflow daily; cadence is owned by Temporal, not
+// by an internal sleep. One ecosystem failing is recoverable: the workflow
+// records the per-ecosystem result and continues with the next, then runs
+// derivation on whatever did ingest.
+export async function osvSync(input: OsvSyncWorkflowInput): Promise {
+ log.info('osvSync starting', { ecosystems: input.ecosystems })
+
+ const allowedEcosystems = input.ecosystems
+
+ for (const ecosystem of input.ecosystems) {
+ try {
+ const stats = await osvSyncEcosystem({ ecosystem, allowedEcosystems })
+ log.info('osvSyncEcosystem succeeded', { ...stats })
+ } catch (err) {
+ // Per-ecosystem failure does not abort the pass — log and continue so
+ // the other ecosystems and the derive step still run on partial data.
+ log.error('osvSyncEcosystem failed; continuing with next ecosystem', {
+ ecosystem,
+ // Workflow log serializes the error via the Temporal SDK.
+ err: err as Error,
+ })
+ }
+ }
+
+ const derive = await osvDeriveCriticalFlag()
+ log.info('osvDeriveCriticalFlag succeeded', { ...derive })
+ log.info('osvSync complete')
+}
diff --git a/services/apps/packages_worker/src/workflows/index.ts b/services/apps/packages_worker/src/workflows/index.ts
index 5bd3dd2623..3687013248 100644
--- a/services/apps/packages_worker/src/workflows/index.ts
+++ b/services/apps/packages_worker/src/workflows/index.ts
@@ -1 +1,2 @@
export { npmHello } from '../npm/workflows'
+export { osvSync } from '../osv/workflows'
diff --git a/services/apps/packages_worker/vitest.config.ts b/services/apps/packages_worker/vitest.config.ts
new file mode 100644
index 0000000000..530cc66032
--- /dev/null
+++ b/services/apps/packages_worker/vitest.config.ts
@@ -0,0 +1,13 @@
+import { defineConfig } from 'vitest/config'
+
+export default defineConfig({
+ test: {
+ environment: 'node',
+ include: ['src/**/*.test.ts'],
+ server: {
+ deps: {
+ inline: [/@crowd\//],
+ },
+ },
+ },
+})
diff --git a/services/libs/data-access-layer/src/index.ts b/services/libs/data-access-layer/src/index.ts
index 2eec73d0dc..2bb2fbb6e6 100644
--- a/services/libs/data-access-layer/src/index.ts
+++ b/services/libs/data-access-layer/src/index.ts
@@ -14,4 +14,5 @@ export * from './systemSettings'
export * from './integrations'
export * from './auditLogs'
export * from './maintainers'
+export * from './packages'
export * from './project-catalog'
diff --git a/services/libs/data-access-layer/src/packages/index.ts b/services/libs/data-access-layer/src/packages/index.ts
new file mode 100644
index 0000000000..4ed98a8325
--- /dev/null
+++ b/services/libs/data-access-layer/src/packages/index.ts
@@ -0,0 +1 @@
+export * from './osv'
diff --git a/services/libs/data-access-layer/src/packages/osv.ts b/services/libs/data-access-layer/src/packages/osv.ts
new file mode 100644
index 0000000000..1a10b0af4d
--- /dev/null
+++ b/services/libs/data-access-layer/src/packages/osv.ts
@@ -0,0 +1,257 @@
+import { QueryExecutor } from '../queryExecutor'
+
+// Database access layer for OSV advisory ingestion and the critical-flag
+// derive pass. All advisories / advisory_packages / advisory_affected_ranges
+// queries used by services/apps/packages_worker live here; the worker keeps
+// the parsing and version-matching logic.
+//
+// Tables are defined in backend/src/osspckgs/migrations/V1779710880__initial_schema.sql.
+// Conventions (ecosystem case, package_name composition, range tuple
+// uniqueness) follow ADR-0001 §OSV.
+
+// ---- input shapes ----
+
+// Mirrors the writable columns of the `advisories` row produced by the OSV
+// parser. cvss_source is a closed enum (see CvssSource in the worker's
+// osv/types.ts) but is typed loosely here so DAL doesn't depend on the
+// parser's typing.
+export interface AdvisoryUpsertInput {
+ osvId: string
+ source: string
+ sourceUrl: string | null
+ aliases: string[]
+ severity: string | null
+ cvss: number | null
+ cvssSource: string | null
+ summary: string | null
+ details: string | null
+ publishedAt: string | null
+ modifiedAt: string | null
+}
+
+export interface AdvisoryPackageUpsertInput {
+ advisoryId: number
+ packageId: number | null
+ ecosystem: string
+ packageName: string
+}
+
+export interface AdvisoryRangeInsertInput {
+ advisoryPackageId: number
+ introducedVersion: string | null
+ fixedVersion: string | null
+ lastAffected: string | null
+}
+
+export interface PackageLookupInput {
+ ecosystem: string
+ namespace: string | null
+ name: string
+}
+
+// ---- query result shapes ----
+
+export interface PackageRow {
+ id: number
+ ecosystem: string
+ latest_version: string
+}
+
+export interface RangeRow {
+ pkg_id: number
+ introduced_version: string | null
+ fixed_version: string | null
+ last_affected: string | null
+ osv_id: string
+}
+
+// ---- upsert path (services/apps/packages_worker/src/osv/upsertAdvisory.ts) ----
+
+export async function upsertAdvisory(
+ qx: QueryExecutor,
+ advisory: AdvisoryUpsertInput,
+): Promise {
+ const row = await qx.selectOne(
+ `
+ INSERT INTO advisories
+ (osv_id, source, source_url, aliases, severity, cvss, cvss_source,
+ summary, details, published_at, modified_at)
+ VALUES
+ ($(osvId), $(source), $(sourceUrl), $(aliases)::text[], $(severity),
+ $(cvss), $(cvssSource), $(summary), $(details),
+ $(publishedAt)::timestamptz, $(modifiedAt)::timestamptz)
+ ON CONFLICT (osv_id) DO UPDATE SET
+ source = EXCLUDED.source,
+ source_url = EXCLUDED.source_url,
+ aliases = EXCLUDED.aliases,
+ severity = EXCLUDED.severity,
+ cvss = EXCLUDED.cvss,
+ cvss_source = EXCLUDED.cvss_source,
+ summary = EXCLUDED.summary,
+ details = EXCLUDED.details,
+ published_at = EXCLUDED.published_at,
+ modified_at = EXCLUDED.modified_at
+ RETURNING id
+ `,
+ advisory,
+ )
+ return row.id as number
+}
+
+// Resolves the packages.id for an OSV-referenced package, or null if the
+// package isn't in our DB yet. Mirrors the COALESCE-aware unique index on
+// packages(ecosystem, COALESCE(namespace,''), name).
+export async function findPackageId(
+ qx: QueryExecutor,
+ pkg: PackageLookupInput,
+): Promise {
+ const row = await qx.selectOneOrNone(
+ `
+ SELECT id
+ FROM packages
+ WHERE ecosystem = $(ecosystem)
+ AND COALESCE(namespace, '') = COALESCE($(namespace), '')
+ AND name = $(name)
+ `,
+ pkg,
+ )
+ return (row?.id as number | undefined) ?? null
+}
+
+export async function upsertAdvisoryPackage(
+ qx: QueryExecutor,
+ input: AdvisoryPackageUpsertInput,
+): Promise {
+ const row = await qx.selectOne(
+ `
+ INSERT INTO advisory_packages
+ (advisory_id, package_id, ecosystem, package_name)
+ VALUES
+ ($(advisoryId), $(packageId), $(ecosystem), $(packageName))
+ ON CONFLICT (advisory_id, ecosystem, package_name) DO UPDATE SET
+ package_id = EXCLUDED.package_id
+ RETURNING id
+ `,
+ input,
+ )
+ return row.id as number
+}
+
+// Delete every advisory_affected_ranges row for this advisory_package that
+// lacks deps.dev raw columns — i.e. every OSV-owned row. The deps.dev BQ
+// worker (future) is expected to populate range_raw / unaffected_raw on rows
+// of its own and never set the structured introduced/fixed/last_affected
+// columns; this predicate scopes the wipe to the OSV pipeline's rows so a
+// deps.dev row is never clobbered on resync. OSV rows where all three
+// structured columns are NULL (e.g. some MAL- "always vulnerable" ranges)
+// are still deleted here and re-inserted from the new payload below — that's
+// fine, the row is OSV-owned and idempotent.
+export async function deleteOsvOnlyRanges(
+ qx: QueryExecutor,
+ advisoryPackageId: number,
+): Promise {
+ await qx.result(
+ `
+ DELETE FROM advisory_affected_ranges
+ WHERE advisory_package_id = $(advisoryPackageId)
+ AND range_raw IS NULL
+ AND unaffected_raw IS NULL
+ `,
+ { advisoryPackageId },
+ )
+}
+
+export async function insertAdvisoryRange(
+ qx: QueryExecutor,
+ range: AdvisoryRangeInsertInput,
+): Promise {
+ await qx.result(
+ `
+ INSERT INTO advisory_affected_ranges
+ (advisory_package_id, introduced_version, fixed_version, last_affected)
+ VALUES
+ ($(advisoryPackageId), $(introducedVersion), $(fixedVersion), $(lastAffected))
+ `,
+ range,
+ )
+}
+
+// ---- derive path (services/apps/packages_worker/src/osv/deriveCriticalFlag.ts) ----
+
+// Catch-up: resolve advisory_packages.package_id rows that were inserted
+// before the package existed in our DB. Idempotent; cheap when there's
+// nothing to do. Driven by the partial index
+// `advisory_packages (ecosystem, package_name) WHERE package_id IS NULL`.
+export async function resolveMissingPackageIds(qx: QueryExecutor): Promise {
+ return await qx.result(`
+ UPDATE advisory_packages ap
+ SET package_id = p.id
+ FROM packages p
+ WHERE ap.package_id IS NULL
+ AND ap.ecosystem = p.ecosystem
+ AND ap.package_name = CASE
+ WHEN p.namespace IS NULL THEN p.name
+ WHEN p.ecosystem = 'maven' THEN p.namespace || ':' || p.name
+ WHEN p.ecosystem = 'npm' THEN '@' || p.namespace || '/' || p.name
+ ELSE p.name
+ END
+ `)
+}
+
+export async function getPackagePage(
+ qx: QueryExecutor,
+ cursor: number,
+ batchSize: number,
+): Promise {
+ return qx.select(
+ `
+ SELECT id, ecosystem, latest_version
+ FROM packages
+ WHERE id > $(cursor) AND latest_version IS NOT NULL
+ ORDER BY id
+ LIMIT $(batchSize)
+ `,
+ { cursor, batchSize },
+ )
+}
+
+// Returns every (range × advisory) row for the given package ids whose
+// advisory is critical or a MAL- malicious-package report. Empty `ids`
+// returns []; the caller is responsible for short-circuiting empty pages.
+export async function getRangesForPackages(qx: QueryExecutor, ids: number[]): Promise {
+ if (ids.length === 0) return []
+ return qx.select(
+ `
+ SELECT
+ ap.package_id AS pkg_id,
+ ar.introduced_version,
+ ar.fixed_version,
+ ar.last_affected,
+ a.osv_id
+ FROM advisory_packages ap
+ JOIN advisories a ON a.id = ap.advisory_id
+ JOIN advisory_affected_ranges ar ON ar.advisory_package_id = ap.id
+ WHERE ap.package_id IN ($(ids:csv))
+ AND (a.is_critical = TRUE OR a.osv_id LIKE 'MAL-%')
+ `,
+ { ids },
+ )
+}
+
+export async function flipVulnerableFlags(qx: QueryExecutor, ids: number[]): Promise {
+ if (ids.length === 0) return 0
+ return qx.result(
+ `UPDATE packages SET has_critical_vulnerability = TRUE
+ WHERE id IN ($(ids:csv)) AND has_critical_vulnerability = FALSE`,
+ { ids },
+ )
+}
+
+export async function clearSafeFlags(qx: QueryExecutor, ids: number[]): Promise {
+ if (ids.length === 0) return 0
+ return qx.result(
+ `UPDATE packages SET has_critical_vulnerability = FALSE
+ WHERE id IN ($(ids:csv)) AND has_critical_vulnerability = TRUE`,
+ { ids },
+ )
+}