From a5b50dcac5495be2f302b7bab77b960b16c6d740 Mon Sep 17 00:00:00 2001 From: Samuele Verzi Date: Tue, 21 Apr 2026 16:45:22 +0200 Subject: [PATCH 1/3] Add build provenance index for skills OCI store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a file-backed index at /builds.json that records which tags in the shared local OCI store were produced by a local Build. The index lives next to the OCI layout, uses atomic temp-file + rename writes, and serializes access with an in-process mutex. This is a self-contained addition — no callers yet. Subsequent commits wire it into the skill service so ListBuilds can filter out artifacts that were only pulled (by install or the content API) for caching. --- pkg/skills/skillsvc/provenance.go | 252 +++++++++++++++++++++++++ pkg/skills/skillsvc/provenance_test.go | 252 +++++++++++++++++++++++++ 2 files changed, 504 insertions(+) create mode 100644 pkg/skills/skillsvc/provenance.go create mode 100644 pkg/skills/skillsvc/provenance_test.go diff --git a/pkg/skills/skillsvc/provenance.go b/pkg/skills/skillsvc/provenance.go new file mode 100644 index 0000000000..28240f28a6 --- /dev/null +++ b/pkg/skills/skillsvc/provenance.go @@ -0,0 +1,252 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package skillsvc + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + "time" +) + +// provenanceFileName is the name of the JSON sidecar file, stored at the root +// of the OCI skills store, that records which tags were produced by +// `thv skill build`. Tags created by OCI pulls (install, content preview) are +// intentionally absent so they remain invisible to ListBuilds while their +// blobs stay in the OCI store as a cache. +const provenanceFileName = "builds.json" + +// provenanceEntry records a single locally-built skill tag. +type provenanceEntry struct { + Tag string `json:"tag"` + Digest string `json:"digest"` + CreatedAt time.Time `json:"createdAt"` +} + +// provenanceFile is the on-disk JSON document layout. +type provenanceFile struct { + Entries []provenanceEntry `json:"entries"` +} + +// buildProvenance tracks which OCI-store tags were produced by a local build. +// It is an in-process, file-backed index: callers hold a mutex while reading +// or writing the JSON file. Single-process semantics are sufficient since the +// skills service is not shared across processes. +type buildProvenance struct { + mu sync.Mutex + path string +} + +// newBuildProvenance constructs a provenance index rooted at the given OCI +// store root. The file itself is created lazily on first write. +func newBuildProvenance(storeRoot string) *buildProvenance { + return &buildProvenance{ + path: filepath.Join(storeRoot, provenanceFileName), + } +} + +// Exists reports whether the provenance file has been created on disk. Used +// by the migration path to decide whether to grandfather existing tags. +func (p *buildProvenance) Exists() (bool, error) { + if p == nil { + return false, nil + } + _, err := os.Stat(p.path) + if err == nil { + return true, nil + } + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + return false, fmt.Errorf("stat provenance file: %w", err) +} + +// Record adds or updates the entry for tag. The timestamp on updates is +// refreshed so "last built" reflects the most recent build. +func (p *buildProvenance) Record(tag, digest string) error { + if p == nil { + return errors.New("build provenance is not configured") + } + p.mu.Lock() + defer p.mu.Unlock() + + doc, err := p.loadLocked() + if err != nil { + return err + } + + now := time.Now().UTC() + found := false + for i := range doc.Entries { + if doc.Entries[i].Tag == tag { + doc.Entries[i].Digest = digest + doc.Entries[i].CreatedAt = now + found = true + break + } + } + if !found { + doc.Entries = append(doc.Entries, provenanceEntry{ + Tag: tag, + Digest: digest, + CreatedAt: now, + }) + } + + return p.saveLocked(doc) +} + +// Forget removes the entry for tag, if present. A missing tag is not an error. +func (p *buildProvenance) Forget(tag string) error { + if p == nil { + return nil + } + p.mu.Lock() + defer p.mu.Unlock() + + doc, err := p.loadLocked() + if err != nil { + return err + } + + kept := doc.Entries[:0] + removed := false + for _, e := range doc.Entries { + if e.Tag == tag { + removed = true + continue + } + kept = append(kept, e) + } + if !removed { + return nil + } + doc.Entries = kept + return p.saveLocked(doc) +} + +// List returns a copy of all recorded entries. +func (p *buildProvenance) List() ([]provenanceEntry, error) { + if p == nil { + return nil, nil + } + p.mu.Lock() + defer p.mu.Unlock() + + doc, err := p.loadLocked() + if err != nil { + return nil, err + } + out := make([]provenanceEntry, len(doc.Entries)) + copy(out, doc.Entries) + return out, nil +} + +// Has reports whether tag has a recorded entry. +func (p *buildProvenance) Has(tag string) (bool, error) { + if p == nil { + return false, nil + } + p.mu.Lock() + defer p.mu.Unlock() + + doc, err := p.loadLocked() + if err != nil { + return false, err + } + for _, e := range doc.Entries { + if e.Tag == tag { + return true, nil + } + } + return false, nil +} + +// Seed replaces the on-disk state with the given entries. Used by the one-shot +// migration path to grandfather pre-existing tags into the provenance index. +// The provenance file is always (re)written, even when entries is empty, so +// subsequent calls to Exists report true and the migration does not re-run. +func (p *buildProvenance) Seed(entries []provenanceEntry) error { + if p == nil { + return errors.New("build provenance is not configured") + } + p.mu.Lock() + defer p.mu.Unlock() + + doc := &provenanceFile{Entries: make([]provenanceEntry, 0, len(entries))} + doc.Entries = append(doc.Entries, entries...) + return p.saveLocked(doc) +} + +// loadLocked reads and parses the provenance file. Missing files yield an +// empty document. Callers must hold p.mu. +func (p *buildProvenance) loadLocked() (*provenanceFile, error) { + data, err := os.ReadFile(p.path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return &provenanceFile{}, nil + } + return nil, fmt.Errorf("reading provenance file: %w", err) + } + if len(data) == 0 { + return &provenanceFile{}, nil + } + var doc provenanceFile + if err := json.Unmarshal(data, &doc); err != nil { + return nil, fmt.Errorf("parsing provenance file: %w", err) + } + return &doc, nil +} + +// saveLocked atomically writes the provenance file via temp-file + rename. +// Callers must hold p.mu. +func (p *buildProvenance) saveLocked(doc *provenanceFile) error { + if doc.Entries == nil { + doc.Entries = []provenanceEntry{} + } + data, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return fmt.Errorf("marshaling provenance file: %w", err) + } + + dir := filepath.Dir(p.path) + if err := os.MkdirAll(dir, 0o750); err != nil { + return fmt.Errorf("creating provenance directory: %w", err) + } + + tmp, err := os.CreateTemp(dir, provenanceFileName+".*.tmp") + if err != nil { + return fmt.Errorf("creating temp provenance file: %w", err) + } + tmpPath := tmp.Name() + cleanup := func() { + _ = tmp.Close() + _ = os.Remove(tmpPath) + } + + if _, err := tmp.Write(data); err != nil { + cleanup() + return fmt.Errorf("writing temp provenance file: %w", err) + } + if err := tmp.Sync(); err != nil { + cleanup() + return fmt.Errorf("syncing temp provenance file: %w", err) + } + if err := tmp.Close(); err != nil { + _ = os.Remove(tmpPath) + return fmt.Errorf("closing temp provenance file: %w", err) + } + if err := os.Chmod(tmpPath, 0o600); err != nil { + _ = os.Remove(tmpPath) + return fmt.Errorf("chmod temp provenance file: %w", err) + } + if err := os.Rename(tmpPath, p.path); err != nil { + _ = os.Remove(tmpPath) + return fmt.Errorf("renaming provenance file: %w", err) + } + return nil +} diff --git a/pkg/skills/skillsvc/provenance_test.go b/pkg/skills/skillsvc/provenance_test.go new file mode 100644 index 0000000000..7f46f31c76 --- /dev/null +++ b/pkg/skills/skillsvc/provenance_test.go @@ -0,0 +1,252 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package skillsvc + +import ( + "encoding/json" + "os" + "path/filepath" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildProvenance_ExistsStartsFalse(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + exists, err := p.Exists() + require.NoError(t, err) + assert.False(t, exists, "provenance file should not exist before any write") +} + +func TestBuildProvenance_RecordCreatesFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + p := newBuildProvenance(dir) + + require.NoError(t, p.Record("my-skill", "sha256:abc")) + + exists, err := p.Exists() + require.NoError(t, err) + assert.True(t, exists) + + // File lives next to the OCI layout at /builds.json. + info, err := os.Stat(filepath.Join(dir, provenanceFileName)) + require.NoError(t, err) + assert.Greater(t, info.Size(), int64(0)) +} + +func TestBuildProvenance_RecordListForgetRoundTrip(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + require.NoError(t, p.Record("skill-a", "sha256:aaa")) + require.NoError(t, p.Record("skill-b", "sha256:bbb")) + + entries, err := p.List() + require.NoError(t, err) + require.Len(t, entries, 2) + + byTag := map[string]string{} + for _, e := range entries { + byTag[e.Tag] = e.Digest + assert.False(t, e.CreatedAt.IsZero(), "createdAt should be populated") + } + assert.Equal(t, "sha256:aaa", byTag["skill-a"]) + assert.Equal(t, "sha256:bbb", byTag["skill-b"]) + + has, err := p.Has("skill-a") + require.NoError(t, err) + assert.True(t, has) + + require.NoError(t, p.Forget("skill-a")) + + has, err = p.Has("skill-a") + require.NoError(t, err) + assert.False(t, has) + + entries, err = p.List() + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "skill-b", entries[0].Tag) +} + +func TestBuildProvenance_RecordUpdatesExistingEntry(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + require.NoError(t, p.Record("skill-a", "sha256:old")) + require.NoError(t, p.Record("skill-a", "sha256:new")) + + entries, err := p.List() + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "sha256:new", entries[0].Digest) +} + +func TestBuildProvenance_ForgetMissingTagIsNoop(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + require.NoError(t, p.Forget("nonexistent")) + // Forget on a missing provenance file should leave it untouched. + exists, err := p.Exists() + require.NoError(t, err) + assert.False(t, exists) +} + +func TestBuildProvenance_SeedReplacesState(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + require.NoError(t, p.Record("pre-existing", "sha256:zzz")) + + require.NoError(t, p.Seed([]provenanceEntry{ + {Tag: "seeded-a", Digest: "sha256:aaa"}, + {Tag: "seeded-b", Digest: "sha256:bbb"}, + })) + + entries, err := p.List() + require.NoError(t, err) + require.Len(t, entries, 2) + + seen := map[string]bool{} + for _, e := range entries { + seen[e.Tag] = true + } + assert.True(t, seen["seeded-a"]) + assert.True(t, seen["seeded-b"]) + assert.False(t, seen["pre-existing"], "seed should replace prior state") +} + +func TestBuildProvenance_SeedEmptyMarksExists(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + require.NoError(t, p.Seed(nil)) + + exists, err := p.Exists() + require.NoError(t, err) + assert.True(t, exists, "seeding an empty set must still create the file to block re-migration") + + entries, err := p.List() + require.NoError(t, err) + assert.Empty(t, entries) +} + +func TestBuildProvenance_LoadHandlesEmptyFile(t *testing.T) { + t.Parallel() + dir := t.TempDir() + p := newBuildProvenance(dir) + + require.NoError(t, os.WriteFile(filepath.Join(dir, provenanceFileName), []byte{}, 0o600)) + + entries, err := p.List() + require.NoError(t, err) + assert.Empty(t, entries) +} + +func TestBuildProvenance_LoadRejectsCorruptJSON(t *testing.T) { + t.Parallel() + dir := t.TempDir() + p := newBuildProvenance(dir) + + require.NoError(t, os.WriteFile(filepath.Join(dir, provenanceFileName), []byte("not-json"), 0o600)) + + _, err := p.List() + require.Error(t, err) + assert.Contains(t, err.Error(), "parsing provenance file") +} + +func TestBuildProvenance_AtomicWriteLeavesNoStragglers(t *testing.T) { + t.Parallel() + dir := t.TempDir() + p := newBuildProvenance(dir) + + require.NoError(t, p.Record("skill-a", "sha256:aaa")) + require.NoError(t, p.Record("skill-b", "sha256:bbb")) + require.NoError(t, p.Forget("skill-a")) + + // After several writes, only the final builds.json should remain — + // the atomic temp-file + rename path must not leak "*.tmp" siblings. + entries, err := os.ReadDir(dir) + require.NoError(t, err) + for _, entry := range entries { + assert.NotContains(t, entry.Name(), ".tmp", "temp file left behind: %s", entry.Name()) + } +} + +func TestBuildProvenance_ConcurrentWritesAreSerialized(t *testing.T) { + t.Parallel() + p := newBuildProvenance(t.TempDir()) + + const n = 20 + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + go func(i int) { + defer wg.Done() + tag := "skill-" + string(rune('a'+i%26)) + _ = p.Record(tag, "sha256:digest") + }(i) + } + wg.Wait() + + // The final file must still be valid JSON regardless of interleaving. + data, err := os.ReadFile(filepath.Join(p.path)) + require.NoError(t, err) + var doc provenanceFile + require.NoError(t, json.Unmarshal(data, &doc)) +} + +func TestBuildProvenance_NilReceiverSafeMethods(t *testing.T) { + t.Parallel() + var p *buildProvenance + + exists, err := p.Exists() + require.NoError(t, err) + assert.False(t, exists) + + entries, err := p.List() + require.NoError(t, err) + assert.Nil(t, entries) + + has, err := p.Has("anything") + require.NoError(t, err) + assert.False(t, has) + + // Forget is a no-op for nil receivers (no store configured). + require.NoError(t, p.Forget("anything")) + + // Record and Seed require configuration; they must error explicitly. + require.Error(t, p.Record("tag", "sha256:abc")) + require.Error(t, p.Seed(nil)) +} + +func TestLooksLikePulledRef(t *testing.T) { + t.Parallel() + + tests := []struct { + tag string + pulled bool + }{ + {"my-skill", false}, + {"v1.0.0", false}, + {"latest", false}, + {"ghcr.io/org/skill:v1", true}, + {"localhost:5000/skill:v1", true}, + {"ghcr.io/org/skill@sha256:abc", true}, + {"org/skill", true}, + } + + for _, tc := range tests { + t.Run(tc.tag, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.pulled, looksLikePulledRef(tc.tag)) + }) + } +} From a8de995ce163b41deb50edc34c502b9078b289c8 Mon Sep 17 00:00:00 2001 From: Samuele Verzi Date: Tue, 21 Apr 2026 16:45:27 +0200 Subject: [PATCH 2/3] Hide pulled skill artifacts from local builds listing Skills pulled into the shared OCI store by install or the content API were tagged for caching and then surfaced by ListBuilds, making every previewed or installed remote skill look like a local build. Wire the build provenance index into the skill service: - New() initialises provenance from ociStore.Root() and runs a one-shot migration that grandfathers pre-existing simple tags (no '/', ':' or '@') so existing users do not lose their builds. - Build records each tagged artifact after the ociStore.Tag call. - DeleteBuild forgets the provenance entry after the store delete. - ListBuilds iterates provenance entries instead of every OCI tag and prunes stale entries whose tags no longer resolve. OCI pulls continue to tag the local store as a cache but are no longer visible to ListBuilds because those paths deliberately skip provenance recording. --- pkg/skills/skillsvc/build.go | 55 ++++++++--- pkg/skills/skillsvc/build_test.go | 147 ++++++++++++++++++++++++++++++ pkg/skills/skillsvc/service.go | 71 +++++++++++++++ 3 files changed, 262 insertions(+), 11 deletions(-) diff --git a/pkg/skills/skillsvc/build.go b/pkg/skills/skillsvc/build.go index c5a762c58a..d65359ae8c 100644 --- a/pkg/skills/skillsvc/build.go +++ b/pkg/skills/skillsvc/build.go @@ -67,6 +67,15 @@ func (s *service) Build(ctx context.Context, opts skills.BuildOptions) (*skills. if tagErr := s.ociStore.Tag(ctx, result.IndexDigest, tag); tagErr != nil { return nil, fmt.Errorf("tagging artifact: %w", tagErr) } + // Record build provenance so ListBuilds surfaces this tag. Tags + // created by OCI pulls (install, content preview) deliberately skip + // this step and stay hidden from ListBuilds while their blobs remain + // as a cache. + if s.provenance != nil { + if recErr := s.provenance.Record(tag, result.IndexDigest.String()); recErr != nil { + return nil, fmt.Errorf("recording build provenance: %w", recErr) + } + } } ref := func() string { @@ -111,6 +120,9 @@ func (s *service) Push(ctx context.Context, opts skills.PushOptions) error { } // ListBuilds returns all locally-built OCI skill artifacts in the local store. +// It iterates the build provenance index (populated by Build) rather than the +// full tag list, so artifacts pulled into the store by install or the content +// API for caching do not appear here. func (s *service) ListBuilds(ctx context.Context) ([]skills.LocalBuild, error) { if s.ociStore == nil { return nil, httperr.WithCode( @@ -118,23 +130,33 @@ func (s *service) ListBuilds(ctx context.Context) ([]skills.LocalBuild, error) { http.StatusInternalServerError, ) } + if s.provenance == nil { + // ociStore is set but provenance failed to initialize; treat as + // empty rather than surfacing every pulled tag. + return []skills.LocalBuild{}, nil + } - tags, err := s.ociStore.ListTags(ctx) + entries, err := s.provenance.List() if err != nil { - return nil, fmt.Errorf("listing local OCI builds: %w", err) + return nil, fmt.Errorf("listing build provenance: %w", err) } - builds := make([]skills.LocalBuild, 0, len(tags)) - for _, tag := range tags { - d, resolveErr := s.ociStore.Resolve(ctx, tag) + builds := make([]skills.LocalBuild, 0, len(entries)) + for _, entry := range entries { + d, resolveErr := s.ociStore.Resolve(ctx, entry.Tag) if resolveErr != nil { - slog.Debug("failed to resolve tag in local OCI store", "tag", tag, "error", resolveErr) + // Tag no longer resolves (e.g. deleted outside of DeleteBuild). + // Prune the stale provenance entry and skip. + slog.Debug("pruning stale provenance entry", "tag", entry.Tag, "error", resolveErr) + if forgetErr := s.provenance.Forget(entry.Tag); forgetErr != nil { + slog.Debug("failed to prune stale provenance entry", "tag", entry.Tag, "error", forgetErr) + } continue } isSkill, typeErr := s.isSkillArtifact(ctx, d) if typeErr != nil { - slog.Debug("failed to check artifact type in local OCI store", "tag", tag, "error", typeErr) + slog.Debug("failed to check artifact type in local OCI store", "tag", entry.Tag, "error", typeErr) continue } if !isSkill { @@ -142,7 +164,7 @@ func (s *service) ListBuilds(ctx context.Context) ([]skills.LocalBuild, error) { } build := skills.LocalBuild{ - Tag: tag, + Tag: entry.Tag, Digest: d.String(), } @@ -152,7 +174,7 @@ func (s *service) ListBuilds(ctx context.Context) ([]skills.LocalBuild, error) { build.Description = cfg.Description build.Version = cfg.Version } else if extractErr != nil { - slog.Debug("failed to extract skill config from local build", "tag", tag, "error", extractErr) + slog.Debug("failed to extract skill config from local build", "tag", entry.Tag, "error", extractErr) } builds = append(builds, build) @@ -163,7 +185,7 @@ func (s *service) ListBuilds(ctx context.Context) ([]skills.LocalBuild, error) { // DeleteBuild removes a locally-built OCI skill artifact from the local store. // It deletes the tag and, when no other tag shares the same digest, also -// garbage-collects all associated blobs. +// garbage-collects all associated blobs. The provenance entry is also forgotten. func (s *service) DeleteBuild(ctx context.Context, tag string) error { if s.ociStore == nil { return httperr.WithCode( @@ -171,7 +193,18 @@ func (s *service) DeleteBuild(ctx context.Context, tag string) error { http.StatusInternalServerError, ) } - return s.ociStore.DeleteBuild(ctx, tag) + if err := s.ociStore.DeleteBuild(ctx, tag); err != nil { + return err + } + if s.provenance != nil { + if forgetErr := s.provenance.Forget(tag); forgetErr != nil { + // The underlying artifact is gone; failing to clean up the + // provenance entry is non-fatal — ListBuilds will prune stale + // entries on the next call. + slog.Debug("failed to forget provenance entry", "tag", tag, "error", forgetErr) + } + } + return nil } // validateLocalPath checks that a path is non-empty, absolute, and does not diff --git a/pkg/skills/skillsvc/build_test.go b/pkg/skills/skillsvc/build_test.go index 04569e8e7a..02bd680c74 100644 --- a/pkg/skills/skillsvc/build_test.go +++ b/pkg/skills/skillsvc/build_test.go @@ -528,6 +528,98 @@ func TestListBuilds(t *testing.T) { require.Len(t, artifacts, 1) assert.Equal(t, "real-skill", artifacts[0].Tag) }) + + t.Run("pulled tags are hidden from ListBuilds", func(t *testing.T) { + t.Parallel() + storeRoot := t.TempDir() + ociStore, err := ociskills.NewStore(storeRoot) + require.NoError(t, err) + + // Construct the service first so the provenance migration runs on an + // empty store and records nothing. + svc := New(&storage.NoopSkillStore{}, WithOCIStore(ociStore)) + + // Simulate a pull: a skill artifact tagged with a fully-qualified OCI + // reference. Build provenance is intentionally not touched by pulls. + d := buildTestArtifact(t, ociStore, "my-skill", "1.0.0") + require.NoError(t, ociStore.Tag(t.Context(), d, "ghcr.io/org/my-skill:v1.0.0")) + + artifacts, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + assert.Empty(t, artifacts, "pulled tags must not appear in ListBuilds") + }) + + t.Run("only Build-recorded tags are listed when pull and build coexist", func(t *testing.T) { + t.Parallel() + storeRoot := t.TempDir() + ociStore, err := ociskills.NewStore(storeRoot) + require.NoError(t, err) + + svc := New(&storage.NoopSkillStore{}, WithOCIStore(ociStore)) + + // Pre-stage a "pulled" artifact — never goes through provenance. + pulled := buildTestArtifact(t, ociStore, "pulled-skill", "9.9.9") + require.NoError(t, ociStore.Tag(t.Context(), pulled, "ghcr.io/org/pulled-skill:v9.9.9")) + + // Manually record a build provenance entry to simulate a Build call + // without exercising the full packager path. Pointing at an existing + // skill digest is sufficient for ListBuilds to surface it. + svcImpl := svc.(*service) + built := buildTestArtifact(t, ociStore, "built-skill", "1.0.0") + require.NoError(t, ociStore.Tag(t.Context(), built, "built-skill")) + require.NoError(t, svcImpl.provenance.Record("built-skill", built.String())) + + artifacts, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + require.Len(t, artifacts, 1) + assert.Equal(t, "built-skill", artifacts[0].Tag) + }) + + t.Run("migration grandfathers existing simple tags as builds", func(t *testing.T) { + t.Parallel() + storeRoot := t.TempDir() + ociStore, err := ociskills.NewStore(storeRoot) + require.NoError(t, err) + + // Tags present BEFORE the service is constructed should be migrated. + builtDigest := buildTestArtifact(t, ociStore, "legacy-build", "1.0.0") + require.NoError(t, ociStore.Tag(t.Context(), builtDigest, "legacy-build")) + pulledDigest := buildTestArtifact(t, ociStore, "legacy-pull", "2.0.0") + require.NoError(t, ociStore.Tag(t.Context(), pulledDigest, "ghcr.io/org/legacy-pull:v2")) + + svc := New(&storage.NoopSkillStore{}, WithOCIStore(ociStore)) + artifacts, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + require.Len(t, artifacts, 1, "simple tags migrate, OCI-ref tags do not") + assert.Equal(t, "legacy-build", artifacts[0].Tag) + + // builds.json must exist so the migration does not repeat. + info, statErr := os.Stat(filepath.Join(storeRoot, provenanceFileName)) + require.NoError(t, statErr) + assert.Greater(t, info.Size(), int64(0)) + }) + + t.Run("stale provenance entries are pruned on list", func(t *testing.T) { + t.Parallel() + storeRoot := t.TempDir() + ociStore, err := ociskills.NewStore(storeRoot) + require.NoError(t, err) + + svc := New(&storage.NoopSkillStore{}, WithOCIStore(ociStore)) + svcImpl := svc.(*service) + + // Record a provenance entry for a tag that is not (yet) in the store. + require.NoError(t, svcImpl.provenance.Record("ghost-tag", "sha256:aaa")) + + artifacts, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + assert.Empty(t, artifacts) + + // The stale entry must be pruned as a side effect. + has, err := svcImpl.provenance.Has("ghost-tag") + require.NoError(t, err) + assert.False(t, has, "stale provenance entry should have been pruned") + }) } func TestDeleteBuild(t *testing.T) { @@ -587,4 +679,59 @@ func TestDeleteBuild(t *testing.T) { require.Len(t, builds, 1) assert.Equal(t, "tag-b", builds[0].Tag) }) + + t.Run("delete removes provenance entry", func(t *testing.T) { + t.Parallel() + ociStore, err := ociskills.NewStore(t.TempDir()) + require.NoError(t, err) + + d := buildTestArtifact(t, ociStore, "my-skill", "1.0.0") + require.NoError(t, ociStore.Tag(t.Context(), d, "my-skill")) + + svc := New(&storage.NoopSkillStore{}, WithOCIStore(ociStore)) + svcImpl := svc.(*service) + + // Migration should have seeded the entry. + has, err := svcImpl.provenance.Has("my-skill") + require.NoError(t, err) + require.True(t, has) + + require.NoError(t, svc.DeleteBuild(t.Context(), "my-skill")) + + has, err = svcImpl.provenance.Has("my-skill") + require.NoError(t, err) + assert.False(t, has, "provenance entry must be forgotten after DeleteBuild") + }) +} + +func TestBuild_RecordsProvenance(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + ociStore, err := ociskills.NewStore(t.TempDir()) + require.NoError(t, err) + + d := buildTestArtifact(t, ociStore, "my-skill", "1.0.0") + + p := ocimocks.NewMockSkillPackager(ctrl) + p.EXPECT().Package(gomock.Any(), "/some/dir", gomock.Any()). + Return(&ociskills.PackageResult{ + IndexDigest: d, + Config: &ociskills.SkillConfig{Name: "my-skill"}, + }, nil) + + svc := New(&storage.NoopSkillStore{}, + WithPackager(p), + WithOCIStore(ociStore), + ) + + _, err = svc.Build(t.Context(), skills.BuildOptions{Path: "/some/dir", Tag: "my-skill"}) + require.NoError(t, err) + + // After a successful Build, the tag must be surfaced by ListBuilds + // because the build provenance index recorded it. + builds, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + require.Len(t, builds, 1) + assert.Equal(t, "my-skill", builds[0].Tag) } diff --git a/pkg/skills/skillsvc/service.go b/pkg/skills/skillsvc/service.go index ef87558eab..9b03bc6975 100644 --- a/pkg/skills/skillsvc/service.go +++ b/pkg/skills/skillsvc/service.go @@ -5,7 +5,11 @@ package skillsvc import ( + "context" + "log/slog" + "strings" "sync" + "time" ociskills "github.com/stacklok/toolhive-core/oci/skills" regtypes "github.com/stacklok/toolhive-core/registry/types" @@ -118,6 +122,11 @@ type service struct { registry ociskills.RegistryClient skillLookup SkillLookup gitResolver gitresolver.Resolver + // provenance records which OCI-store tags were produced by `thv skill + // build`. Tags created by OCI pulls (install, content preview) are + // deliberately absent so they remain invisible to ListBuilds while their + // blobs stay in the store as a cache. Nil when ociStore is not configured. + provenance *buildProvenance } // New creates a new SkillService backed by the given store. @@ -135,5 +144,67 @@ func New(store storage.SkillStore, opts ...Option) skills.SkillService { if s.gitResolver == nil { s.gitResolver = gitresolver.NewResolver() } + if s.ociStore != nil { + s.provenance = newBuildProvenance(s.ociStore.Root()) + migrateBuildProvenance(s.ociStore, s.provenance) + } return s } + +// migrateBuildProvenance seeds the provenance file on first run so tags +// present before this feature existed continue to appear in ListBuilds. +// Tags that look like fully-qualified OCI references (contain '/', ':' or '@') +// are treated as pulled artifacts and omitted. Best-effort: any error is +// logged and swallowed so service construction never fails on a migration +// glitch. +func migrateBuildProvenance(store *ociskills.Store, p *buildProvenance) { + if store == nil || p == nil { + return + } + exists, err := p.Exists() + if err != nil { + slog.Warn("checking build provenance file", "error", err) + return + } + if exists { + return + } + + ctx := context.Background() + tags, err := store.ListTags(ctx) + if err != nil { + slog.Warn("listing OCI tags for provenance migration", "error", err) + return + } + + now := time.Now().UTC() + entries := make([]provenanceEntry, 0, len(tags)) + for _, tag := range tags { + if looksLikePulledRef(tag) { + continue + } + d, resolveErr := store.Resolve(ctx, tag) + if resolveErr != nil { + slog.Debug("resolving tag during provenance migration", "tag", tag, "error", resolveErr) + continue + } + entries = append(entries, provenanceEntry{ + Tag: tag, + Digest: d.String(), + CreatedAt: now, + }) + } + + if err := p.Seed(entries); err != nil { + slog.Warn("seeding build provenance file", "error", err) + } +} + +// looksLikePulledRef returns true when a tag string resembles a +// fully-qualified OCI reference (e.g. "ghcr.io/org/skill:v1") rather than a +// simple build tag (e.g. "my-skill" or "v1.0.0"). Used only by the one-shot +// migration — ongoing provenance decisions are explicit (Build records, +// pulls do not). +func looksLikePulledRef(tag string) bool { + return strings.ContainsAny(tag, "/:@") +} From 4c3598b3de0a71549e1c8e20be6f759efdae99fe Mon Sep 17 00:00:00 2001 From: Samuele Verzi Date: Tue, 21 Apr 2026 16:45:33 +0200 Subject: [PATCH 3/3] Assert OCI pull paths do not leak into ListBuilds Add regression tests that simulate the real Pull tagging side effect for both GetContent and installFromOCI and assert ListBuilds stays empty afterwards. Annotate each registry.Pull call site with a comment stating that skipping build provenance is intentional, so these paths are not silently 'fixed' later by adding a Record call. --- pkg/skills/skillsvc/content.go | 4 ++ pkg/skills/skillsvc/content_test.go | 42 ++++++++++++++++++++ pkg/skills/skillsvc/install_oci.go | 3 ++ pkg/skills/skillsvc/install_oci_test.go | 53 +++++++++++++++++++++++++ 4 files changed, 102 insertions(+) diff --git a/pkg/skills/skillsvc/content.go b/pkg/skills/skillsvc/content.go index 507cf6a13d..540787885d 100644 --- a/pkg/skills/skillsvc/content.go +++ b/pkg/skills/skillsvc/content.go @@ -165,6 +165,10 @@ func (s *service) getContentFromOCI(ctx context.Context, ref string) (*skills.Sk pullCtx, cancel := context.WithTimeout(ctx, ociPullTimeout) defer cancel() + // Content-preview pulls intentionally do NOT record build provenance: + // the pulled blobs stay in the OCI store as a cache, but the tag is + // invisible to ListBuilds so remote skills browsed via the content + // API don't pollute the local builds listing. var pullErr error d, pullErr = s.registry.Pull(pullCtx, s.ociStore, qualifiedRef) if pullErr != nil { diff --git a/pkg/skills/skillsvc/content_test.go b/pkg/skills/skillsvc/content_test.go index 2ee52b512a..51ef06aecf 100644 --- a/pkg/skills/skillsvc/content_test.go +++ b/pkg/skills/skillsvc/content_test.go @@ -255,6 +255,48 @@ func TestGetContent(t *testing.T) { assert.Contains(t, content.Body, "Skill Creator") }) + t.Run("remote pull does not pollute ListBuilds", func(t *testing.T) { + t.Parallel() + ctrl := gomock.NewController(t) + + ociStore, err := ociskills.NewStore(t.TempDir()) + require.NoError(t, err) + + // The real Pull would tag the pulled artifact in the local store. + // Simulate that side-effect here so we can verify ListBuilds still + // reports an empty list (pulls must not touch build provenance). + reg := ocimocks.NewMockRegistryClient(ctrl) + reg.EXPECT(). + Pull(gomock.Any(), ociStore, "ghcr.io/org/my-skill:v1"). + DoAndReturn(func(ctx context.Context, store *ociskills.Store, _ string) (godigest.Digest, error) { + d := buildTestArtifact(t, store, "my-skill", "1.0.0") + require.NoError(t, store.Tag(ctx, d, "ghcr.io/org/my-skill:v1")) + return d, nil + }) + + svc := New(&storage.NoopSkillStore{}, + WithOCIStore(ociStore), + WithRegistryClient(reg), + ) + + // Baseline: no builds before the content request. + builds, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + require.Empty(t, builds) + + content, err := svc.GetContent(t.Context(), skills.ContentOptions{ + Reference: "ghcr.io/org/my-skill:v1", + }) + require.NoError(t, err) + assert.Equal(t, "my-skill", content.Name) + + // After a content-preview pull, ListBuilds must still be empty: the + // blobs stay on disk as a cache but the tag is not treated as a local build. + builds, err = svc.ListBuilds(t.Context()) + require.NoError(t, err) + assert.Empty(t, builds, "content API must not leak pulled artifacts into ListBuilds") + }) + t.Run("qualified namespace/name filters registry matches", func(t *testing.T) { t.Parallel() ctrl := gomock.NewController(t) diff --git a/pkg/skills/skillsvc/install_oci.go b/pkg/skills/skillsvc/install_oci.go index bdd7bad17f..762cec5216 100644 --- a/pkg/skills/skillsvc/install_oci.go +++ b/pkg/skills/skillsvc/install_oci.go @@ -52,6 +52,9 @@ func (s *service) installFromOCI( pullCtx, cancel := context.WithTimeout(ctx, ociPullTimeout) defer cancel() + // Install pulls intentionally do NOT record build provenance: the pulled + // blobs stay in the OCI store as a cache, but the tag is invisible to + // ListBuilds so installed remote skills don't appear as local builds. pulledDigest, err := s.registry.Pull(pullCtx, s.ociStore, ociRef) if err != nil { return nil, httperr.WithCode( diff --git a/pkg/skills/skillsvc/install_oci_test.go b/pkg/skills/skillsvc/install_oci_test.go index 9a51da7ac5..b89aad39a0 100644 --- a/pkg/skills/skillsvc/install_oci_test.go +++ b/pkg/skills/skillsvc/install_oci_test.go @@ -301,6 +301,59 @@ func TestInstallFromOCI(t *testing.T) { } } +func TestInstallFromOCI_DoesNotLeakIntoListBuilds(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + + ociStore, err := ociskills.NewStore(tempDir(t)) + require.NoError(t, err) + + indexDigest := buildTestArtifact(t, ociStore, "my-skill", "1.0.0") + + // Simulate the real Pull side effect: tag the artifact in the local + // store. installFromOCI should still not touch build provenance. + reg := ocimocks.NewMockRegistryClient(ctrl) + reg.EXPECT(). + Pull(gomock.Any(), ociStore, "ghcr.io/org/my-skill:v1"). + DoAndReturn(func(ctx context.Context, store *ociskills.Store, _ string) (godigest.Digest, error) { + require.NoError(t, store.Tag(ctx, indexDigest, "ghcr.io/org/my-skill:v1")) + return indexDigest, nil + }) + + skillStore := storemocks.NewMockSkillStore(ctrl) + skillStore.EXPECT().Get(gomock.Any(), "my-skill", skills.ScopeUser, ""). + Return(skills.InstalledSkill{}, storage.ErrNotFound) + skillStore.EXPECT().Create(gomock.Any(), gomock.Any()).Return(nil) + + pr := skillsmocks.NewMockPathResolver(ctrl) + targetDir := filepath.Join(tempDir(t), "installed", "my-skill") + pr.EXPECT().GetSkillPath("claude-code", "my-skill", skills.ScopeUser, "").Return(targetDir, nil) + + svc := New(skillStore, + WithRegistryClient(reg), + WithOCIStore(ociStore), + WithPathResolver(pr), + ) + + // Baseline: no builds before the install. + builds, err := svc.ListBuilds(t.Context()) + require.NoError(t, err) + require.Empty(t, builds) + + _, err = svc.Install(t.Context(), skills.InstallOptions{ + Name: "ghcr.io/org/my-skill:v1", + Clients: []string{"claude-code"}, + }) + require.NoError(t, err) + + // After the install, the OCI store contains the pulled artifact but + // ListBuilds must still be empty — only `thv skill build` output shows up. + builds, err = svc.ListBuilds(t.Context()) + require.NoError(t, err) + assert.Empty(t, builds, "install pulls must not leak into ListBuilds") +} + func TestInstallFromLocalStore(t *testing.T) { t.Parallel()