diff --git a/go.mod b/go.mod index 4348240..e40f186 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,10 @@ module github.com/openvex/go-vex -go 1.22 +go 1.24.8 require ( github.com/google/go-cmp v0.7.0 + github.com/in-toto/attestation v1.1.2 github.com/in-toto/in-toto-golang v0.9.0 github.com/owenrumney/go-sarif v1.1.1 gopkg.in/yaml.v3 v3.0.1 @@ -18,6 +19,7 @@ require ( github.com/zclconf/go-cty v1.10.0 // indirect golang.org/x/crypto v0.17.0 // indirect golang.org/x/text v0.14.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) diff --git a/go.sum b/go.sum index 419b958..02af5e1 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaW github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/in-toto/attestation v1.1.2 h1:MBFn6lsMq6dptQZJBhalXTcWMb/aJy3V+GX3VYj/V1E= +github.com/in-toto/attestation v1.1.2/go.mod h1:gYFddHMZj3DiQ0b62ltNi1Vj5rC879bTmBbrv9CRHpM= github.com/in-toto/in-toto-golang v0.9.0 h1:tHny7ac4KgtsfrG6ybU8gVOZux2H8jN05AXJ9EBM1XU= github.com/in-toto/in-toto-golang v0.9.0/go.mod h1:xsBVrVsHNsB61++S6Dy2vWosKhuA3lUTQd+eF9HdeMo= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -58,6 +60,8 @@ golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod 
h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/pkg/index/filters.go b/pkg/index/filters.go new file mode 100644 index 0000000..776d9ab --- /dev/null +++ b/pkg/index/filters.go @@ -0,0 +1,96 @@ +// Copyright 2025 The OpenVEX Authors +// SPDX-License-Identifier: Apache-2.0 + +package index + +import "github.com/openvex/go-vex/pkg/vex" + +// Filter abstracts a function that, when called, extracts vex statements +// from an index and returns them as a set keyed by statement pointer, so +// a statement that matches more than once appears only once. +// +// Filters are used by the index `Matches()` function which calls the +// filters, deduplicates the results and returns the collection of matching +// statements. +type Filter func() map[*vex.Statement]struct{} + +// A FilterFunc takes a StatementIndex and returns a Filter. FilterFuncs are +// meant to be used as arguments to the `Matches()` index function. +type FilterFunc func(*StatementIndex) Filter + +// WithVulnerability returns a filter matching statements indexed under the vulnerability's name or any of its aliases. +func WithVulnerability(vuln *vex.Vulnerability) FilterFunc { + return func(si *StatementIndex) Filter { + return func() map[*vex.Statement]struct{} { + ret := map[*vex.Statement]struct{}{} + ids := []vex.VulnerabilityID{} + if vuln.Name != "" { + ids = append(ids, vuln.Name) + } + ids = append(ids, vuln.Aliases...) + + for _, id := range ids { + for _, s := range si.vulnIndex[string(id)] { + ret[s] = struct{}{} + } + } + return ret + } + } +} + +// WithProduct returns a filter matching statements indexed under the +// product's ID, identifier values or hash values. 
+func WithProduct(prod *vex.Product) FilterFunc { + return func(si *StatementIndex) Filter { + return func() map[*vex.Statement]struct{} { + ret := map[*vex.Statement]struct{}{} + ids := []string{} + if prod.ID != "" { + ids = append(ids, prod.ID) + } + for _, id := range prod.Identifiers { + ids = append(ids, id) + } + for _, h := range prod.Hashes { + ids = append(ids, string(h)) + } + + for _, id := range ids { + for _, s := range si.prodIndex[id] { + ret[s] = struct{}{} + } + } + + return ret + } + } +} + +// WithSubcomponent returns a filter matching statements indexed under the +// subcomponent's ID, identifier values or hash values. +func WithSubcomponent(subc *vex.Subcomponent) FilterFunc { + return func(si *StatementIndex) Filter { + return func() map[*vex.Statement]struct{} { + ret := map[*vex.Statement]struct{}{} + ids := []string{} + if subc.ID != "" { + ids = append(ids, subc.ID) + } + for _, id := range subc.Identifiers { + ids = append(ids, id) + } + for _, h := range subc.Hashes { + ids = append(ids, string(h)) + } + + for _, id := range ids { + for _, s := range si.subIndex[id] { + ret[s] = struct{}{} + } + } + + return ret + } + } +} diff --git a/pkg/index/index.go b/pkg/index/index.go new file mode 100644 index 0000000..2e785ca --- /dev/null +++ b/pkg/index/index.go @@ -0,0 +1,172 @@ +// Copyright 2025 The OpenVEX Authors +// SPDX-License-Identifier: Apache-2.0 + +package index + +import ( + "fmt" + "slices" + + "github.com/openvex/go-vex/pkg/vex" +) + +// New creates a StatementIndex and applies the constructor functions to it in order, stopping at the first error. +func New(funcs ...constructorFunc) (*StatementIndex, error) { + si := &StatementIndex{} + for _, fn := range funcs { + if err := fn(si); err != nil { + return nil, err + } + } + return si, nil +} + +type constructorFunc func(*StatementIndex) error + +// WithDocument indexes all the statements in a document; IndexStatements replaces whatever the index held before. +func WithDocument(doc *vex.VEX) constructorFunc { + return func(si *StatementIndex) error { + statements := []*vex.Statement{} + for i := 
range doc.Statements { + statements = append(statements, &doc.Statements[i]) + } + si.IndexStatements(statements) + return nil + } +} + +// WithStatements indexes the given statements; IndexStatements replaces whatever the index held before. +func WithStatements(statements []*vex.Statement) constructorFunc { + return func(si *StatementIndex) error { + si.IndexStatements(statements) + return nil + } +} + +// StatementIndex is the OpenVEX statement indexer. An index reads into memory +// vex statements and catalogs them by the fields in their components +// (vulnerability, product, subcomponents). +// +// The index exposes a StatementIndex.Matches() function that takes in FilterFuncs +// to return indexed statements that match the filter criteria. +type StatementIndex struct { + vulnIndex map[string][]*vex.Statement + prodIndex map[string][]*vex.Statement + subIndex map[string][]*vex.Statement +} + +// IndexStatements rebuilds the index from the passed statements, discarding anything +// indexed before, and catalogs them by product, vulnerability and subcomponent fields. +func (si *StatementIndex) IndexStatements(statements []*vex.Statement) { + si.vulnIndex = map[string][]*vex.Statement{} + si.prodIndex = map[string][]*vex.Statement{} + si.subIndex = map[string][]*vex.Statement{} + + for _, s := range statements { + for _, p := range s.Products { + if p.ID != "" && !slices.Contains(si.prodIndex[p.ID], s) { + si.prodIndex[p.ID] = append(si.prodIndex[p.ID], s) + } + for _, id := range p.Identifiers { + if !slices.Contains(si.prodIndex[id], s) { + si.prodIndex[id] = append(si.prodIndex[id], s) + } + } + for algo, h := range p.Hashes { + if !slices.Contains(si.prodIndex[string(h)], s) { + si.prodIndex[string(h)] = append(si.prodIndex[string(h)], s) + } + if !slices.Contains(si.prodIndex[fmt.Sprintf("%s:%s", algo, h)], s) { + si.prodIndex[fmt.Sprintf("%s:%s", algo, h)] = append(si.prodIndex[fmt.Sprintf("%s:%s", algo, h)], s) + } + intotoAlgo := algo.ToInToto() + if intotoAlgo == "" { + continue + } + if !slices.Contains(si.prodIndex[fmt.Sprintf("%s:%s", intotoAlgo, h)], s) { + 
si.prodIndex[fmt.Sprintf("%s:%s", intotoAlgo, h)] = append(si.prodIndex[fmt.Sprintf("%s:%s", intotoAlgo, h)], s) + } + } + + // Index the subcomponents + for _, sc := range p.Subcomponents { + // Match by ID too + if sc.ID != "" && !slices.Contains(si.subIndex[sc.ID], s) { + si.subIndex[sc.ID] = append(si.subIndex[sc.ID], s) + } + for _, id := range sc.Identifiers { + if !slices.Contains(si.subIndex[id], s) { + si.subIndex[id] = append(si.subIndex[id], s) + } + } + for _, h := range sc.Hashes { + if !slices.Contains(si.subIndex[string(h)], s) { + si.subIndex[string(h)] = append(si.subIndex[string(h)], s) + } + } + } + } + + if s.Vulnerability.Name != "" { + if !slices.Contains(si.vulnIndex[string(s.Vulnerability.Name)], s) { + si.vulnIndex[string(s.Vulnerability.Name)] = append(si.vulnIndex[string(s.Vulnerability.Name)], s) + } + } + for _, alias := range s.Vulnerability.Aliases { + if !slices.Contains(si.vulnIndex[string(alias)], s) { + si.vulnIndex[string(alias)] = append(si.vulnIndex[string(alias)], s) + } + } + } +} + +// unionIndexResults returns the statements present in every result set (an intersection, despite its name); no result sets yields an empty slice. +func unionIndexResults(results []map[*vex.Statement]struct{}) []*vex.Statement { + if len(results) == 0 { + return []*vex.Statement{} + } + preret := map[*vex.Statement]struct{}{} + // A statement must appear in every result set, so only the members of the + // smallest set need checking; note the sort reorders the caller's slice. + slices.SortFunc(results, func(a, b map[*vex.Statement]struct{}) int { + if len(a) == len(b) { + return 0 + } + if len(a) < len(b) { + return -1 + } + return 1 + }) + + var found bool + for s := range results[0] { + // Keep s only if every other result set contains it too + found = true + for _, other := range results[1:] { + if _, ok := other[s]; !ok { + found = false + break + } + } + if found { + preret[s] = struct{}{} + } + } + + // Now assemble the list + ret := []*vex.Statement{} + for s := range preret { + ret = append(ret, s) + } + return ret +} + +// Matches applies the filters to the index and returns the statements matched by every filter; with no filters it returns an empty slice. +func (si 
*StatementIndex) Matches(filterfunc ...FilterFunc) []*vex.Statement { + lists := []map[*vex.Statement]struct{}{} + for _, ffunc := range filterfunc { + filter := ffunc(si) + lists = append(lists, filter()) + } + return unionIndexResults(lists) +} diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go new file mode 100644 index 0000000..edc0b5f --- /dev/null +++ b/pkg/index/index_test.go @@ -0,0 +1,141 @@ +// Copyright 2025 The OpenVEX Authors +// SPDX-License-Identifier: Apache-2.0 + +package index + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/openvex/go-vex/pkg/vex" +) + +func statementList(t *testing.T) []*vex.Statement { + t.Helper() + return []*vex.Statement{ + { + Vulnerability: vex.Vulnerability{ + Name: "CVE-1234-56789", + Aliases: []vex.VulnerabilityID{ + "GHE-1234-56789", + }, + }, + Products: []vex.Product{ + { + Component: vex.Component{ + Hashes: map[vex.Algorithm]vex.Hash{ + vex.SHA256: "cdd80609c252ba5336de7033518cfe15f9e466a53c1de14545cc6ec22e56252b", + vex.SHA512: "382d6447ce20980b363fb0e6e7b7e9a4544dac3bc7c8ee5e5cf78f4d5982ddfaf02dc287b58693de44d1117851219bb435dc4bc2c6a9b0a75779a2fbc84f5e6f", + }, + }, + Subcomponents: []vex.Subcomponent{ + { + Component: vex.Component{ + Identifiers: map[vex.IdentifierType]string{ + vex.PURL: "golang:github.com/my/package@1.2", + }, + }, + }, + { + Component: vex.Component{ + Identifiers: map[vex.IdentifierType]string{ + vex.PURL: "golang:github.com/my/other/package@2.0", + }, + }, + }, + }, + }, + }, + }, + { + Vulnerability: vex.Vulnerability{ + Name: "CVE-9876-54321", + Aliases: []vex.VulnerabilityID{ + "GHE-9876-54321", + }, + }, + Products: []vex.Product{ + { + Component: vex.Component{ + Hashes: map[vex.Algorithm]vex.Hash{ + vex.SHA256: "eb69e4dc450281ac1ac675e45cff08c8452241d4664b713ea9859902272536fa", + }, + Identifiers: map[vex.IdentifierType]string{ + vex.PURL: "oci:alpine@eb69e4dc450281ac1ac675e45cff08c8452241d4664b713ea9859902272536fa", + }, + }, + }, + { + 
Component: vex.Component{ + Hashes: map[vex.Algorithm]vex.Hash{ + vex.SHA1: "f77d09006b5a5977faaedf8857cdace0247901ba", + }, + }, + Subcomponents: []vex.Subcomponent{ + { + Component: vex.Component{ + Identifiers: map[vex.IdentifierType]string{ + vex.PURL: "npm:chido@1.2", + }, + }, + }, + { + Component: vex.Component{ + Identifiers: map[vex.IdentifierType]string{ + vex.PURL: "npm:otrchido@2.0", + }, + }, + }, + }, + }, + }, + }, + } +} + +func TestMatch(t *testing.T) { + t.Parallel() + for _, tc := range []struct { + name string + filters []FilterFunc + expectedLength int + }{ + {name: "test", filters: []FilterFunc{}, expectedLength: 0}, + {name: "vuln", filters: []FilterFunc{WithVulnerability(&vex.Vulnerability{Name: "CVE-1234-56789"})}, expectedLength: 1}, + {name: "vulnAlias", filters: []FilterFunc{WithVulnerability(&vex.Vulnerability{Name: "CVE-1234-56789", Aliases: []vex.VulnerabilityID{"GHE-1234-56789"}})}, expectedLength: 1}, + {name: "vulnAndSubcomponent", filters: []FilterFunc{WithVulnerability(&vex.Vulnerability{Name: "CVE-1234-56789"}), WithSubcomponent(&vex.Subcomponent{Component: vex.Component{Identifiers: map[vex.IdentifierType]string{vex.PURL: "golang:github.com/my/package@1.2"}}})}, expectedLength: 1}, + {name: "vulnAndOtherProduct", filters: []FilterFunc{WithVulnerability(&vex.Vulnerability{Name: "CVE-1234-56789"}), WithProduct(&vex.Product{Component: vex.Component{Hashes: map[vex.Algorithm]vex.Hash{vex.SHA256: "eb69e4dc450281ac1ac675e45cff08c8452241d4664b713ea9859902272536fa"}}})}, expectedLength: 0}, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + list := statementList(t) + si := &StatementIndex{} + si.IndexStatements(list) + + // Match and apply the filters + res := si.Matches(tc.filters...) + require.Len(t, res, tc.expectedLength) + }) + } +} + +func TestIndexStatements(t *testing.T) { + t.Parallel() + for _, tc := range []struct { + name string + }{ + {name: "test"}, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + list := statementList(t) + si := &StatementIndex{} + si.IndexStatements(list) + + require.Len(t, si.prodIndex, 12) + require.Len(t, si.vulnIndex, 4) + require.Len(t, si.subIndex, 4) + }) + } +} diff --git a/pkg/vex/product.go b/pkg/vex/product.go index a55895e..e6fc610 100644 --- a/pkg/vex/product.go +++ b/pkg/vex/product.go @@ -3,6 +3,10 @@ package vex +import ( + intoto "github.com/in-toto/attestation/go/v1" +) + // Product abstracts the VEX product into a struct that can identify software // through various means. 
The main one is the ID field which contains an IRI // identifying the product, possibly pointing to another document with more data, @@ -76,3 +80,30 @@ const ( BLAKE2B512 Algorithm = "blake2b-512" BLAKE3 Algorithm = "blake3" ) + +// ToInToto returns the in-toto name of the algorithm, or an empty string when the switch below has no mapping for it. +func (algo Algorithm) ToInToto() string { + //nolint:exhaustive // Not all VEX algorithms have an intoto equivalent + switch algo { + case SHA256: + return intoto.AlgorithmSHA256.String() + case SHA512: + return intoto.AlgorithmSHA512.String() + case SHA1: + return intoto.AlgorithmSHA1.String() + case MD5: + return intoto.AlgorithmMD5.String() + case SHA384: + return intoto.AlgorithmSHA384.String() + case SHA3224: + return intoto.AlgorithmSHA3_224.String() + case SHA3256: + return intoto.AlgorithmSHA3_256.String() + case SHA3384: + return intoto.AlgorithmSHA3_384.String() + case SHA3512: + return intoto.AlgorithmSHA3_512.String() + default: + return "" + } +}