-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.go
350 lines (291 loc) · 9.24 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
package main
// This hack uses code from:
// - https://github.com/aquasecurity/trivy
import (
"bufio"
"crypto/sha1" // nolint
"crypto/sha256"
"fmt"
"hash"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"strings"
"github.com/mitchellh/hashstructure/v2"
"github.com/samber/lo"
"golang.org/x/exp/slices"
"golang.org/x/xerrors"
"pault.ag/go/debian/deb"
)
// Algorithm names the hash function that produced a digest.
type Algorithm string

// String returns the algorithm name as a plain string.
func (a Algorithm) String() string {
	return string(a)
}

// supported digest types
const (
	SHA1   Algorithm = "sha1"   // sha1 with hex encoding (lower case only)
	SHA256 Algorithm = "sha256" // sha256 with hex encoding (lower case only)
	MD5    Algorithm = "md5"    // md5 with hex encoding (lower case only)
)

// Digest allows simple protection of hex formatted digest strings, prefixed by their algorithm.
//
// The following is an example of the contents of Digest types:
//
//	sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
type Digest string

// NewDigest returns a Digest from alg and a hash.Hash object.
// The current sum of h is rendered as lower-case hex after the "alg:" prefix.
func NewDigest(alg Algorithm, h hash.Hash) Digest {
	return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil)))
}

// NewDigestFromString returns a Digest from alg and a string.
// h is used verbatim as the encoded part; it is not validated.
func NewDigestFromString(alg Algorithm, h string) Digest {
	return Digest(fmt.Sprintf("%s:%s", alg, h))
}

// Algorithm returns the part of the digest before the first ":".
// It returns the empty Algorithm when the digest has no ":" separator.
func (d Digest) Algorithm() Algorithm {
	i := d.sepIndex()
	if i < 0 {
		return ""
	}
	return Algorithm(d[:i])
}

// Encoded returns the part of the digest after the first ":".
// A digest without a separator has no algorithm prefix to strip, so it is
// returned unchanged; in particular the empty Digest yields "" instead of
// panicking.
func (d Digest) Encoded() string {
	// sepIndex is -1 when there is no separator, which makes this d[0:].
	return string(d[d.sepIndex()+1:])
}

// String returns the digest in its "algorithm:encoded" form.
func (d Digest) String() string {
	return string(d)
}

// sepIndex returns the index of the first ":" in d, or -1 if there is none.
// Reporting -1 (rather than 0) keeps "no separator" distinguishable from
// "separator at index 0".
func (d Digest) sepIndex() int {
	return strings.Index(string(d), ":")
}
// CalcSHA1 hashes everything r yields from its current offset with SHA-1 and
// returns the result as a "sha1:<hex>" Digest.
//
// r is always rewound to its start before returning, whether or not hashing
// succeeded. If hashing succeeded but the rewind failed, an error is returned
// instead of the digest, because the caller would otherwise continue with a
// reader left at an unexpected position.
func CalcSHA1(r io.ReadSeeker) (d Digest, err error) {
	defer func() {
		// Keep the hashing error if there is one; only surface the rewind
		// failure when it would otherwise go unnoticed.
		if _, seekErr := r.Seek(0, io.SeekStart); seekErr != nil && err == nil {
			d = ""
			err = xerrors.Errorf("unable to rewind reader after sha1 digest: %w", seekErr)
		}
	}()

	h := sha1.New() // nolint
	if _, copyErr := io.Copy(h, r); copyErr != nil {
		return "", xerrors.Errorf("unable to calculate sha1 digest: %w", copyErr)
	}
	return NewDigest(SHA1, h), nil
}
// CalcSHA256 hashes everything r yields from its current offset with SHA-256
// and returns the result as a "sha256:<hex>" Digest.
//
// r is always rewound to its start before returning, whether or not hashing
// succeeded. If hashing succeeded but the rewind failed, an error is returned
// instead of the digest, because the caller would otherwise continue with a
// reader left at an unexpected position.
func CalcSHA256(r io.ReadSeeker) (d Digest, err error) {
	defer func() {
		// Keep the hashing error if there is one; only surface the rewind
		// failure when it would otherwise go unnoticed.
		if _, seekErr := r.Seek(0, io.SeekStart); seekErr != nil && err == nil {
			d = ""
			err = xerrors.Errorf("unable to rewind reader after sha256 digest: %w", seekErr)
		}
	}()

	h := sha256.New()
	if _, copyErr := io.Copy(h, r); copyErr != nil {
		return "", xerrors.Errorf("unable to calculate sha256 digest: %w", copyErr)
	}
	return NewDigest(SHA256, h), nil
}
// Location is a line range described by StartLine and EndLine.
// Package.Locations uses it for the lock-file lines where a dependency is
// written.
// NOTE(review): presumably 1-based and inclusive — confirm against the producer.
type Location struct {
StartLine int `json:",omitempty"`
EndLine int `json:",omitempty"`
}
// BuildInfo represents information under /root/buildinfo in RHEL
// Nothing in this file populates it; Package.BuildInfo stays nil for .deb input.
type BuildInfo struct {
ContentSets []string `json:",omitempty"`
Nvr string `json:",omitempty"` // NOTE(review): presumably name-version-release — confirm
Arch string `json:",omitempty"`
}
// Repository pairs a Family with a Release.
// NOTE(review): presumably the OS family and release a package repository
// targets — nothing in this file uses the type, so confirm against callers.
type Repository struct {
Family string `json:",omitempty"`
Release string `json:",omitempty"`
}
// Layer carries a Digest, a DiffID and a CreatedBy string. It is embedded by
// value in Package and LicenseFile.
// NOTE(review): presumably describes the container image layer an item came
// from — this file never fills it in, so confirm against the producer.
type Layer struct {
Digest string `json:",omitempty"`
DiffID string `json:",omitempty"`
CreatedBy string `json:",omitempty"`
}
// Package holds the metadata recorded for one software package.
//
// In this file, main fills one in from the control data of a .deb file
// (ID, Name, Version, Release, Epoch, Arch, the Src* copies, Maintainer,
// DependsOn and Licenses), prints it, and hashes it. The remaining fields are
// left at their zero values here.
type Package struct {
ID string `json:",omitempty"` // main sets this to "<Name>@<full version string>"
Name string `json:",omitempty"`
Version string `json:",omitempty"`
Release string `json:",omitempty"`
Epoch int `json:",omitempty"`
Arch string `json:",omitempty"`
SrcName string `json:",omitempty"` // main copies Name here
SrcVersion string `json:",omitempty"` // main copies Version here
SrcRelease string `json:",omitempty"` // main copies Release here
SrcEpoch int `json:",omitempty"` // main copies Epoch here
Licenses []string `json:",omitempty"`
Maintainer string `json:",omitempty"`
Modularitylabel string `json:",omitempty"` // only for Red Hat based distributions
BuildInfo *BuildInfo `json:",omitempty"` // only for Red Hat
Ref string `json:",omitempty"` // identifier which can be used to reference the component elsewhere
Indirect bool `json:",omitempty"` // this package is direct dependency of the project or not
// Dependencies of this package
// Note: it may have interdependencies, which may lead to infinite loops.
DependsOn []string `json:",omitempty"`
Layer Layer `json:",omitempty"`
// Each package metadata have the file path, while the package from lock files does not have.
FilePath string `json:",omitempty"`
// This is required when using SPDX formats. Otherwise, it will be empty.
Digest Digest `json:",omitempty"`
// lines from the lock file where the dependency is written
Locations []Location `json:",omitempty"`
}
var (
	// dpkgLicenseAnalyzerVersion is the version number of the dpkg license
	// analyzer. Nothing in this file reads it.
	dpkgLicenseAnalyzerVersion = 1

	// commonLicenseReferenceRegexp matches a reference to a file under
	// /usr/share/common-licenses/ (leading slash optional) and captures the
	// file name. The name must end in an alphanumeric or "+", so trailing
	// sentence punctuation such as "." is not captured.
	commonLicenseReferenceRegexp = regexp.MustCompile(`/?usr/share/common-licenses/([0-9A-Za-z_.+-]+[0-9A-Za-z+])`)

	// licenseSplitRegexp matches the separators between license names on a
	// "License:" line: "or" / "and" surrounded by spaces or underscores
	// (optionally preceded by a comma), or a bare comma.
	// The "and" branch consumes all trailing spaces/underscores, exactly like
	// the "or" branch, so "A_and__B" no longer yields "_B".
	licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ]+)|(,[ ]*)")
)
// dpkgLicenseAnalyzer parses copyright files and detects licenses.
// NOTE(review): no methods are defined on this type in the visible source and
// nothing here constructs it; the parsing used by main lives in parseCopyright.
type dpkgLicenseAnalyzer struct {
// NOTE(review): presumably toggles a fuller license scan — unused here, confirm.
licenseFull bool
// NOTE(review): presumably the minimum classifier confidence — unused here, confirm.
classifierConfidenceLevel float64
}
// LicenseType names the kind of source a license finding came from.
// It is the type of LicenseFile.Type.
type LicenseType string
// Known license sources.
const (
LicenseTypeDpkg LicenseType = "dpkg" // From /usr/share/doc/*/copyright
LicenseTypeHeader LicenseType = "header" // From file headers
LicenseTypeFile LicenseType = "license-file" // From LICENSE, COPYRIGHT, etc.
)
// LicenseCategory is the classification attached to a license finding
// (see LicenseFinding.Category).
type LicenseCategory string
// Known license categories.
// NOTE(review): nothing in this file assigns a category; parseCopyright leaves
// LicenseFinding.Category empty.
const (
CategoryForbidden LicenseCategory = "forbidden"
CategoryRestricted LicenseCategory = "restricted"
CategoryReciprocal LicenseCategory = "reciprocal"
CategoryNotice LicenseCategory = "notice"
CategoryPermissive LicenseCategory = "permissive"
CategoryUnencumbered LicenseCategory = "unencumbered"
CategoryUnknown LicenseCategory = "unknown"
)
// LicenseFile groups the license findings for one file: the kind of source
// (Type), the file's path, a package name, and the findings themselves.
// NOTE(review): nothing in this file constructs a LicenseFile; the field
// meanings are read off the names — confirm against the producer.
type LicenseFile struct {
Type LicenseType
FilePath string
PkgName string
Findings LicenseFindings
Layer Layer `json:",omitempty"`
}
// LicenseFindings is a list of license findings that can be ordered by
// license name. Its Len, Swap and Less methods have the signatures required
// by sort.Interface.
type LicenseFindings []LicenseFinding

// Len reports how many findings the list holds.
func (lf LicenseFindings) Len() int { return len(lf) }

// Swap exchanges the findings at positions i and j.
func (lf LicenseFindings) Swap(i, j int) {
	held := lf[i]
	lf[i] = lf[j]
	lf[j] = held
}

// Less reports whether the finding at i has a Name that sorts strictly before
// the Name of the finding at j (plain string comparison).
func (lf LicenseFindings) Less(i, j int) bool {
	left, right := lf[i].Name, lf[j].Name
	return left < right
}
// LicenseFinding is one detected license.
// parseCopyright in this file fills in only Name; the other fields keep their
// zero values there.
type LicenseFinding struct {
Category LicenseCategory // such as "forbidden"
Name string
Confidence float64 // NOTE(review): presumably a classifier score — not set in this file, confirm
Link string
}
// parseCopyright extracts license names from the text of a Debian
// /usr/share/doc/*/copyright file.
//
// r is the full content of the copyright file. Two kinds of lines are
// recognized:
//   - lines starting with "License:" (machine-readable format), whose value is
//     normalized and split on "or" / "and" / "," separators;
//   - lines mentioning /usr/share/common-licenses/<name>, from which <name> is
//     taken.
//
// Each name is passed through normalizeLicense. The result lists every
// distinct, non-empty name once, in order of first appearance, as a
// LicenseFinding with only Name set. A non-nil error is returned only if
// scanning the input fails.
func parseCopyright(r string) ([]LicenseFinding, error) {
	scanner := bufio.NewScanner(strings.NewReader(r))
	// The input is already in memory, so allow a single line to be as long as
	// the whole input. With the default 64 KiB token limit the scanner would
	// stop at the first over-long line and every later license would be lost.
	scanner.Buffer(nil, len(r)+1)

	var licenses []string
	// record appends name unless it is empty or already present.
	record := func(name string) {
		if name == "" || slices.Contains(licenses, name) {
			return
		}
		licenses = append(licenses, name)
	}

	for scanner.Scan() {
		line := scanner.Text()

		switch {
		case strings.HasPrefix(line, "License:"):
			// Machine-readable format
			// cf. https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#:~:text=The%20debian%2Fcopyright%20file%20must,in%20the%20Debian%20Policy%20Manual.
			l := strings.TrimSpace(strings.TrimPrefix(line, "License:"))
			l = normalizeLicense(l)
			if len(l) == 0 {
				continue
			}
			// Split licenses without considering "and"/"or".
			// A trailing "+" is kept as part of the name.
			// examples:
			// 'GPL-1+,GPL-2' => {"GPL-1+", "GPL-2"}
			// 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1+", "Artistic", "Artistic-dist"}
			// 'LGPLv3+_or_GPLv2+' => {"LGPLv3+", "GPLv2+"}
			// 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"}
			// 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
			for _, lic := range licenseSplitRegexp.Split(l, -1) {
				// A trailing separator ("GPL-2,") produces an empty piece;
				// record drops it instead of reporting a nameless license.
				record(normalizeLicense(lic))
			}
		case strings.Contains(line, "/usr/share/common-licenses/"):
			// Common license pattern
			license := commonLicenseReferenceRegexp.FindStringSubmatch(line)
			if len(license) == 2 {
				record(normalizeLicense(license[1]))
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return lo.Map(licenses, func(license string, _ int) LicenseFinding {
		return LicenseFinding{Name: license}
	}), nil
}
// normalizeLicense returns a normalized license identifier in a heuristic way.
//
// It drops everything from the first "(" onwards, removes the leading phrase
// "The main library is licensed under " and a trailing " license", and trims
// surrounding whitespace. The result may be empty.
func normalizeLicense(s string) string {
	// "The MIT License (MIT)" => "The MIT License"
	s, _, _ = strings.Cut(s, "(")

	// Trim before matching: cutting at "(" usually leaves a trailing space
	// ("MIT license (MIT)" => "MIT license "), which would otherwise stop the
	// suffix below from matching; leading spaces would do the same to the prefix.
	s = strings.TrimSpace(s)

	// Very rarely has below phrases
	s = strings.TrimPrefix(s, "The main library is licensed under ")
	s = strings.TrimSuffix(s, " license")

	return strings.TrimSpace(s)
}
// dependencyNames reduces a rendered Depends field to bare package names.
//
// depends is split on ", " and each entry is cut at its first space, so a
// version constraint such as "libc6 (>= 2.14)" becomes "libc6" and an
// alternative list such as "a | b" becomes "a". A blank Depends field yields
// nil rather than a slice holding one empty name.
func dependencyNames(depends string) []string {
	if strings.TrimSpace(depends) == "" {
		return nil
	}
	entries := strings.Split(depends, ", ")
	names := make([]string, 0, len(entries))
	for _, entry := range entries {
		name, _, _ := strings.Cut(entry, " ")
		names = append(names, name)
	}
	return names
}

// main loads the .deb file named by the first command-line argument, builds a
// Package from its control data and copyright file, and prints:
//  1. the detected license names (only when a copyright file was found),
//  2. the Package value,
//  3. the hashstructure hash of the Package, in hex.
//
// Any failure is reported through log.Fatal, which exits with status 1.
func main() {
	if len(os.Args) < 2 {
		log.Fatal("usage: pass the path to a .deb file as the first argument")
	}
	path := os.Args[1]

	fd, err := os.Open(path)
	if err != nil {
		log.Fatalf("opening %q: %v", path, err)
	}
	defer fd.Close()

	debFile, err := deb.Load(fd, path)
	if err != nil {
		log.Fatalf("loading %q: %v", path, err)
	}

	// Fill in the metadata - https://pkg.go.dev/pault.ag/go/debian@v0.15.0/deb#Control
	c := debFile.Control
	p := Package{Name: c.Package}
	p.Version = c.Version.Version
	p.ID = fmt.Sprintf("%s@%s", p.Name, c.Version.String())
	p.Release = c.Version.Revision
	p.Epoch = int(c.Version.Epoch)
	p.Arch = c.Architecture.String()
	// No separate source-package information is read here; the source fields
	// mirror the binary package.
	p.SrcName = p.Name
	p.SrcVersion = p.Version
	p.SrcRelease = p.Release
	p.SrcEpoch = p.Epoch
	p.Maintainer = c.Maintainer

	/* Canonicalize c.Depends.String() */
	p.DependsOn = dependencyNames(c.Depends.String())

	// Licensing stuff (this is perhaps a bug / misfeature in Trivy)
	tr := debFile.Data
	copyright := ""
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		// Any archive entry whose name ends in "copyright" counts; when
		// several match, the last one read wins.
		if strings.HasSuffix(hdr.Name, "copyright") {
			// ioutil.ReadAll is kept because it is this file's only use of
			// the imported io/ioutil package.
			bs, err := ioutil.ReadAll(tr)
			if err != nil {
				log.Fatalf("reading %q from %q: %v", hdr.Name, path, err)
			}
			copyright = string(bs)
		}
	}

	if copyright != "" {
		licenses, err := parseCopyright(copyright)
		if err != nil {
			log.Fatalf("parsing copyright file of %q: %v", path, err)
		}
		// Names containing "+" (e.g. "GPL-2+") are dropped.
		olicenses := make([]string, 0, len(licenses))
		for _, finding := range licenses {
			if !strings.Contains(finding.Name, "+") {
				olicenses = append(olicenses, finding.Name)
			}
		}
		fmt.Println(olicenses)
		p.Licenses = olicenses
	}
	fmt.Println(p)

	/* Hash 'p': */
	sum, err := hashstructure.Hash(p, hashstructure.FormatV2, nil)
	if err != nil {
		log.Fatalf("hashing package metadata: %v", err)
	}
	fmt.Printf("%x\n", sum)
}