From e9b9ae9e0fc8cda220e471edf3b1a79f702fbfeb Mon Sep 17 00:00:00 2001 From: Jacob Woliver Date: Fri, 27 Oct 2023 16:14:02 -0400 Subject: [PATCH 1/2] better handle multiple paths to parse This changes Parse(string) to Parse(...string) so that it can handle both a single path, as well as a slice of paths. This commit also: - Makes PackageFile exposed externally so that it is a type that can be passed around. - Updates the error message when a package can't be parsed to make it more clear that it might not be a valid Python package. --- internal/distributions/metadata.go | 2 +- internal/packages/package.go | 118 ++--------------------------- parse.go | 49 ++++++------ types/types.go | 116 ++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 136 deletions(-) create mode 100644 types/types.go diff --git a/internal/distributions/metadata.go b/internal/distributions/metadata.go index 3066a15..8ffa5f4 100644 --- a/internal/distributions/metadata.go +++ b/internal/distributions/metadata.go @@ -63,7 +63,7 @@ func NewDistributionMetadata(filename string) (Distribution, string, string, err // "bdist_egg": BDist, } if err != nil { - return nil, "", "", fmt.Errorf("invalid distribution file: %s, err: %v", filepath.Base(filename), err) + return nil, "", "", fmt.Errorf("invalid distribution file: %s: %v", filepath.Base(filename), err) } // If this encounters a metadata version it doesn't support, it may give us diff --git a/internal/packages/package.go b/internal/packages/package.go index 5f0330f..25b7b73 100644 --- a/internal/packages/package.go +++ b/internal/packages/package.go @@ -1,37 +1,13 @@ package packages import ( - "errors" - "github.com/rstudio/python-distribution-parser/internal/distributions" - "github.com/samber/lo" - "io" - "log" - "os" "path/filepath" -) - -type PackageFile struct { - Filename string `json:"file_name"` - Comment string `json:"comment"` - BaseFilename string `json:"base_filename"` - Metadata *distributions.Distribution `json:"metadata"` - PythonVersion string `json:"pyversion"` - FileType string `json:"filetype"` - SafeName string `json:"safe_name"` - SignedFilename string `json:"signed_filename"` - SignedBaseFilename string `json:"signed_base_filename"` - GPGSignature *Signature `json:"gpg_signature"` - MD5Digest string `json:"md5_digest"` - SHA2Digest string `json:"sha256_digest"` - Blake2_256Digest string `json:"blake2_256_digest"` -} -type Signature struct { - Filename string `json:"signed_filename"` - Bytes []byte `json:"signed_bytes"` -} + "github.com/rstudio/python-distribution-parser/internal/distributions" + "github.com/rstudio/python-distribution-parser/types" +) -func NewPackageFile(filename string) (*PackageFile, error) { +func NewPackageFile(filename string) (*types.PackageFile, error) { metadata, pythonVersion, fileType, err := distributions.NewDistributionMetadata(filename) if err != nil { return nil, err @@ -53,7 +29,7 @@ func NewPackageFile(filename string) (*PackageFile, error) { } hexdigest := hashManager.HexDigest() - return &PackageFile{ + return &types.PackageFile{ Filename: filename, Comment: "", // Adding a comment isn't currently possible BaseFilename: baseFilename, @@ -68,87 +44,3 @@ func NewPackageFile(filename string) (*PackageFile, error) { Blake2_256Digest: hexdigest.blake2, }, nil } - -func (pf *PackageFile) MetadataMap() map[string][]string { - pkgMap := distributions.StructToMap(pf) - metadata := *pf.Metadata - metadataMap := metadata.MetadataMap() - result := make(map[string][]string, len(pkgMap)+len(metadataMap)) - for pk, pv := range pkgMap { - result[pk] = pv - } - for mk, mv := range metadataMap { - result[mk] = mv - } - - result["name"] = result["safe_name"] - // This makes the request look more like Twine - result["protocol_version"] = []string{"1"} - result[":action"] = []string{"file_upload"} - - ignoredKeys := []string{ - "base_filename", - "file_name", - "safe_name", - "signed_base_filename", - "signed_filename", - "metadata", - } - - for _, key := range ignoredKeys { - delete(result, key) - } - - allowedBlankValues := []string{ - "author", - "author_email", - "comment", - "download_url", - "home_page", - "keywords", - "license", - "maintainer", - "pyversion", - "description_content_type", - "maintainer_email", - "requires_python", - } - - // remove any keys that are an empty value, unless twine expects them - result = lo.OmitBy(result, func(key string, value []string) bool { - if lo.Contains(allowedBlankValues, key) { - return false - } - return value == nil || len(value) == 1 && (value[0] == "" || value[0] == "") - }) - - return result -} - -func (pf *PackageFile) AddGPGSignature(signatureFilepath string, signatureFilename string) error { - if pf.GPGSignature != nil { - return errors.New("GPG Signature can only be added once") - } - - gpg, err := os.Open(signatureFilepath) - if err != nil { - return err - } - defer func(gpg *os.File) { - err := gpg.Close() - if err != nil { - log.Printf("error closing file: %v", err) - } - }(gpg) - - bytes, err := io.ReadAll(gpg) - if err != nil { - return err - } - - pf.GPGSignature = &Signature{ - Filename: signatureFilename, - Bytes: bytes, - } - return nil -} diff --git a/parse.go b/parse.go index 961df84..32a47b7 100644 --- a/parse.go +++ b/parse.go @@ -3,11 +3,13 @@ package parse import ( "errors" "fmt" - "github.com/rstudio/python-distribution-parser/internal/packages" "os" "path/filepath" "sort" "strings" + + "github.com/rstudio/python-distribution-parser/internal/packages" + "github.com/rstudio/python-distribution-parser/types" ) func endsWith(str, suffix string) bool { @@ -57,10 +59,10 @@ func findDistributions(dists []string) ([]string, error) { return groupWheelFilesFirst(packages), nil } -func makePackage(filename string, signatures map[string]string) (*packages.PackageFile, error) { +func makePackage(filename string, signatures map[string]string) (*types.PackageFile, error) { packageFile, err := packages.NewPackageFile(filename) if err != nil { - return nil, fmt.Errorf("unable to create packageFile from %s: %v\n", filename, err) + return nil, fmt.Errorf("unable to parse %s, path may not point to a valid Python package: %v\n", filename, err) } signedName := packageFile.SignedBaseFilename @@ -78,7 +80,7 @@ func makePackage(filename string, signatures map[string]string) (*packages.Packa return packageFile, nil } -func parse(dists []string) ([]*packages.PackageFile, error) { +func parse(dists []string) ([]*types.PackageFile, error) { dists, err := findDistributions(dists) if err != nil { return nil, fmt.Errorf("error finding distributions: %v\n", err) @@ -98,7 +100,7 @@ func parse(dists []string) ([]*packages.PackageFile, error) { } } - var packages []*packages.PackageFile + var packages []*types.PackageFile for _, filename := range distributions { p, err := makePackage(filename, signatures) if err != nil { @@ -110,30 +112,33 @@ func parse(dists []string) ([]*packages.PackageFile, error) { return packages, nil } -func Parse(path string) ([]*packages.PackageFile, error) { +func Parse(paths ...string) ([]*types.PackageFile, error) { var files []string - info, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return nil, fmt.Errorf("%s does not exist: %v\n", path, err) - } - return nil, err - } - if info.IsDir() { - dirFiles, err := os.ReadDir(path) + for _, path := range paths { + info, err := os.Stat(path) if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("%s does not exist: %v\n", path, err) + } return nil, err } - for _, entry := range dirFiles { - // Don't recursively go in directories, only go one level deep. - if !entry.IsDir() { - fullPath := filepath.Join(path, entry.Name()) - files = append(files, fullPath) + + if info.IsDir() { + dirFiles, err := os.ReadDir(path) + if err != nil { + return nil, err } + for _, entry := range dirFiles { + // Don't recursively go in directories, only go one level deep. + if !entry.IsDir() { + fullPath := filepath.Join(path, entry.Name()) + files = append(files, fullPath) + } + } + } else { + files = append(files, path) } - } else { - files = append(files, path) } if len(files) == 0 { diff --git a/types/types.go b/types/types.go new file mode 100644 index 0000000..4954da9 --- /dev/null +++ b/types/types.go @@ -0,0 +1,116 @@ +package types + +import ( + "errors" + "io" + "log" + "os" + + "github.com/rstudio/python-distribution-parser/internal/distributions" + "github.com/samber/lo" +) + +type PackageFile struct { + Filename string `json:"file_name"` + Comment string `json:"comment"` + BaseFilename string `json:"base_filename"` + Metadata *distributions.Distribution `json:"metadata"` + PythonVersion string `json:"pyversion"` + FileType string `json:"filetype"` + SafeName string `json:"safe_name"` + SignedFilename string `json:"signed_filename"` + SignedBaseFilename string `json:"signed_base_filename"` + GPGSignature *Signature `json:"gpg_signature"` + MD5Digest string `json:"md5_digest"` + SHA2Digest string `json:"sha256_digest"` + Blake2_256Digest string `json:"blake2_256_digest"` +} + +type Signature struct { + Filename string `json:"signed_filename"` + Bytes []byte `json:"signed_bytes"` +} + +func (pf *PackageFile) MetadataMap() map[string][]string { + pkgMap := distributions.StructToMap(pf) + metadata := *pf.Metadata + metadataMap := metadata.MetadataMap() + result := make(map[string][]string, len(pkgMap)+len(metadataMap)) + for pk, pv := range pkgMap { + result[pk] = pv + } + for mk, mv := range metadataMap { + result[mk] = mv + } + + result["name"] = result["safe_name"] + // This makes the request look more like Twine + result["protocol_version"] = []string{"1"} + result[":action"] = []string{"file_upload"} + + ignoredKeys := []string{ + "base_filename", + "file_name", + "safe_name", + "signed_base_filename", + "signed_filename", + "metadata", + } + + for _, key := range ignoredKeys { + delete(result, key) + } + + allowedBlankValues := []string{ + "author", + "author_email", + "comment", + "download_url", + "home_page", + "keywords", + "license", + "maintainer", + "pyversion", + "description_content_type", + "maintainer_email", + "requires_python", + } + + // remove any keys that are an empty value, unless twine expects them + result = lo.OmitBy(result, func(key string, value []string) bool { + if lo.Contains(allowedBlankValues, key) { + return false + } + return value == nil || len(value) == 1 && (value[0] == "" || value[0] == "") + }) + + return result +} + +func (pf *PackageFile) AddGPGSignature(signatureFilepath string, signatureFilename string) error { + if pf.GPGSignature != nil { + return errors.New("GPG Signature can only be added once") + } + + gpg, err := os.Open(signatureFilepath) + if err != nil { + return err + } + defer func(gpg *os.File) { + err := gpg.Close() + if err != nil { + log.Printf("error closing file: %v", err) + } + }(gpg) + + bytes, err := io.ReadAll(gpg) + if err != nil { + return err + } + + pf.GPGSignature = &Signature{ + Filename: signatureFilename, + Bytes: bytes, + } + return nil +} From ed8e528534b8f7b9d2df51968192006ce68eaae4 Mon Sep 17 00:00:00 2001 From: Jacob Woliver Date: Fri, 27 Oct 2023 17:21:04 -0400 Subject: [PATCH 2/2] handle 'file already closed' error --- internal/archiver/archive_reader.go | 43 +++++++++++------------------ internal/distributions/sdist.go | 3 +- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/internal/archiver/archive_reader.go b/internal/archiver/archive_reader.go index aaa0003..f7c28e9 100644 --- a/internal/archiver/archive_reader.go +++ b/internal/archiver/archive_reader.go @@ -56,43 +56,32 @@ type tarReader struct { closer io.Closer } -func (t *tarReader) resetReader() error { - err := t.Close() - if err != nil { - return err - } - - // Reopen the file +func (t *tarReader) FileNames() ([]string, error) { f, err := os.Open(t.filename) if err != nil { - return err + return nil, err } - if strings.HasSuffix(t.filename, ".tar.gz") || strings.HasSuffix(t.filename, ".tgz") { - gzr, err := gzip.NewReader(f) + defer func(f *os.File) { + err := f.Close() if err != nil { - cerr := f.Close() - if cerr != nil { - return cerr - } - return err + log.Printf("error closing file: %v", err) } - t.Reader = tar.NewReader(gzr) // Reset the tar reader with new gzip reader - } + }(f) - return nil -} - -func (t *tarReader) FileNames() ([]string, error) { - defer func(t *tarReader) { - err := t.resetReader() - if err != nil { - log.Printf("error resetting reader: %v", err) + gzr, err := gzip.NewReader(f) + if err != nil { + cerr := f.Close() + if cerr != nil { + return nil, cerr } - }(t) + return nil, err + } + tarReader := tar.NewReader(gzr) + var names []string for { - hdr, err := t.Next() + hdr, err := tarReader.Next() if err == io.EOF { break } diff --git a/internal/distributions/sdist.go b/internal/distributions/sdist.go index 18e7807..9b0b344 100644 --- a/internal/distributions/sdist.go +++ b/internal/distributions/sdist.go @@ -3,11 +3,12 @@ package distributions import ( "bytes" "fmt" - "github.com/rstudio/python-distribution-parser/internal/archiver" "log" "path/filepath" "sort" "strings" + + "github.com/rstudio/python-distribution-parser/internal/archiver" ) type SDist struct {