diff --git a/.github/dependabot.yml b/.github/dependabot.yml index d608a8244..6ec4829f2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,9 +5,14 @@ updates: directory: "/" # Location of package manifests schedule: interval: "weekly" + # Create security fix PRs only + open-pull-requests-limit: 0 + # Dependencies listed in .github/workflows/*.yml - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" + # Create security fix PRs only + open-pull-requests-limit: 0 diff --git a/CHANGELOG.md b/CHANGELOG.md index ad435b3cd..0716130f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,12 +9,14 @@ - Added `--show-fullpath` flag to `ls`. ([#596](https://github.com/peak/s5cmd/issues/596)) - Added `pipe` command. ([#182](https://github.com/peak/s5cmd/issues/182)) - Added `--show-progress` flag to `cp` to show a progress bar. ([#51](https://github.com/peak/s5cmd/issues/51)) +- Added `--include` flag to `cp`, `rm` and `sync` commands. ([#516](https://github.com/peak/s5cmd/issues/516)) #### Improvements - Implemented concurrent multipart download support for `cat`. ([#245](https://github.com/peak/s5cmd/issues/245)) - Upgraded minimum required Go version to 1.19. ([#583](https://github.com/peak/s5cmd/pull/583)) #### Bugfixes +- Fixed a bug that causes `sync` command with whitespaced flag value to fail. ([#541](https://github.com/peak/s5cmd/issues/541)) - Fixed a bug introduced with `external sort` support in `sync` command which prevents `sync` to an empty destination with `--delete` option. ([#576](https://github.com/peak/s5cmd/issues/576)) - Fixed a bug in `sync` command, which previously caused the command to continue running even if an error was received from the destination bucket. ([#564](https://github.com/peak/s5cmd/issues/564)) - Fixed a bug that causes local files to be lost if downloads fail. 
([#479](https://github.com/peak/s5cmd/issues/479)) diff --git a/README.md b/README.md index a7734cba8..af9165b6f 100644 --- a/README.md +++ b/README.md @@ -301,6 +301,29 @@ folder hierarchy. ⚠️ Copying objects (from S3 to S3) larger than 5GB is not supported yet. We have an [open ticket](https://github.com/peak/s5cmd/issues/29) to track the issue. +#### Using Exclude and Include Filters +`s5cmd` supports the `--exclude` and `--include` flags, which can be used to specify patterns for objects to be excluded or included in commands. + +- The `--exclude` flag specifies objects that should be excluded from the operation. Any object that matches the pattern will be skipped. +- The `--include` flag specifies objects that should be included in the operation. Only objects that match the pattern will be handled. +- If both flags are used, `--exclude` has precedence over `--include`. This means that if an object URL matches any of the `--exclude` patterns, the object will be skipped, even if it also matches one of the `--include` patterns. +- The order of the flags does not affect the results (unlike `aws-cli`). + +The command below will delete only objects that end with `.log`. + + s5cmd rm --include "*.log" 's3://bucket/logs/2020/*' + +The command below will delete all objects except those that end with `.log` or `.txt`. + + s5cmd rm --exclude "*.log" --exclude "*.txt" 's3://bucket/logs/2020/*' + +If you wish, you can use multiple flags, like below. It will download objects that start with `request` or end with `.log`. + + s5cmd cp --include "*.log" --include "request*" 's3://bucket/logs/2020/*' . + +Using a combination of `--include` and `--exclude` is also possible. The command below will only sync objects that end with `log` or `txt` but exclude those that start with `access_`. For example, `request.log` and `license.txt` will be included, while `access_log.txt` and `readme.md` are excluded. 
+ + s5cmd sync --include "*log" --exclude "access_*" --include "*txt" 's3://bucket/logs/*' . #### Select JSON object content using SQL `s5cmd` supports the `SelectObjectContent` S3 operation, and will run your diff --git a/command/context.go b/command/context.go index 3e3f5d08d..1eb78fb3d 100644 --- a/command/context.go +++ b/command/context.go @@ -73,7 +73,7 @@ func generateCommand(c *cli.Context, cmd string, defaultFlags map[string]interfa flags := []string{} for flagname, flagvalue := range defaultFlags { - flags = append(flags, fmt.Sprintf("--%s=%v", flagname, flagvalue)) + flags = append(flags, fmt.Sprintf("--%s='%v'", flagname, flagvalue)) } isDefaultFlag := func(flagname string) bool { @@ -88,7 +88,7 @@ func generateCommand(c *cli.Context, cmd string, defaultFlags map[string]interfa } for _, flagvalue := range contextValue(c, flagname) { - flags = append(flags, fmt.Sprintf("--%s=%s", flagname, flagvalue)) + flags = append(flags, fmt.Sprintf("--%s='%s'", flagname, flagvalue)) } } diff --git a/command/context_test.go b/command/context_test.go index 3e7536319..e7783356f 100644 --- a/command/context_test.go +++ b/command/context_test.go @@ -2,7 +2,6 @@ package command import ( "flag" - "strings" "testing" "github.com/google/go-cmp/cmp" @@ -46,7 +45,25 @@ func TestGenerateCommand(t *testing.T) { mustNewURL(t, "s3://bucket/key1"), mustNewURL(t, "s3://bucket/key2"), }, - expectedCommand: `cp --acl=public-read --raw=true "s3://bucket/key1" "s3://bucket/key2"`, + expectedCommand: `cp --acl='public-read' --raw='true' "s3://bucket/key1" "s3://bucket/key2"`, + }, + { + name: "cli-flag-with-whitespaced-flag-value", + cmd: "cp", + flags: []cli.Flag{ + &cli.StringFlag{ + Name: "cache-control", + Value: "public, max-age=31536000, immutable", + }, + }, + defaultFlags: map[string]interface{}{ + "raw": true, + }, + urls: []*url.URL{ + mustNewURL(t, "s3://bucket/key1"), + mustNewURL(t, "s3://bucket/key2"), + }, + expectedCommand: `cp --cache-control='public, max-age=31536000, 
immutable' --raw='true' "s3://bucket/key1" "s3://bucket/key2"`, }, { name: "same-flag-should-be-ignored-if-given-from-both-default-and-cli-flags", @@ -64,7 +81,7 @@ func TestGenerateCommand(t *testing.T) { mustNewURL(t, "s3://bucket/key1"), mustNewURL(t, "s3://bucket/key2"), }, - expectedCommand: `cp --raw=true "s3://bucket/key1" "s3://bucket/key2"`, + expectedCommand: `cp --raw='true' "s3://bucket/key1" "s3://bucket/key2"`, }, { name: "ignore-non-shared-flag", @@ -101,7 +118,7 @@ func TestGenerateCommand(t *testing.T) { mustNewURL(t, "s3://bucket/key1"), mustNewURL(t, "s3://bucket/key2"), }, - expectedCommand: `cp --concurrency=6 --flatten=true --force-glacier-transfer=true --raw=true "s3://bucket/key1" "s3://bucket/key2"`, + expectedCommand: `cp --concurrency='6' --flatten='true' --force-glacier-transfer='true' --raw='true' "s3://bucket/key1" "s3://bucket/key2"`, }, { name: "string-slice-flag", @@ -116,7 +133,7 @@ func TestGenerateCommand(t *testing.T) { mustNewURL(t, "/source/dir"), mustNewURL(t, "s3://bucket/prefix/"), }, - expectedCommand: `cp --exclude=*.log --exclude=*.txt "/source/dir" "s3://bucket/prefix/"`, + expectedCommand: `cp --exclude='*.log' --exclude='*.txt' "/source/dir" "s3://bucket/prefix/"`, }, { name: "command-with-multiple-args", @@ -155,10 +172,12 @@ func TestGenerateCommand(t *testing.T) { // and methods to update context are package-private, so write simple // flag parser to update context value. 
set.VisitAll(func(f *flag.Flag) { - value := strings.Trim(f.Value.String(), "[") - value = strings.Trim(value, "]") - for _, v := range strings.Fields(value) { - ctx.Set(f.Name, v) + if v, ok := f.Value.(*cli.StringSlice); ok { + for _, s := range v.Value() { + ctx.Set(f.Name, s) + } + } else { + ctx.Set(f.Name, f.Value.String()) } }) diff --git a/command/cp.go b/command/cp.go index 4a281fa50..36f71a3fe 100644 --- a/command/cp.go +++ b/command/cp.go @@ -9,6 +9,7 @@ import ( "net/http" "os" "path/filepath" + "regexp" "strings" "sync" @@ -97,13 +98,16 @@ Examples: 19. Copy all files from S3 bucket to another S3 bucket but exclude the ones starts with log > s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket - 20. Download an S3 object from a requester pays bucket + 20. Copy all files from S3 bucket to another S3 bucket but only the ones starts with log + > s5cmd {{.HelpName}} --include "log*" "s3://bucket/*" s3://destbucket + + 21. Download an S3 object from a requester pays bucket > s5cmd --request-payer=requester {{.HelpName}} s3://bucket/prefix/object.gz . - 21. Upload a file to S3 with a content-type and content-encoding header + 22. Upload a file to S3 with a content-type and content-encoding header > s5cmd --content-type "text/css" --content-encoding "br" myfile.css.br s3://bucket/ - 22. Download the specific version of a remote object to working directory + 23. Download the specific version of a remote object to working directory > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object . 
` @@ -169,6 +173,10 @@ func NewSharedFlags() []cli.Flag { Name: "exclude", Usage: "exclude objects with given pattern", }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, &cli.BoolFlag{ Name: "raw", Usage: "disable the wildcard operations, useful with filenames that contains glob characters", @@ -282,6 +290,7 @@ type Copy struct { forceGlacierTransfer bool ignoreGlacierWarnings bool exclude []string + include []string cacheControl string expires string contentType string @@ -290,6 +299,10 @@ type Copy struct { showProgress bool progressbar progressbar.ProgressBar + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp + // region settings srcRegion string dstRegion string @@ -346,6 +359,7 @@ func NewCopy(c *cli.Context, deleteSource bool) (*Copy, error) { forceGlacierTransfer: c.Bool("force-glacier-transfer"), ignoreGlacierWarnings: c.Bool("ignore-glacier-warnings"), exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), cacheControl: c.String("cache-control"), expires: c.String("expires"), contentType: c.String("content-type"), @@ -422,7 +436,13 @@ func (c Copy) Run(ctx context.Context) error { isBatch = obj != nil && obj.Type.IsDir() } - excludePatterns, err := createExcludesFromWildcard(c.exclude) + c.excludePatterns, err = createRegexFromWildcard(c.exclude) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + c.includePatterns, err = createRegexFromWildcard(c.include) if err != nil { printError(c.fullCommand, c.op, err) return err @@ -455,7 +475,11 @@ func (c Copy) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, c.src.Prefix) { + isExcluded, err := isObjectExcluded(object, c.excludePatterns, c.includePatterns, c.src.Prefix) + if err != nil { + printError(c.fullCommand, c.op, err) + } + if isExcluded { continue } diff --git a/command/du.go b/command/du.go index 3a46aab03..8812c9582 100644 --- 
a/command/du.go +++ b/command/du.go @@ -144,7 +144,7 @@ func (sz Size) Run(ctx context.Context) error { var merror error - excludePatterns, err := createExcludesFromWildcard(sz.exclude) + excludePatterns, err := createRegexFromWildcard(sz.exclude) if err != nil { printError(sz.fullCommand, sz.op, err) return err @@ -161,7 +161,7 @@ func (sz Size) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, sz.src.Prefix) { + if isURLMatched(excludePatterns, object.URL.Path, sz.src.Prefix) { continue } diff --git a/command/exclude.go b/command/exclude.go deleted file mode 100644 index 36d9a9aa9..000000000 --- a/command/exclude.go +++ /dev/null @@ -1,44 +0,0 @@ -package command - -import ( - "path/filepath" - "regexp" - "strings" - - "github.com/peak/s5cmd/v2/strutil" -) - -// createExcludesFromWildcard creates regex strings from wildcard. -func createExcludesFromWildcard(inputExcludes []string) ([]*regexp.Regexp, error) { - var result []*regexp.Regexp - for _, input := range inputExcludes { - if input != "" { - regex := strutil.WildCardToRegexp(input) - regex = strutil.MatchFromStartToEnd(regex) - regex = strutil.AddNewLineFlag(regex) - regexpCompiled, err := regexp.Compile(regex) - if err != nil { - return nil, err - } - result = append(result, regexpCompiled) - } - } - return result, nil -} - -// isURLExcluded checks whether given urlPath matches any of the exclude patterns. 
-func isURLExcluded(excludePatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { - if len(excludePatterns) == 0 { - return false - } - if !strings.HasSuffix(sourcePrefix, "/") { - sourcePrefix += "/" - } - sourcePrefix = filepath.ToSlash(sourcePrefix) - for _, excludePattern := range excludePatterns { - if excludePattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { - return true - } - } - return false -} diff --git a/command/ls.go b/command/ls.go index 2fe591d7a..87ff1e06b 100644 --- a/command/ls.go +++ b/command/ls.go @@ -188,7 +188,7 @@ func (l List) Run(ctx context.Context) error { var merror error - excludePatterns, err := createExcludesFromWildcard(l.exclude) + excludePatterns, err := createRegexFromWildcard(l.exclude) if err != nil { printError(l.fullCommand, l.op, err) return err @@ -205,7 +205,7 @@ func (l List) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, l.src.Prefix) { + if isURLMatched(excludePatterns, object.URL.Path, l.src.Prefix) { continue } diff --git a/command/rm.go b/command/rm.go index 94698abf0..3ba092b23 100644 --- a/command/rm.go +++ b/command/rm.go @@ -3,6 +3,7 @@ package command import ( "context" "fmt" + "regexp" "github.com/hashicorp/go-multierror" "github.com/urfave/cli/v2" @@ -38,17 +39,20 @@ Examples: 5. Delete all matching objects but exclude the ones with .txt extension or starts with "main" > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "main*" "s3://bucketname/prefix/*" + + 6. Delete all matching objects but only the ones with .txt extension or starts with "main" + > s5cmd {{.HelpName}} --include "*.txt" --include "main*" "s3://bucketname/prefix/*" - 6. Delete the specific version of a remote object's content to stdout + 7. Delete the specific version of a remote object's content to stdout > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object - 7. Delete all versions of an object in the bucket + 8. 
Delete all versions of an object in the bucket > s5cmd {{.HelpName}} --all-versions s3://bucket/object - 8. Delete all versions of all objects that starts with a prefix in the bucket + 9. Delete all versions of all objects that starts with a prefix in the bucket > s5cmd {{.HelpName}} --all-versions "s3://bucket/prefix*" - 9. Delete all versions of all objects in the bucket + 10. Delete all versions of all objects in the bucket > s5cmd {{.HelpName}} --all-versions "s3://bucket/*" ` @@ -66,6 +70,10 @@ func NewDeleteCommand() *cli.Command { Name: "exclude", Usage: "exclude objects with given pattern", }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, &cli.BoolFlag{ Name: "all-versions", Usage: "list all versions of object(s)", @@ -94,6 +102,18 @@ func NewDeleteCommand() *cli.Command { return err } + excludePatterns, err := createRegexFromWildcard(c.StringSlice("exclude")) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + + includePatterns, err := createRegexFromWildcard(c.StringSlice("include")) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + return Delete{ src: srcUrls, op: c.Command.Name, @@ -101,6 +121,11 @@ func NewDeleteCommand() *cli.Command { // flags exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), + + // patterns + excludePatterns: excludePatterns, + includePatterns: includePatterns, storageOpts: NewStorageOpts(c), }.Run(c.Context) @@ -119,6 +144,11 @@ type Delete struct { // flag options exclude []string + include []string + + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp // storage options storageOpts storage.Options @@ -135,12 +165,6 @@ func (d Delete) Run(ctx context.Context) error { return err } - excludePatterns, err := createExcludesFromWildcard(d.exclude) - if err != nil { - printError(d.fullCommand, d.op, err) - return err - } - objch := expandSources(ctx, client, false, 
d.src...) var ( @@ -164,7 +188,11 @@ func (d Delete) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, srcurl.Prefix) { + isExcluded, err := isObjectExcluded(object, d.excludePatterns, d.includePatterns, srcurl.Prefix) + if err != nil { + printError(d.fullCommand, d.op, err) + } + if isExcluded { continue } diff --git a/command/select.go b/command/select.go index eb2c2eddf..cbd42990d 100644 --- a/command/select.go +++ b/command/select.go @@ -191,7 +191,7 @@ func (s Select) Run(ctx context.Context) error { } }() - excludePatterns, err := createExcludesFromWildcard(s.exclude) + excludePatterns, err := createRegexFromWildcard(s.exclude) if err != nil { printError(s.fullCommand, s.op, err) return err @@ -217,7 +217,7 @@ func (s Select) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, s.src.Prefix) { + if isURLMatched(excludePatterns, object.URL.Path, s.src.Prefix) { continue } diff --git a/command/sync.go b/command/sync.go index f6a65adf5..c848e7abf 100644 --- a/command/sync.go +++ b/command/sync.go @@ -66,6 +66,9 @@ Examples: 10. Sync all files to S3 bucket but exclude the ones with txt and gz extension > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket + + 11. Sync all files to S3 bucket but include the only ones with txt and gz extension + > s5cmd {{.HelpName}} --include "*.txt" --include "*.gz" dir/ s3://bucket ` func NewSyncCommandFlags() []cli.Flag { diff --git a/command/wildcard.go b/command/wildcard.go new file mode 100644 index 000000000..73ac239e7 --- /dev/null +++ b/command/wildcard.go @@ -0,0 +1,57 @@ +package command + +import ( + "path/filepath" + "regexp" + "strings" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/strutil" +) + +// createRegexFromWildcard creates regex strings from wildcard. 
+func createRegexFromWildcard(wildcards []string) ([]*regexp.Regexp, error) { + var result []*regexp.Regexp + for _, input := range wildcards { + if input != "" { + regex := strutil.WildCardToRegexp(input) + regex = strutil.MatchFromStartToEnd(regex) + regex = strutil.AddNewLineFlag(regex) + regexpCompiled, err := regexp.Compile(regex) + if err != nil { + return nil, err + } + result = append(result, regexpCompiled) + } + } + return result, nil +} + +func isURLMatched(regexPatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { + if len(regexPatterns) == 0 { + return false + } + if !strings.HasSuffix(sourcePrefix, "/") { + sourcePrefix += "/" + } + sourcePrefix = filepath.ToSlash(sourcePrefix) + for _, regexPattern := range regexPatterns { + if regexPattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { + return true + } + } + return false +} + +func isObjectExcluded(object *storage.Object, excludePatterns []*regexp.Regexp, includePatterns []*regexp.Regexp, prefix string) (bool, error) { + if err := object.Err; err != nil { + return true, err + } + if len(excludePatterns) > 0 && isURLMatched(excludePatterns, object.URL.Path, prefix) { + return true, nil + } + if len(includePatterns) > 0 { + return !isURLMatched(includePatterns, object.URL.Path, prefix), nil + } + return false, nil +} diff --git a/command/wildcard_test.go b/command/wildcard_test.go new file mode 100644 index 000000000..4c3a8fae2 --- /dev/null +++ b/command/wildcard_test.go @@ -0,0 +1,86 @@ +package command + +import ( + "testing" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "gotest.tools/v3/assert" +) + +func TestIsObjectExcluded(t *testing.T) { + t.Parallel() + + testcases := []struct { + excludePatterns []string + includePatterns []string + objects []string + filteredObjects []string + }{ + { + excludePatterns: []string{"*.txt", "*.log"}, + includePatterns: []string{"file-*.doc"}, + objects: []string{"document.txt", "file-2.log", "file-1.doc", 
"image.png"}, + filteredObjects: []string{"file-1.doc"}, + }, + { + excludePatterns: []string{"secret-*"}, + includePatterns: []string{"*.txt", "*.log"}, + objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"file-1.txt", "file-2.txt"}, + }, + { + excludePatterns: []string{}, + includePatterns: []string{"*.png"}, + objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"image.png"}, + }, + { + excludePatterns: []string{"file*"}, + includePatterns: []string{}, + objects: []string{"readme.md", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"readme.md", "image.png"}, + }, + { + excludePatterns: []string{"file*"}, + includePatterns: []string{"*txt"}, + objects: []string{"readme.txt", "file-1.txt", "file-2.txt", "license.txt"}, + filteredObjects: []string{"readme.txt", "license.txt"}, + }, + { + excludePatterns: []string{"*tmp", "*.txt"}, + includePatterns: []string{"*png", "*.doc*"}, + objects: []string{"readme.txt", "license.txt", "cache.tmp", "image.png", "eula.doc", "eula.docx", "personaldoc"}, + filteredObjects: []string{"image.png", "eula.doc", "eula.docx"}, + }, + } + + for _, tc := range testcases { + tc := tc + + excludeRegex, err := createRegexFromWildcard(tc.excludePatterns) + if err != nil { + t.Error(err) + } + + includeRegex, err := createRegexFromWildcard(tc.includePatterns) + if err != nil { + t.Error(err) + } + + var filteredObjects []string + + for _, object := range tc.objects { + skip, err := isObjectExcluded(&storage.Object{URL: &url.URL{Path: object}}, excludeRegex, includeRegex, "") + if err != nil { + t.Fatal(err) + } + if skip { + continue + } + filteredObjects = append(filteredObjects, object) + } + + assert.DeepEqual(t, tc.filteredObjects, filteredObjects) + } +} diff --git a/e2e/cp_test.go b/e2e/cp_test.go index 68cf9f671..92773ede3 100644 --- a/e2e/cp_test.go +++ b/e2e/cp_test.go @@ -4238,3 +4238,162 @@ 
func TestUploadingSocketFile(t *testing.T) { // assert exit code result.Assert(t, icmd.Expected{ExitCode: 1}) } + +// cp --include "*.py" s3://bucket/* . +func TestCopyS3ObjectsWithIncludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "file.txt", + "a.txt", + "src/file.txt", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("cp", "--include", includePattern, srcpath+"/*", ".") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("cp %v/file1.py %s", srcpath, files[0]), + 1: equals("cp %v/file2.py %s", srcpath, files[1]), + }, sortInput(true)) + + // assert s3 + for _, f := range files { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } + + expectedFileSystem := []fs.PathOp{ + fs.WithFile("file1.py", fileContent), + fs.WithFile("file2.py", fileContent), + } + // assert local filesystem + expected := fs.Expected(t, expectedFileSystem...) + assert.Assert(t, fs.Equal(cmd.Dir, expected)) +} + +// cp --include "file*" --exclude "*.py" s3://bucket/* . 
+func TestCopyS3ObjectsWithIncludeExcludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "file*" + excludePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("cp", "--include", includePattern, "--exclude", excludePattern, srcpath+"/*", ".") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + assertLines(t, result.Stdout(), map[int]compareFunc{}, sortInput(true)) + + // assert s3 + for _, f := range files { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } + + expectedFileSystem := []fs.PathOp{} + // assert local filesystem + expected := fs.Expected(t, expectedFileSystem...) + assert.Assert(t, fs.Equal(cmd.Dir, expected)) +} + +// cp --exclude "file*" --exclude "vendor/*" --include "*.py" --include "*.go" s3://bucket/* . 
+func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + includePattern2 = "*.go" + excludePattern = "file*" + excludePattern2 = "vendor/*" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "file1.go", + "file2.go", + "test.py", + "app.py", + "app.go", + "vendor/package.go", + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("cp", "--exclude", excludePattern, "--exclude", excludePattern2, "--include", includePattern, "--include", includePattern2, srcpath+"/*", ".") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("cp %v/app.go %s", srcpath, files[6]), + 1: equals("cp %v/app.py %s", srcpath, files[5]), + 2: equals("cp %v/test.py %s", srcpath, files[4]), + }, sortInput(true)) + + // assert s3 + for _, f := range files { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } + + expectedFileSystem := []fs.PathOp{ + fs.WithFile("test.py", fileContent), + fs.WithFile("app.py", fileContent), + fs.WithFile("app.go", fileContent), + } + // assert local filesystem + expected := fs.Expected(t, expectedFileSystem...) 
+ assert.Assert(t, fs.Equal(cmd.Dir, expected)) +} diff --git a/e2e/rm_test.go b/e2e/rm_test.go index 4a01b6a40..43c6cb628 100644 --- a/e2e/rm_test.go +++ b/e2e/rm_test.go @@ -1301,3 +1301,155 @@ func TestRemoveByVersionID(t *testing.T) { result = icmd.RunCmd(cmd) assert.Assert(t, result.Stdout() == "") } + +// rm --include "*.py" s3://bucket/ +func TestRemoveS3ObjectsWithIncludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "file.txt", + "data.txt", + "src/app.py", + } + filesKept := [...]string{ + "file.txt", + "data.txt", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("rm", "--include", includePattern, srcpath+"/*") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + fmt.Println(result.Stdout()) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("rm %v/%s", srcpath, files[0]), + 1: equals("rm %v/%s", srcpath, files[1]), + 2: equals("rm %v/%s", srcpath, files[4]), + }, sortInput(true)) + + // assert s3 + for _, f := range filesKept { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } +} + +// rm --include "file*" --exclude "*.py" s3://bucket/ +func TestRemoveS3ObjectsWithIncludeExcludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.md" + excludePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/file.md", + } + filesKept := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, 
fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("rm", "--include", includePattern, "--exclude", excludePattern, srcpath+"/*") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("rm %v/%s", srcpath, files[4]), + }, sortInput(true)) + + // assert s3 + for _, f := range filesKept { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } +} + +// rm --exclude "docs*" --include "*.md" --include "*.py" s3://bucket/ +func TestRemoveS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.md" + includePattern2 = "*.py" + excludePattern = "docs*" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/readme.md", + } + filesKept := [...]string{ + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("rm", "--exclude", excludePattern, "--include", includePattern, "--include", includePattern2, srcpath+"/*") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("rm %v/%s", srcpath, files[3]), + 1: equals("rm %v/%s", srcpath, files[0]), + 2: equals("rm %v/%s", srcpath, files[1]), + 3: equals("rm %v/%s", srcpath, files[2]), + }, sortInput(true)) + + // assert s3 + for _, f := range filesKept { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } +} diff --git a/e2e/sync_test.go b/e2e/sync_test.go index 3dff3c42a..077034006 100644 --- a/e2e/sync_test.go +++ b/e2e/sync_test.go @@ -1963,3 +1963,88 @@ func TestSyncSocketDestinationEmpty(t *testing.T) { // assert exit code result.Assert(t, icmd.Expected{ExitCode: 1}) } + +// 
sync --include pattern s3://bucket/* s3://anotherbucket/prefix/ +func TestSyncS3ObjectsIntoAnotherBucketWithIncludeFilters(t *testing.T) { + t.Parallel() + + srcbucket := s3BucketFromTestNameWithPrefix(t, "src") + dstbucket := s3BucketFromTestNameWithPrefix(t, "dst") + + s3client, s5cmd := setup(t) + + createBucket(t, s3client, srcbucket) + createBucket(t, s3client, dstbucket) + + srcFiles := []string{ + "file_already_exists_in_destination.txt", + "file_not_exists_in_destination.txt", + "main.py", + "main.js", + "readme.md", + "main.pdf", + "main/file.txt", + } + + dstFiles := []string{ + "prefix/file_already_exists_in_destination.txt", + } + + excludedFiles := []string{ + "prefix/file_not_exists_in_destination.txt", + } + + includedFiles := []string{ + "main.js", + "main.pdf", + "main.py", + "main/file.txt", + "readme.md", + } + + const ( + content = "this is a file content" + includePattern1 = "main*" + includePattern2 = "*.md" + ) + + for _, filename := range srcFiles { + putFile(t, s3client, srcbucket, filename, content) + } + + for _, filename := range dstFiles { + putFile(t, s3client, dstbucket, filename, content) + } + + src := fmt.Sprintf("s3://%v/*", srcbucket) + dst := fmt.Sprintf("s3://%v/prefix/", dstbucket) + + cmd := s5cmd("sync", "--include", includePattern1, "--include", includePattern2, src, dst) + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[0], dstbucket, includedFiles[0]), + 1: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[1], dstbucket, includedFiles[1]), + 2: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[2], dstbucket, includedFiles[2]), + 3: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[3], dstbucket, includedFiles[3]), + 4: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[4], dstbucket, includedFiles[4]), + }, 
sortInput(true)) + + // assert s3 source objects + for _, filename := range srcFiles { + assert.Assert(t, ensureS3Object(s3client, srcbucket, filename, content)) + } + + // assert s3 destination objects + for _, filename := range includedFiles { + assert.Assert(t, ensureS3Object(s3client, dstbucket, "prefix/"+filename, content)) + } + + // assert s3 destination objects which should not be in bucket. + for _, filename := range excludedFiles { + err := ensureS3Object(s3client, dstbucket, filename, content) + assertError(t, err, errS3NoSuchKey) + } +} diff --git a/progressbar/progressbar_test.go b/progressbar/progressbar_test.go index f9cf8cff8..8c8a12f9f 100644 --- a/progressbar/progressbar_test.go +++ b/progressbar/progressbar_test.go @@ -38,8 +38,7 @@ func TestCommandProgress_AddCompletedBytes(t *testing.T) { cp.Start() bytes := int64(101) cp.AddCompletedBytes(bytes) - assert.Equal(t, int64(101), cp.progressbar.Current()) - assert.Equal(t, int64(bytes), cp.progressbar.Current()) + assert.Equal(t, bytes, cp.progressbar.Current()) assert.Equal(t, true, strings.Contains(cp.progressbar.String(), "101 B")) } @@ -49,7 +48,6 @@ func TestCommandProgress_AddTotalBytes(t *testing.T) { cp.Start() bytes := int64(102) cp.AddTotalBytes(bytes) - assert.Equal(t, int64(102), cp.progressbar.Total()) assert.Equal(t, bytes, cp.progressbar.Total()) assert.Equal(t, true, strings.Contains(cp.progressbar.String(), "102 B")) }