Skip to content

Commit

Permalink
Merge branch 'master' into feat_presign
Browse files Browse the repository at this point in the history
  • Loading branch information
igungor committed Aug 11, 2023
2 parents c3c2f38 + ab52b9c commit a2e4bd1
Show file tree
Hide file tree
Showing 21 changed files with 962 additions and 95 deletions.
5 changes: 5 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@ updates:
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
# Create security fix PRs only
open-pull-requests-limit: 0


# Dependencies listed in .github/workflows/*.yml
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
# Create security fix PRs only
open-pull-requests-limit: 0
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
- Added `--show-fullpath` flag to `ls`. ([#596](https://github.com/peak/s5cmd/issues/596))
- Added `pipe` command. ([#182](https://github.com/peak/s5cmd/issues/182))
- Added `--show-progress` flag to `cp` to show a progress bar. ([#51](https://github.com/peak/s5cmd/issues/51))
- Added `--include` flag to `cp`, `rm` and `sync` commands. ([#516](https://github.com/peak/s5cmd/issues/516))

#### Improvements
- Implemented concurrent multipart download support for `cat`. ([#245](https://github.com/peak/s5cmd/issues/245))
- Upgraded minimum required Go version to 1.19. ([#583](https://github.com/peak/s5cmd/pull/583))

#### Bugfixes
- Fixed a bug that caused the `sync` command to fail when a flag value contains whitespace. ([#541](https://github.com/peak/s5cmd/issues/541))
- Fixed a bug introduced with `external sort` support in `sync` command, which prevented `sync` from working with an empty destination when the `--delete` option is used. ([#576](https://github.com/peak/s5cmd/issues/576))
- Fixed a bug in `sync` command, which previously caused the command to continue running even if an error was received from the destination bucket. ([#564](https://github.com/peak/s5cmd/issues/564))
- Fixed a bug that causes local files to be lost if downloads fail. ([#479](https://github.com/peak/s5cmd/issues/479))

## v2.1.0 - 19 Jun 2023
Expand Down
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,29 @@ folder hierarchy.
⚠️ Copying objects (from S3 to S3) larger than 5GB is not supported yet. We have
an [open ticket](https://github.com/peak/s5cmd/issues/29) to track the issue.

#### Using Exclude and Include Filters
`s5cmd` supports the `--exclude` and `--include` flags, which can be used to specify patterns for objects to be excluded or included in commands.

- The `--exclude` flag specifies objects that should be excluded from the operation. Any object that matches the pattern will be skipped.
- The `--include` flag specifies objects that should be included in the operation. Only objects that match the pattern will be handled.
- If both flags are used, `--exclude` has precedence over `--include`. This means that if an object URL matches any of the `--exclude` patterns, the object will be skipped, even if it also matches one of the `--include` patterns.
- The order of the flags does not affect the results (unlike `aws-cli`).

The command below will delete only objects that end with `.log`.

s5cmd rm --include "*.log" 's3://bucket/logs/2020/*'

The command below will delete all objects except those that end with `.log` or `.txt`.

s5cmd rm --exclude "*.log" --exclude "*.txt" 's3://bucket/logs/2020/*'

If you wish, you can use multiple flags, like below. It will download objects that start with `request` or end with `.log`.

s5cmd cp --include "*.log" --include "request*" 's3://bucket/logs/2020/*' .

Using a combination of `--include` and `--exclude` is also possible. The command below will only sync objects that end with `log` or `txt` but exclude those that start with `access_`. For example, `request.log` and `license.txt` will be included, while `access_log.txt` and `readme.md` are excluded.

s5cmd sync --include "*log" --exclude "access_*" --include "*txt" 's3://bucket/logs/*' .
#### Select JSON object content using SQL

`s5cmd` supports the `SelectObjectContent` S3 operation, and will run your
Expand Down
4 changes: 2 additions & 2 deletions command/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func generateCommand(c *cli.Context, cmd string, defaultFlags map[string]interfa

flags := []string{}
for flagname, flagvalue := range defaultFlags {
flags = append(flags, fmt.Sprintf("--%s=%v", flagname, flagvalue))
flags = append(flags, fmt.Sprintf("--%s='%v'", flagname, flagvalue))
}

isDefaultFlag := func(flagname string) bool {
Expand All @@ -88,7 +88,7 @@ func generateCommand(c *cli.Context, cmd string, defaultFlags map[string]interfa
}

for _, flagvalue := range contextValue(c, flagname) {
flags = append(flags, fmt.Sprintf("--%s=%s", flagname, flagvalue))
flags = append(flags, fmt.Sprintf("--%s='%s'", flagname, flagvalue))
}
}

Expand Down
37 changes: 28 additions & 9 deletions command/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package command

import (
"flag"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -46,7 +45,25 @@ func TestGenerateCommand(t *testing.T) {
mustNewURL(t, "s3://bucket/key1"),
mustNewURL(t, "s3://bucket/key2"),
},
expectedCommand: `cp --acl=public-read --raw=true "s3://bucket/key1" "s3://bucket/key2"`,
expectedCommand: `cp --acl='public-read' --raw='true' "s3://bucket/key1" "s3://bucket/key2"`,
},
{
name: "cli-flag-with-whitespaced-flag-value",
cmd: "cp",
flags: []cli.Flag{
&cli.StringFlag{
Name: "cache-control",
Value: "public, max-age=31536000, immutable",
},
},
defaultFlags: map[string]interface{}{
"raw": true,
},
urls: []*url.URL{
mustNewURL(t, "s3://bucket/key1"),
mustNewURL(t, "s3://bucket/key2"),
},
expectedCommand: `cp --cache-control='public, max-age=31536000, immutable' --raw='true' "s3://bucket/key1" "s3://bucket/key2"`,
},
{
name: "same-flag-should-be-ignored-if-given-from-both-default-and-cli-flags",
Expand All @@ -64,7 +81,7 @@ func TestGenerateCommand(t *testing.T) {
mustNewURL(t, "s3://bucket/key1"),
mustNewURL(t, "s3://bucket/key2"),
},
expectedCommand: `cp --raw=true "s3://bucket/key1" "s3://bucket/key2"`,
expectedCommand: `cp --raw='true' "s3://bucket/key1" "s3://bucket/key2"`,
},
{
name: "ignore-non-shared-flag",
Expand Down Expand Up @@ -101,7 +118,7 @@ func TestGenerateCommand(t *testing.T) {
mustNewURL(t, "s3://bucket/key1"),
mustNewURL(t, "s3://bucket/key2"),
},
expectedCommand: `cp --concurrency=6 --flatten=true --force-glacier-transfer=true --raw=true "s3://bucket/key1" "s3://bucket/key2"`,
expectedCommand: `cp --concurrency='6' --flatten='true' --force-glacier-transfer='true' --raw='true' "s3://bucket/key1" "s3://bucket/key2"`,
},
{
name: "string-slice-flag",
Expand All @@ -116,7 +133,7 @@ func TestGenerateCommand(t *testing.T) {
mustNewURL(t, "/source/dir"),
mustNewURL(t, "s3://bucket/prefix/"),
},
expectedCommand: `cp --exclude=*.log --exclude=*.txt "/source/dir" "s3://bucket/prefix/"`,
expectedCommand: `cp --exclude='*.log' --exclude='*.txt' "/source/dir" "s3://bucket/prefix/"`,
},
{
name: "command-with-multiple-args",
Expand Down Expand Up @@ -155,10 +172,12 @@ func TestGenerateCommand(t *testing.T) {
// and methods to update context are package-private, so write simple
// flag parser to update context value.
set.VisitAll(func(f *flag.Flag) {
value := strings.Trim(f.Value.String(), "[")
value = strings.Trim(value, "]")
for _, v := range strings.Fields(value) {
ctx.Set(f.Name, v)
if v, ok := f.Value.(*cli.StringSlice); ok {
for _, s := range v.Value() {
ctx.Set(f.Name, s)
}
} else {
ctx.Set(f.Name, f.Value.String())
}
})

Expand Down
43 changes: 37 additions & 6 deletions command/cp.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"net/http"
"os"
"path/filepath"
"regexp"
"strings"
"sync"

Expand Down Expand Up @@ -97,13 +98,16 @@ Examples:
19. Copy all files from S3 bucket to another S3 bucket but exclude the ones starts with log
> s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket
20. Download an S3 object from a requester pays bucket
20. Copy all files from S3 bucket to another S3 bucket but only the ones starts with log
> s5cmd {{.HelpName}} --include "log*" "s3://bucket/*" s3://destbucket
21. Download an S3 object from a requester pays bucket
> s5cmd --request-payer=requester {{.HelpName}} s3://bucket/prefix/object.gz .
21. Upload a file to S3 with a content-type and content-encoding header
22. Upload a file to S3 with a content-type and content-encoding header
> s5cmd --content-type "text/css" --content-encoding "br" myfile.css.br s3://bucket/
22. Download the specific version of a remote object to working directory
23. Download the specific version of a remote object to working directory
> s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object .
`

Expand Down Expand Up @@ -169,6 +173,10 @@ func NewSharedFlags() []cli.Flag {
Name: "exclude",
Usage: "exclude objects with given pattern",
},
&cli.StringSliceFlag{
Name: "include",
Usage: "include objects with given pattern",
},
&cli.BoolFlag{
Name: "raw",
Usage: "disable the wildcard operations, useful with filenames that contains glob characters",
Expand Down Expand Up @@ -282,6 +290,7 @@ type Copy struct {
forceGlacierTransfer bool
ignoreGlacierWarnings bool
exclude []string
include []string
cacheControl string
expires string
contentType string
Expand All @@ -290,6 +299,10 @@ type Copy struct {
showProgress bool
progressbar progressbar.ProgressBar

// patterns
excludePatterns []*regexp.Regexp
includePatterns []*regexp.Regexp

// region settings
srcRegion string
dstRegion string
Expand Down Expand Up @@ -346,6 +359,7 @@ func NewCopy(c *cli.Context, deleteSource bool) (*Copy, error) {
forceGlacierTransfer: c.Bool("force-glacier-transfer"),
ignoreGlacierWarnings: c.Bool("ignore-glacier-warnings"),
exclude: c.StringSlice("exclude"),
include: c.StringSlice("include"),
cacheControl: c.String("cache-control"),
expires: c.String("expires"),
contentType: c.String("content-type"),
Expand Down Expand Up @@ -422,14 +436,27 @@ func (c Copy) Run(ctx context.Context) error {
isBatch = obj != nil && obj.Type.IsDir()
}

excludePatterns, err := createExcludesFromWildcard(c.exclude)
c.excludePatterns, err = createRegexFromWildcard(c.exclude)
if err != nil {
printError(c.fullCommand, c.op, err)
return err
}

c.includePatterns, err = createRegexFromWildcard(c.include)
if err != nil {
printError(c.fullCommand, c.op, err)
return err
}

for object := range objch {
if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) {
if errorpkg.IsCancelation(object.Err) || object.Type.IsDir() {
continue
}

if !object.Type.IsRegular() {
err := fmt.Errorf("object '%v' is not a regular file", object)
merrorObjects = multierror.Append(merrorObjects, err)
printError(c.fullCommand, c.op, err)
continue
}

Expand All @@ -448,7 +475,11 @@ func (c Copy) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, c.src.Prefix) {
isExcluded, err := isObjectExcluded(object, c.excludePatterns, c.includePatterns, c.src.Prefix)
if err != nil {
printError(c.fullCommand, c.op, err)
}
if isExcluded {
continue
}

Expand Down
4 changes: 2 additions & 2 deletions command/du.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (sz Size) Run(ctx context.Context) error {

var merror error

excludePatterns, err := createExcludesFromWildcard(sz.exclude)
excludePatterns, err := createRegexFromWildcard(sz.exclude)
if err != nil {
printError(sz.fullCommand, sz.op, err)
return err
Expand All @@ -161,7 +161,7 @@ func (sz Size) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, sz.src.Prefix) {
if isURLMatched(excludePatterns, object.URL.Path, sz.src.Prefix) {
continue
}

Expand Down
44 changes: 0 additions & 44 deletions command/exclude.go

This file was deleted.

8 changes: 4 additions & 4 deletions command/expand.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func expandSource(
followSymlinks bool,
srcurl *url.URL,
) (<-chan *storage.Object, error) {
var isDir bool
var objType storage.ObjectType
// if the source is local, we send a Stat call to know if we have
// directory or file to walk. For remote storage, we don't want to send
// Stat since it doesn't have any folder semantics.
Expand All @@ -28,17 +28,17 @@ func expandSource(
if err != nil {
return nil, err
}
isDir = obj.Type.IsDir()
objType = obj.Type
}

// call storage.List for only walking operations.
if srcurl.IsWildcard() || srcurl.AllVersions || isDir {
if srcurl.IsWildcard() || srcurl.AllVersions || objType.IsDir() {
return client.List(ctx, srcurl, followSymlinks), nil
}

ch := make(chan *storage.Object, 1)
if storage.ShouldProcessURL(srcurl, followSymlinks) {
ch <- &storage.Object{URL: srcurl}
ch <- &storage.Object{URL: srcurl, Type: objType}
}
close(ch)
return ch, nil
Expand Down
4 changes: 2 additions & 2 deletions command/ls.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ func (l List) Run(ctx context.Context) error {

var merror error

excludePatterns, err := createExcludesFromWildcard(l.exclude)
excludePatterns, err := createRegexFromWildcard(l.exclude)
if err != nil {
printError(l.fullCommand, l.op, err)
return err
Expand All @@ -205,7 +205,7 @@ func (l List) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, l.src.Prefix) {
if isURLMatched(excludePatterns, object.URL.Path, l.src.Prefix) {
continue
}

Expand Down

0 comments on commit a2e4bd1

Please sign in to comment.