From 9530a64fc735f243079d7259887843cd64ae5d73 Mon Sep 17 00:00:00 2001 From: Ivan Andreev Date: Wed, 7 Jul 2021 18:34:16 +0300 Subject: [PATCH] cmd: add hashSUM file support (#5352) Currently rclone check supports matching two file trees by sizes and hashes. This change adds support for SUM files produced by GNU utilities like sha1sum. Fixes #1005 Note: checksum by default checks, hashsum by default prints sums. New flag is named "--checkfile" but carries hash name. Summary of introduced command forms: ``` rclone check sums.sha1 remote:path --checkfile sha1 rclone checksum sha1 sums.sha1 remote:path rclone hashsum sha1 remote:path --checkfile sums.sha1 rclone sha1sum remote:path --checkfile sums.sha1 rclone md5sum remote:path --checkfile sums.md5 ``` --- cmd/all/all.go | 1 + cmd/check/check.go | 48 ++++- cmd/checksum/checksum.go | 57 +++++ cmd/hashsum/hashsum.go | 24 +-- cmd/md5sum/md5sum.go | 4 + cmd/sha1sum/sha1sum.go | 4 + docs/content/commands/rclone_check.md | 4 + docs/content/commands/rclone_checksum.md | 68 ++++++ docs/content/commands/rclone_hashsum.md | 1 + docs/content/commands/rclone_md5sum.md | 1 + docs/content/commands/rclone_sha1sum.md | 1 + fs/filter/filter.go | 6 +- fs/hash/hash.go | 12 ++ fs/operations/check.go | 234 ++++++++++++++++++++- fs/operations/check_test.go | 253 +++++++++++++++++++++++ fs/operations/operations.go | 4 +- fs/operations/operations_test.go | 4 +- 17 files changed, 685 insertions(+), 41 deletions(-) create mode 100644 cmd/checksum/checksum.go create mode 100644 docs/content/commands/rclone_checksum.md diff --git a/cmd/all/all.go b/cmd/all/all.go index 9ce59e76b91c7..dbf2147ad48bf 100644 --- a/cmd/all/all.go +++ b/cmd/all/all.go @@ -10,6 +10,7 @@ import ( _ "github.com/rclone/rclone/cmd/cachestats" _ "github.com/rclone/rclone/cmd/cat" _ "github.com/rclone/rclone/cmd/check" + _ "github.com/rclone/rclone/cmd/checksum" _ "github.com/rclone/rclone/cmd/cleanup" _ "github.com/rclone/rclone/cmd/cmount" _ "github.com/rclone/rclone/cmd/config" diff --git a/cmd/check/check.go b/cmd/check/check.go index 11117b1470b3b..d9dc1b1adcc4d 100644 --- a/cmd/check/check.go +++ b/cmd/check/check.go @@ -2,6 +2,7 @@ package check import ( "context" + "fmt" "io" "os" "strings" @@ -17,20 +18,22 @@ import ( // Globals var ( - download = false - oneway = false - combined = "" - missingOnSrc = "" - missingOnDst = "" - match = "" - differ = "" - errFile = "" + download = false + oneway = false + combined = "" + missingOnSrc = "" + missingOnDst = "" + match = "" + differ = "" + errFile = "" + checkFileHashType = "" ) func init() { cmd.Root.AddCommand(commandDefinition) cmdFlags := commandDefinition.Flags() flags.BoolVarP(cmdFlags, &download, "download", "", download, "Check by downloading rather than with hash.") + flags.StringVarP(cmdFlags, &checkFileHashType, "checkfile", "C", checkFileHashType, "Treat source:path as a SUM file with hashes of given type") AddFlags(cmdFlags) } @@ -126,7 +129,6 @@ func GetCheckOpt(fsrc, fdst fs.Fs) (opt *operations.CheckOpt, close func(), err } return opt, close, nil - } var commandDefinition = &cobra.Command{ @@ -144,16 +146,39 @@ If you supply the |--download| flag, it will download the data from both remotes and check them against each other on the fly. This can be useful for remotes that don't support hashes or if you really want to check all the data. + +If you supply the |--checkfile HASH| flag with a valid hash name, +the |source:path| must point to a text file in the SUM format. `, "|", "`") + FlagsHelp, - Run: func(command *cobra.Command, args []string) { + RunE: func(command *cobra.Command, args []string) error { cmd.CheckArgs(2, 2, command, args) - fsrc, fdst := cmd.NewFsSrcDst(args) + var ( + fsrc, fdst fs.Fs + hashType hash.Type + fsum fs.Fs + sumFile string + ) + if checkFileHashType != "" { + if err := hashType.Set(checkFileHashType); err != nil { + fmt.Println(hash.HelpString(0)) + return err + } + fsum, sumFile, fsrc = cmd.NewFsSrcFileDst(args) + } else { + fsrc, fdst = cmd.NewFsSrcDst(args) + } + cmd.Run(false, true, command, func() error { opt, close, err := GetCheckOpt(fsrc, fdst) if err != nil { return err } defer close() + + if checkFileHashType != "" { + return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hashType, opt, download) + } + if download { return operations.CheckDownload(context.Background(), opt) } @@ -165,5 +190,6 @@ to check all the data. } return operations.Check(context.Background(), opt) }) + return nil }, } diff --git a/cmd/checksum/checksum.go b/cmd/checksum/checksum.go new file mode 100644 index 0000000000000..2c74df1acf95f --- /dev/null +++ b/cmd/checksum/checksum.go @@ -0,0 +1,57 @@ +package checksum + +import ( + "context" + "fmt" + "strings" + + "github.com/rclone/rclone/cmd" + "github.com/rclone/rclone/cmd/check" // for common flags + "github.com/rclone/rclone/fs/config/flags" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/fs/operations" + "github.com/spf13/cobra" +) + +var download = false + +func init() { + cmd.Root.AddCommand(commandDefinition) + cmdFlags := commandDefinition.Flags() + flags.BoolVarP(cmdFlags, &download, "download", "", download, "Check by hashing the contents.") + check.AddFlags(cmdFlags) +} + +var commandDefinition = &cobra.Command{ + Use: "checksum sumfile src:path", + Short: `Checks the files in the source against a SUM file.`, + Long: strings.ReplaceAll(` +Checks that hashsums of source files match the SUM file. +It compares hashes (MD5, SHA1, etc) and logs a report of files which +don't match. It doesn't alter the file system. + +If you supply the |--download| flag, it will download the data from remote +and calculate the contents hash on the fly. This can be useful for remotes +that don't support hashes or if you really want to check all the data. +`, "|", "`") + check.FlagsHelp, + RunE: func(command *cobra.Command, args []string) error { + cmd.CheckArgs(3, 3, command, args) + var hashType hash.Type + if err := hashType.Set(args[0]); err != nil { + fmt.Println(hash.HelpString(0)) + return err + } + fsum, sumFile, fsrc := cmd.NewFsSrcFileDst(args[1:]) + + cmd.Run(false, true, command, func() error { + opt, close, err := check.GetCheckOpt(nil, fsrc) + if err != nil { + return err + } + defer close() + + return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hashType, opt, download) + }) + return nil + }, +} diff --git a/cmd/hashsum/hashsum.go b/cmd/hashsum/hashsum.go index a69f8633bbb61..05da76273b4e9 100644 --- a/cmd/hashsum/hashsum.go +++ b/cmd/hashsum/hashsum.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "os" - "strings" "github.com/pkg/errors" "github.com/rclone/rclone/cmd" @@ -21,6 +20,7 @@ var ( OutputBase64 = false DownloadFlag = false HashsumOutfile = "" + ChecksumFile = "" ) func init() { @@ -33,6 +33,7 @@ func init() { func AddHashFlags(cmdFlags *pflag.FlagSet) { flags.BoolVarP(cmdFlags, &OutputBase64, "base64", "", OutputBase64, "Output base64 encoded hashsum") flags.StringVarP(cmdFlags, &HashsumOutfile, "output-file", "", HashsumOutfile, "Output hashsums to a file rather than the terminal") + flags.StringVarP(cmdFlags, &ChecksumFile, "checkfile", "C", ChecksumFile, "Validate hashes against a given SUM file instead of printing them") flags.BoolVarP(cmdFlags, &DownloadFlag, "download", "", DownloadFlag, "Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote") } @@ -70,7 +71,7 @@ hashed locally enabling any hash for any remote. Run without a hash to see the list of all supported hashes, e.g. $ rclone hashsum -` + hashListHelp(" ") + ` +` + hash.HelpString(4) + ` Then $ rclone hashsum MD5 remote:path @@ -80,7 +81,7 @@ Note that hash names are case insensitive. RunE: func(command *cobra.Command, args []string) error { cmd.CheckArgs(0, 2, command, args) if len(args) == 0 { - fmt.Print(hashListHelp("")) + fmt.Print(hash.HelpString(0)) return nil } else if len(args) == 1 { return errors.New("need hash type and remote") @@ -88,12 +89,16 @@ Note that hash names are case insensitive. var ht hash.Type err := ht.Set(args[0]) if err != nil { - fmt.Println(hashListHelp("")) + fmt.Println(hash.HelpString(0)) return err } fsrc := cmd.NewFsSrc(args[1:]) cmd.Run(false, false, command, func() error { + if ChecksumFile != "" { + fsum, sumFile := cmd.NewFsFile(ChecksumFile) + return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, ht, nil, DownloadFlag) + } if HashsumOutfile == "" { return operations.HashLister(context.Background(), ht, OutputBase64, DownloadFlag, fsrc, nil) } @@ -107,14 +112,3 @@ Note that hash names are case insensitive. return nil }, } - -func hashListHelp(indent string) string { - var help strings.Builder - help.WriteString(indent) - help.WriteString("Supported hashes are:\n") - for _, ht := range hash.Supported().Array() { - help.WriteString(indent) - fmt.Fprintf(&help, " * %v\n", ht.String()) - } - return help.String() -} diff --git a/cmd/md5sum/md5sum.go b/cmd/md5sum/md5sum.go index f7efd2320960d..51a8df58ea1e7 100644 --- a/cmd/md5sum/md5sum.go +++ b/cmd/md5sum/md5sum.go @@ -32,6 +32,10 @@ hashed locally enabling MD5 for any remote. cmd.CheckArgs(1, 1, command, args) fsrc := cmd.NewFsSrc(args) cmd.Run(false, false, command, func() error { + if hashsum.ChecksumFile != "" { + fsum, sumFile := cmd.NewFsFile(hashsum.ChecksumFile) + return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hash.MD5, nil, hashsum.DownloadFlag) + } if hashsum.HashsumOutfile == "" { return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil) } diff --git a/cmd/sha1sum/sha1sum.go b/cmd/sha1sum/sha1sum.go index ca1c15dd34466..dd7c5741f09b0 100644 --- a/cmd/sha1sum/sha1sum.go +++ b/cmd/sha1sum/sha1sum.go @@ -32,6 +32,10 @@ hashed locally enabling SHA-1 for any remote. cmd.CheckArgs(1, 1, command, args) fsrc := cmd.NewFsSrc(args) cmd.Run(false, false, command, func() error { + if hashsum.ChecksumFile != "" { + fsum, sumFile := cmd.NewFsFile(hashsum.ChecksumFile) + return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hash.SHA1, nil, hashsum.DownloadFlag) + } if hashsum.HashsumOutfile == "" { return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil) } diff --git a/docs/content/commands/rclone_check.md b/docs/content/commands/rclone_check.md index 8eb16c4b4f97a..21b7b01ea8432 100644 --- a/docs/content/commands/rclone_check.md +++ b/docs/content/commands/rclone_check.md @@ -24,6 +24,9 @@ both remotes and check them against each other on the fly. This can be useful for remotes that don't support hashes or if you really want to check all the data. +If you supply the `--checkfile HASH` flag with a valid hash name, +the `source:path` must point to a text file in the SUM format. + If you supply the `--one-way` flag, it will only check that files in the source match the files in the destination, not the other way around. This means that extra files in the destination that are not in @@ -53,6 +56,7 @@ rclone check source:path dest:path [flags] ## Options ``` + -C, --checkfile string Treat source:path as a SUM file with hashes of given type --combined string Make a combined report of changes to this file --differ string Report all non-matching files to this file --download Check by downloading rather than with hash. diff --git a/docs/content/commands/rclone_checksum.md b/docs/content/commands/rclone_checksum.md new file mode 100644 index 0000000000000..6fd1c712e336f --- /dev/null +++ b/docs/content/commands/rclone_checksum.md @@ -0,0 +1,68 @@ +--- +title: "rclone checksum" +description: "Checks the files in the source against a SUM file." +slug: rclone_checksum +url: /commands/rclone_checksum/ +# autogenerated - DO NOT EDIT, instead edit the source code in cmd/checksum/ and as part of making a release run "make commanddocs" +--- +# rclone checksum + +Checks the files in the source against a SUM file. + +## Synopsis + + +Checks that hashsums of source files match the SUM file. +It compares hashes (MD5, SHA1, etc) and logs a report of files which +don't match. It doesn't alter the file system. + +If you supply the `--download` flag, it will download the data from remote +and calculate the contents hash on the fly. This can be useful for remotes +that don't support hashes or if you really want to check all the data. + +If you supply the `--one-way` flag, it will only check that files in +the source match the files in the destination, not the other way +around. This means that extra files in the destination that are not in +the source will not be detected. + +The `--differ`, `--missing-on-dst`, `--missing-on-src`, `--match` +and `--error` flags write paths, one per line, to the file name (or +stdout if it is `-`) supplied. What they write is described in the +help below. For example `--differ` will write all paths which are +present on both the source and destination but different. + +The `--combined` flag will write a file (or stdout) which contains all +file paths with a symbol and then a space and then the path to tell +you what happened to it. These are reminiscent of diff files. + +- `= path` means path was found in source and destination and was identical +- `- path` means path was missing on the source, so only in the destination +- `+ path` means path was missing on the destination, so only in the source +- `* path` means path was present in source and destination but different. +- `! path` means there was an error reading or hashing the source or dest. + + +``` +rclone checksum sumfile src:path [flags] +``` + +## Options + +``` + --combined string Make a combined report of changes to this file + --differ string Report all non-matching files to this file + --download Check by hashing the contents. + --error string Report all files with errors (hashing or reading) to this file + -h, --help help for checksum + --match string Report all matching files to this file + --missing-on-dst string Report all files missing from the destination to this file + --missing-on-src string Report all files missing from the source to this file + --one-way Check one way only, source files must exist on remote +``` + +See the [global flags page](/flags/) for global options not listed here. + +## SEE ALSO + +* [rclone](/commands/rclone/) - Show help for rclone commands, flags and backends. + diff --git a/docs/content/commands/rclone_hashsum.md b/docs/content/commands/rclone_hashsum.md index 45cddc2e4cd6e..fdd5c5bbcecf5 100644 --- a/docs/content/commands/rclone_hashsum.md +++ b/docs/content/commands/rclone_hashsum.md @@ -48,6 +48,7 @@ rclone hashsum remote:path [flags] ``` --base64 Output base64 encoded hashsum + -C, --checkfile string Validate hashes against a given SUM file instead of printing them --download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote -h, --help help for hashsum --output-file string Output hashsums to a file rather than the terminal diff --git a/docs/content/commands/rclone_md5sum.md b/docs/content/commands/rclone_md5sum.md index 76b7cc9f6a0a1..0a79610bab854 100644 --- a/docs/content/commands/rclone_md5sum.md +++ b/docs/content/commands/rclone_md5sum.md @@ -29,6 +29,7 @@ rclone md5sum remote:path [flags] ``` --base64 Output base64 encoded hashsum + -C, --checkfile string Validate hashes against a given SUM file instead of printing them --download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote -h, --help help for md5sum --output-file string Output hashsums to a file rather than the terminal diff --git a/docs/content/commands/rclone_sha1sum.md b/docs/content/commands/rclone_sha1sum.md index a8c13e2b75c29..4fd51c0fee997 100644 --- a/docs/content/commands/rclone_sha1sum.md +++ b/docs/content/commands/rclone_sha1sum.md @@ -29,6 +29,7 @@ rclone sha1sum remote:path [flags] ``` --base64 Output base64 encoded hashsum + -C, --checkfile string Validate hashes against a given SUM file instead of printing them --download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote -h, --help help for sha1sum --output-file string Output hashsums to a file rather than the terminal diff --git a/fs/filter/filter.go b/fs/filter/filter.go index 7e38aa8bc020c..5fa5d388c38f2 100644 --- a/fs/filter/filter.go +++ b/fs/filter/filter.go @@ -373,8 +373,8 @@ func (f *Filter) InActive() bool { len(f.Opt.ExcludeFile) == 0) } -// includeRemote returns whether this remote passes the filter rules. -func (f *Filter) includeRemote(remote string) bool { +// IncludeRemote returns whether this remote passes the filter rules. +func (f *Filter) IncludeRemote(remote string) bool { for _, rule := range f.fileRules.rules { if rule.Match(remote) { return rule.Include @@ -467,7 +467,7 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { return false } - return f.includeRemote(remote) + return f.IncludeRemote(remote) } // IncludeObject returns whether this object should be included into diff --git a/fs/hash/hash.go b/fs/hash/hash.go index e0d791d95088b..064a40da238be 100644 --- a/fs/hash/hash.go +++ b/fs/hash/hash.go @@ -341,3 +341,15 @@ func Equals(src, dst string) bool { } return src == dst } + +// HelpString returns help message with supported hashes +func HelpString(indent int) string { + padding := strings.Repeat(" ", indent) + var help strings.Builder + help.WriteString(padding) + help.WriteString("Supported hashes are:\n") + for _, h := range supported { + fmt.Fprintf(&help, "%s * %v\n", padding, h.String()) + } + return help.String() +} diff --git a/fs/operations/check.go b/fs/operations/check.go index 20017707fb8b4..62e5392f920c8 100644 --- a/fs/operations/check.go +++ b/fs/operations/check.go @@ -1,16 +1,19 @@ package operations import ( + "bufio" "bytes" "context" - "fmt" "io" + "os" + "regexp" "sync" "sync/atomic" "github.com/pkg/errors" "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/accounting" + "github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/fserrors" "github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/fs/march" @@ -56,15 +59,15 @@ type checkMarch struct { // report outputs the fileName to out if required and to the combined log func (c *checkMarch) report(o fs.DirEntry, out io.Writer, sigil rune) { + c.reportFilename(o.String(), out, sigil) +} + +func (c *checkMarch) reportFilename(filename string, out io.Writer, sigil rune) { if out != nil { - c.ioMu.Lock() - _, _ = fmt.Fprintf(out, "%v\n", o) - c.ioMu.Unlock() + syncFprintf(out, "%s\n", filename) } if c.opt.Combined != nil { - c.ioMu.Lock() - _, _ = fmt.Fprintf(c.opt.Combined, "%c %v\n", sigil, o) - c.ioMu.Unlock() + syncFprintf(c.opt.Combined, "%c %s\n", sigil, filename) } } @@ -224,11 +227,19 @@ func CheckFn(ctx context.Context, opt *CheckOpt) error { err := m.Run(ctx) c.wg.Wait() // wait for background go-routines + return c.reportResults(ctx, err) +} + +func (c *checkMarch) reportResults(ctx context.Context, err error) error { if c.dstFilesMissing > 0 { fs.Logf(c.opt.Fdst, "%d files missing", c.dstFilesMissing) } if c.srcFilesMissing > 0 { - fs.Logf(c.opt.Fsrc, "%d files missing", c.srcFilesMissing) + entity := "files" + if c.opt.Fsrc == nil { + entity = "hashes" + } + fs.Logf(c.opt.Fsrc, "%d %s missing", c.srcFilesMissing, entity) } fs.Logf(c.opt.Fdst, "%d differences found", accounting.Stats(ctx).GetErrors()) @@ -358,3 +369,210 @@ func CheckDownload(ctx context.Context, opt *CheckOpt) error { } return CheckFn(ctx, &optCopy) } + +// CheckSum checks filesystem hashes against a SUM file +func CheckSum(ctx context.Context, fsrc, fsum fs.Fs, sumFile string, hashType hash.Type, opt *CheckOpt, download bool) error { + var options CheckOpt + if opt != nil { + options = *opt + } else { + // default options for hashsum -c + options.Combined = os.Stdout + } + // CheckSum treats Fsrc and Fdst specially: + options.Fsrc = nil // no file system here, corresponds to the sum list + options.Fdst = fsrc // denotes the file system to check + opt = &options // override supplied argument + + if !download && (hashType == hash.None || !opt.Fdst.Hashes().Contains(hashType)) { + return errors.Errorf("%s: hash type is not supported by file system: %s", hashType, opt.Fdst) + } + + if sumFile == "" { + return errors.Errorf("not a sum file: %s", fsum) + } + sumObj, err := fsum.NewObject(ctx, sumFile) + if err != nil { + return errors.Wrap(err, "cannot open sum file") + } + hashes, err := ParseSumFile(ctx, sumObj) + if err != nil { + return errors.Wrap(err, "failed to parse sum file") + } + + ci := fs.GetConfig(ctx) + c := &checkMarch{ + tokens: make(chan struct{}, ci.Checkers), + opt: *opt, + } + lastErr := ListFn(ctx, opt.Fdst, func(obj fs.Object) { + c.checkSum(ctx, obj, download, hashes, hashType) + }) + c.wg.Wait() // wait for background go-routines + + // make census of unhandled sums + fi := filter.GetConfig(ctx) + for filename, hash := range hashes { + if hash == "" { // the sum has been successfully consumed + continue + } + if !fi.IncludeRemote(filename) { // the file was filtered out + continue + } + // filesystem missed the file, sum wasn't consumed + err := errors.Errorf("File not in %v", opt.Fdst) + fs.Errorf(filename, "%v", err) + _ = fs.CountError(err) + if lastErr == nil { + lastErr = err + } + atomic.AddInt32(&c.dstFilesMissing, 1) + c.reportFilename(filename, opt.MissingOnDst, '+') + } + + return c.reportResults(ctx, lastErr) +} + +// checkSum checks single object against golden hashes +func (c *checkMarch) checkSum(ctx context.Context, obj fs.Object, download bool, hashes HashSums, hashType hash.Type) { + remote := obj.Remote() + c.ioMu.Lock() + sumHash, sumFound := hashes[remote] + hashes[remote] = "" // mark sum as consumed + c.ioMu.Unlock() + + if !sumFound && c.opt.OneWay { + return + } + + var err error + tr := accounting.Stats(ctx).NewCheckingTransfer(obj) + defer tr.Done(ctx, err) + + if !sumFound { + err = errors.New("sum not found") + _ = fs.CountError(err) + fs.Errorf(obj, "%v", err) + atomic.AddInt32(&c.differences, 1) + atomic.AddInt32(&c.srcFilesMissing, 1) + c.report(obj, c.opt.MissingOnSrc, '-') + return + } + + if !download { + var objHash string + objHash, err = obj.Hash(ctx, hashType) + c.matchSum(ctx, sumHash, objHash, obj, err, hashType) + return + } + + c.wg.Add(1) + c.tokens <- struct{}{} // put a token to limit concurrency + go func() { + var ( + objHash string + err error + in io.ReadCloser + ) + defer func() { + c.matchSum(ctx, sumHash, objHash, obj, err, hashType) + <-c.tokens // get the token back to free up a slot + c.wg.Done() + }() + if in, err = obj.Open(ctx); err != nil { + return + } + tr := accounting.Stats(ctx).NewTransfer(obj) + in = tr.Account(ctx, in).WithBuffer() // account and buffer the transfer + defer func() { + tr.Done(ctx, nil) // will close the stream + }() + hashVals, err2 := hash.StreamTypes(in, hash.NewHashSet(hashType)) + if err2 != nil { + err = err2 // pass to matchSum + return + } + objHash = hashVals[hashType] + }() +} + +// matchSum sums up the results of hashsum matching for an object +func (c *checkMarch) matchSum(ctx context.Context, sumHash, objHash string, obj fs.Object, err error, hashType hash.Type) { + switch { + case err != nil: + _ = fs.CountError(err) + fs.Errorf(obj, "Failed to calculate hash: %v", err) + c.report(obj, c.opt.Error, '!') + case sumHash == "": + err = errors.New("duplicate file") + _ = fs.CountError(err) + fs.Errorf(obj, "%v", err) + c.report(obj, c.opt.Error, '!') + case objHash == "": + fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash) + fs.Debugf(obj, "%v - could not check hash (%v)", hashType, c.opt.Fdst) + atomic.AddInt32(&c.noHashes, 1) + atomic.AddInt32(&c.matches, 1) + c.report(obj, c.opt.Match, '=') + case objHash == sumHash: + fs.Debugf(obj, "%v = %s OK", hashType, sumHash) + atomic.AddInt32(&c.matches, 1) + c.report(obj, c.opt.Match, '=') + default: + err = errors.New("files differ") + _ = fs.CountError(err) + fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash) + fs.Debugf(obj, "%v = %s (%v)", hashType, objHash, c.opt.Fdst) + fs.Errorf(obj, "%v", err) + atomic.AddInt32(&c.differences, 1) + c.report(obj, c.opt.Differ, '*') + } +} + +// HashSums represents a parsed SUM file +type HashSums map[string]string + +// ParseSumFile parses a hash SUM file and returns hashes as a map +func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) { + rd, err := sumFile.Open(ctx) + if err != nil { + return nil, err + } + parser := bufio.NewReader(rd) + + const maxWarn = 4 + numWarn := 0 + + re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`) + hashes := HashSums{} + for lineNo := 0; true; lineNo++ { + lineBytes, _, err := parser.ReadLine() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + line := string(lineBytes) + if line == "" { + continue + } + + if fields := re.FindStringSubmatch(line); fields != nil { + hashes[fields[2]] = fields[1] + continue + } + + numWarn++ + if numWarn < maxWarn { + fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo) + } else if numWarn == maxWarn { + fs.Logf(sumFile, "more warnings suppressed...") + } + } + + if err = rd.Close(); err != nil { + return nil, err + } + return hashes, nil +} diff --git a/fs/operations/check_test.go b/fs/operations/check_test.go index df64212436a6c..a6c6436746fc6 100644 --- a/fs/operations/check_test.go +++ b/fs/operations/check_test.go @@ -14,6 +14,7 @@ import ( "github.com/pkg/errors" "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/accounting" + "github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/fs/operations" "github.com/rclone/rclone/fstest" "github.com/rclone/rclone/lib/readers" @@ -275,3 +276,255 @@ func TestCheckEqualReaders(t *testing.T) { assert.Equal(t, myErr, err) assert.Equal(t, differ, true) } + +func TestParseSumFile(t *testing.T) { + r := fstest.NewRun(t) + defer r.Finalise() + ctx := context.Background() + + const sumFile = "test.sum" + + samples := []struct { + hash, sep, name string + ok bool + }{ + {"1", " ", "file1", true}, + {"2", " *", "file2", true}, + {"3", " ", " file3 ", true}, + {"4", " ", "\tfile3\t", true}, + {"5", " ", "file5", false}, + {"6", "\t", "file6", false}, + {"7", " \t", " file7 ", false}, + {"", " ", "file8", false}, + {"", "", "file9", false}, + } + + for _, eol := range []string{"\n", "\r\n"} { + data := &bytes.Buffer{} + wantNum := 0 + for _, s := range samples { + _, _ = data.WriteString(s.hash + s.sep + s.name + eol) + if s.ok { + wantNum++ + } + } + + _ = r.WriteObject(ctx, sumFile, data.String(), t1) + file, err := r.Fremote.NewObject(ctx, sumFile) + assert.NoError(t, err) + sums, err := operations.ParseSumFile(ctx, file) + assert.NoError(t, err) + + assert.Equal(t, wantNum, len(sums)) + for _, s := range samples { + if s.ok { + assert.Equal(t, s.hash, sums[s.name]) + } + } + } +} + +func testCheckSum(t *testing.T, download bool) { + const dataDir = "data" + const sumFile = "test.sum" + + hashType := hash.MD5 + const ( + testString1 = "Hello, World!" + testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4" + testString2 = "I am the walrus" + testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3" + ) + + type wantType map[string]string + + ctx := context.Background() + r := fstest.NewRun(t) + defer r.Finalise() + + subRemote := r.FremoteName + if !strings.HasSuffix(subRemote, ":") { + subRemote += "/" + } + subRemote += dataDir + dataFs, err := fs.NewFs(ctx, subRemote) + require.NoError(t, err) + + if !download && !dataFs.Hashes().Contains(hashType) { + t.Skipf("%s lacks %s, skipping", dataFs, hashType) + } + + makeFile := func(name, content string) fstest.Item { + remote := dataDir + "/" + name + return r.WriteObject(ctx, remote, content, t1) + } + + makeSums := func(sums operations.HashSums) fstest.Item { + files := make([]string, 0, len(sums)) + for name := range sums { + files = append(files, name) + } + sort.Strings(files) + buf := &bytes.Buffer{} + for _, name := range files { + _, _ = fmt.Fprintf(buf, "%s %s\n", sums[name], name) + } + return r.WriteObject(ctx, sumFile, buf.String(), t1) + } + + sortLines := func(in string) []string { + if in == "" { + return []string{} + } + lines := strings.Split(in, "\n") + sort.Strings(lines) + return lines + } + + checkResult := func(runNo int, want wantType, name string, out io.Writer) { + expected := want[name] + buf, ok := out.(*bytes.Buffer) + require.True(t, ok) + assert.Equal(t, sortLines(expected), sortLines(buf.String()), "wrong %s result in run %d", name, runNo) + } + + checkRun := func(runNo, wantChecks, wantErrors int, want wantType) { + accounting.GlobalStats().ResetCounters() + buf := new(bytes.Buffer) + log.SetOutput(buf) + defer log.SetOutput(os.Stderr) + + opt := operations.CheckOpt{ + Combined: new(bytes.Buffer), + Match: new(bytes.Buffer), + Differ: new(bytes.Buffer), + Error: new(bytes.Buffer), + MissingOnSrc: new(bytes.Buffer), + MissingOnDst: new(bytes.Buffer), + } + err := operations.CheckSum(ctx, dataFs, r.Fremote, sumFile, hashType, &opt, download) + + gotErrors := int(accounting.GlobalStats().GetErrors()) + if wantErrors == 0 { + assert.NoError(t, err, "unexpected error in run %d", runNo) + } + if wantErrors > 0 { + assert.Error(t, err, "no expected error in run %d", runNo) + } + assert.Equal(t, wantErrors, gotErrors, "wrong error count in run %d", runNo) + + gotChecks := int(accounting.GlobalStats().GetChecks()) + if wantChecks > 0 || gotChecks > 0 { + assert.Contains(t, buf.String(), "matching files", "missing matching files in run %d", runNo) + } + assert.Equal(t, wantChecks, gotChecks, "wrong number of checks in run %d", runNo) + + checkResult(runNo, want, "combined", opt.Combined) + checkResult(runNo, want, "missingonsrc", opt.MissingOnSrc) + checkResult(runNo, want, "missingondst", opt.MissingOnDst) + checkResult(runNo, want, "match", opt.Match) + checkResult(runNo, want, "differ", opt.Differ) + checkResult(runNo, want, "error", opt.Error) + } + + check := func(runNo, wantChecks, wantErrors int, wantResults wantType) { + runName := fmt.Sprintf("move%d", runNo) + t.Run(runName, func(t *testing.T) { + checkRun(runNo, wantChecks, wantErrors, wantResults) + }) + } + + file1 := makeFile("banana", testString1) + fcsums := makeSums(operations.HashSums{ + "banana": testDigest1, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1) + check(1, 1, 0, wantType{ + "combined": "= banana\n", + "missingonsrc": "", + "missingondst": "", + "match": "banana\n", + "differ": "", + "error": "", + }) + + file2 := makeFile("potato", testString2) + fcsums = makeSums(operations.HashSums{ + "banana": testDigest1, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1, file2) + check(2, 2, 1, wantType{ + "combined": "- potato\n= banana\n", + "missingonsrc": "potato\n", + "missingondst": "", + "match": "banana\n", + "differ": "", + "error": "", + }) + + fcsums = makeSums(operations.HashSums{ + "banana": testDigest1, + "potato": testDigest2, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1, file2) + check(3, 2, 0, wantType{ + "combined": "= potato\n= banana\n", + "missingonsrc": "", + "missingondst": "", + "match": "banana\npotato\n", + "differ": "", + "error": "", + }) + + fcsums = makeSums(operations.HashSums{ + "banana": testDigest2, + "potato": testDigest2, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1, file2) + check(4, 2, 1, wantType{ + "combined": "* banana\n= potato\n", + "missingonsrc": "", + "missingondst": "", + "match": "potato\n", + "differ": "banana\n", + "error": "", + }) + + fcsums = makeSums(operations.HashSums{ + "banana": testDigest1, + "potato": testDigest2, + "orange": testDigest2, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1, file2) + check(5, 2, 1, wantType{ + "combined": "+ orange\n= potato\n= banana\n", + "missingonsrc": "", + "missingondst": "orange\n", + "match": "banana\npotato\n", + "differ": "", + "error": "", + }) + + fcsums = makeSums(operations.HashSums{ + "banana": testDigest1, + "potato": testDigest1, + "orange": testDigest2, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1, file2) + check(6, 2, 2, wantType{ + "combined": "+ orange\n* potato\n= banana\n", + "missingonsrc": "", + "missingondst": "orange\n", + "match": "banana\n", + "differ": "potato\n", + "error": "", + }) +} + +func TestCheckSum(t *testing.T) { + testCheckSum(t, false) +} + +func TestCheckSumDownload(t *testing.T) { + testCheckSum(t, true) +} diff --git a/fs/operations/operations.go b/fs/operations/operations.go index c3c353aff0ea2..a8bb14eb3c1f3 100644 --- a/fs/operations/operations.go +++ b/fs/operations/operations.go @@ -853,7 +853,7 @@ var SyncPrintf = func(format string, a ...interface{}) { func syncFprintf(w io.Writer, format string, a ...interface{}) { outMutex.Lock() defer outMutex.Unlock() - if w == nil { + if w == nil || w == os.Stdout { SyncPrintf(format, a...) } else { _, _ = fmt.Fprintf(w, format, a...) @@ -1751,7 +1751,7 @@ func moveOrCopyFile(ctx context.Context, fdst fs.Fs, fsrc fs.Fs, dstFileName str // This will move the file to a temporary name then // move it back to the intended destination. This is required // to avoid issues with certain remotes and avoid file deletion. - if !cp && fdst.Name() == fsrc.Name() && fdst.Features().CaseInsensitive && dstFileName != srcFileName && strings.ToLower(dstFilePath) == strings.ToLower(srcFilePath) { + if !cp && fdst.Name() == fsrc.Name() && fdst.Features().CaseInsensitive && dstFileName != srcFileName && strings.EqualFold(dstFilePath, srcFilePath) { // Create random name to temporarily move file to tmpObjName := dstFileName + "-rclone-move-" + random.String(8) _, err := fdst.NewObject(ctx, tmpObjName) diff --git a/fs/operations/operations_test.go b/fs/operations/operations_test.go index f01b07a3ede44..3f39eb6fc8488 100644 --- a/fs/operations/operations_test.go +++ b/fs/operations/operations_test.go @@ -713,7 +713,7 @@ func TestCopyURL(t *testing.T) { fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1}, nil, fs.ModTimeNotSupported) // Check file clobbering - o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, true) + _, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, true) require.Error(t, err) // Check auto file naming @@ -725,7 +725,7 @@ func TestCopyURL(t *testing.T) { assert.Equal(t, urlFileName, o.Remote()) // Check auto file naming when url without file name - o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false) + _, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false) require.Error(t, err) // Check an error is returned for a 404