From 1d6be937035854dba5a8f58a320fb8b3826755a0 Mon Sep 17 00:00:00 2001 From: nielash Date: Tue, 6 Feb 2024 10:11:46 -0500 Subject: [PATCH] convmv command WIP --- cmd/all/all.go | 1 + cmd/convmv/convmv.go | 487 +++++++++++++++++++++++++ cmd/convmv/convmv_examples.go | 83 +++++ cmd/convmv/convmv_test.go | 202 ++++++++++ docs/content/commands/rclone_convmv.md | 299 +++++++++++++++ lib/encoder/encoder.go | 46 +-- 6 files changed, 1091 insertions(+), 27 deletions(-) create mode 100644 cmd/convmv/convmv.go create mode 100644 cmd/convmv/convmv_examples.go create mode 100644 cmd/convmv/convmv_test.go create mode 100644 docs/content/commands/rclone_convmv.md diff --git a/cmd/all/all.go b/cmd/all/all.go index 5fb87ed16317c..f2ec6b7818c29 100644 --- a/cmd/all/all.go +++ b/cmd/all/all.go @@ -15,6 +15,7 @@ import ( _ "github.com/rclone/rclone/cmd/cleanup" _ "github.com/rclone/rclone/cmd/cmount" _ "github.com/rclone/rclone/cmd/config" + _ "github.com/rclone/rclone/cmd/convmv" _ "github.com/rclone/rclone/cmd/copy" _ "github.com/rclone/rclone/cmd/copyto" _ "github.com/rclone/rclone/cmd/copyurl" diff --git a/cmd/convmv/convmv.go b/cmd/convmv/convmv.go new file mode 100644 index 0000000000000..a5bb73a46182d --- /dev/null +++ b/cmd/convmv/convmv.go @@ -0,0 +1,487 @@ +// Package convmv provides the convmv command. 
+package convmv + +import ( + "context" + "encoding/base64" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "unicode/utf8" + + "github.com/rclone/rclone/cmd" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/config/flags" + "github.com/rclone/rclone/fs/list" + "github.com/rclone/rclone/fs/operations" + "github.com/rclone/rclone/lib/encoder" + "github.com/rclone/rclone/lib/random" + "github.com/spf13/cobra" + "golang.org/x/text/encoding/charmap" + "golang.org/x/text/unicode/norm" +) + +// Globals +var ( + Opt ConvOpt + Cmaps = map[int]*charmap.Charmap{} +) + +// ConvOpt sets the conversion options +type ConvOpt struct { + ctx context.Context + f fs.Fs + ConvertAlgo Convert + FindReplace []string + Prefix string + Suffix string + Max int + Enc encoder.MultiEncoder + CmapFlag fs.Enum[cmapChoices] + Cmap *charmap.Charmap + List bool +} + +func init() { + cmd.Root.AddCommand(commandDefinition) + cmdFlags := commandDefinition.Flags() + flags.FVarP(cmdFlags, &Opt.ConvertAlgo, "conv", "t", "Conversion algorithm: "+Opt.ConvertAlgo.Help(), "") + flags.StringVarP(cmdFlags, &Opt.Prefix, "prefix", "", "", "In 'prefix' or 'trimprefix' mode, append or trim this prefix", "") + flags.StringVarP(cmdFlags, &Opt.Suffix, "suffix", "", "", "In 'suffix' or 'trimsuffix' mode, append or trim this suffix", "") + flags.IntVarP(cmdFlags, &Opt.Max, "max", "m", -1, "In 'truncate' mode, truncate all path segments longer than this many characters", "") + flags.StringArrayVarP(cmdFlags, &Opt.FindReplace, "replace", "r", nil, "In 'replace' mode, this is a pair of find,replace values (can repeat flag more than once)", "") + flags.FVarP(cmdFlags, &Opt.Enc, "encoding", "", "Custom backend encoding: (use --list to see full list)", "") + flags.FVarP(cmdFlags, &Opt.CmapFlag, "charmap", "", "Other character encoding (use --list to see full list) ", "") + flags.BoolVarP(cmdFlags, &Opt.List, "list", "", false, "Print full list of options", "") +} + +// Convert describes conversion 
setting +type Convert = fs.Enum[convertChoices] + +// Supported conversion options +const ( + ConvNone Convert = iota + ConvToNFC + ConvToNFD + ConvToNFKC + ConvToNFKD + ConvFindReplace + ConvPrefix + ConvSuffix + ConvTrimPrefix + ConvTrimSuffix + ConvIndex + ConvDate + ConvTruncate + ConvBase64Encode + ConvBase64Decode + ConvEncoder + ConvDecoder + ConvISO8859_1 + ConvWindows1252 + ConvMacintosh + ConvCharmap + ConvLowercase + ConvUppercase + ConvTitlecase + ConvASCII + ConvURL + ConvMapper +) + +type convertChoices struct{} + +func (convertChoices) Choices() []string { + return []string{ + ConvNone: "none", + ConvToNFC: "nfc", + ConvToNFD: "nfd", + ConvToNFKC: "nfkc", + ConvToNFKD: "nfkd", + ConvFindReplace: "replace", + ConvPrefix: "prefix", + ConvSuffix: "suffix", + ConvTrimPrefix: "trimprefix", + ConvTrimSuffix: "trimsuffix", + ConvIndex: "index", + ConvDate: "date", + ConvTruncate: "truncate", + ConvBase64Encode: "base64encode", + ConvBase64Decode: "base64decode", + ConvEncoder: "encoder", + ConvDecoder: "decoder", + ConvISO8859_1: "ISO-8859-1", + ConvWindows1252: "Windows-1252", + ConvMacintosh: "Macintosh", + ConvCharmap: "charmap", + ConvLowercase: "lowercase", + ConvUppercase: "uppercase", + ConvTitlecase: "titlecase", + ConvASCII: "ascii", + ConvURL: "url", + ConvMapper: "mapper", + } +} + +func (convertChoices) Type() string { + return "string" +} + +type cmapChoices struct{} + +func (cmapChoices) Choices() []string { + choices := make([]string, 1) + i := 0 + for _, enc := range charmap.All { + c, ok := enc.(*charmap.Charmap) + if !ok { + continue + } + name := strings.ReplaceAll(c.String(), " ", "-") + if name == "" { + name = fmt.Sprintf("unknown-%d", i) + } + Cmaps[i] = c + choices = append(choices, name) + i++ + } + return choices +} + +func (cmapChoices) Type() string { + return "string" +} + +func charmapByID(cm fs.Enum[cmapChoices]) *charmap.Charmap { + c, ok := Cmaps[int(cm)] + if ok { + return c + } + return nil +} + +var commandDefinition = 
&cobra.Command{
	Use:   "convmv source:path",
	Short: `Convert file and directory names`,
	// Warning! "|" will be replaced by backticks below
	Long: strings.ReplaceAll(`
Docs todo!

`+sprintList()+`

`, "|", "`"),
	Annotations: map[string]string{
		"versionIntroduced": "v1.66",
		"groups":            "Filter,Listing,Important,Copy",
	},
	Run: func(command *cobra.Command, args []string) {
		cmd.CheckArgs(1, 1, command, args)
		fsrc, srcFileName := cmd.NewFsFile(args[0])
		cmd.Run(false, true, command, func() error { // retries switched off to prevent double-encoding
			return Convmv(context.Background(), fsrc, srcFileName)
		})
	},
}

// Convmv converts and renames files and directories
// pass srcFileName == "" to convmv every object in fsrc instead of a single object
//
// It records ctx and f in the package-level Opt, which the walking and
// conversion helpers read. If Opt.List is set it only prints the list of
// options. Otherwise the options are validated before anything is renamed.
func Convmv(ctx context.Context, f fs.Fs, srcFileName string) error {
	Opt.ctx = ctx
	Opt.f = f
	if Opt.List {
		printList()
		return nil
	}
	err := Opt.validate()
	if err != nil {
		return err
	}

	if srcFileName == "" {
		// it's a dir
		return walkConv(ctx, f, "")
	}
	// it's a file
	obj, err := f.NewObject(Opt.ctx, srcFileName)
	if err != nil {
		return err
	}
	oldName, newName, skip, err := parseEntry(obj)
	if err != nil {
		return err
	}
	if skip {
		return nil
	}
	return operations.MoveFile(Opt.ctx, Opt.f, Opt.f, newName, oldName)
}

// validate checks that the flags required by the chosen conversion mode are
// present and well formed. In charmap mode it also resolves CmapFlag and
// stores the result in opt.Cmap. Modes without a case here need no extra flags.
func (opt *ConvOpt) validate() error {
	switch opt.ConvertAlgo {
	case ConvNone:
		return errors.New("must choose a conversion mode with -t flag")
	case ConvFindReplace:
		if len(opt.FindReplace) == 0 {
			return errors.New("must include --replace flag in replace mode")
		}
		for _, set := range opt.FindReplace {
			split := strings.Split(set, ",")
			if len(split) != 2 {
				return errors.New("--replace must include exactly two comma-separated values")
			}
			if split[0] == "" {
				return errors.New("'find' value cannot be blank ('replace' can be)")
			}
		}
	case ConvPrefix, ConvTrimPrefix:
		if opt.Prefix == "" {
			return errors.New("must include a --prefix")
		}
	case ConvSuffix, ConvTrimSuffix:
		if opt.Suffix == "" {
			return errors.New("must include a --suffix")
		}
	case ConvTruncate:
		if opt.Max < 1 {
			return errors.New("--max cannot be less than 1 in 'truncate' mode")
		}
	case ConvCharmap:
		if opt.CmapFlag == 0 {
			return errors.New("must specify a charmap with --charmap flag")
		}
		c := charmapByID(opt.CmapFlag)
		if c == nil {
			return errors.New("unknown charmap")
		}
		opt.Cmap = c
	}

	return nil
}

// walkConv lists the single directory dir of f (sorted, without applying
// filters to the listing itself) and hands the entries to walkFunc, which
// renames them and recurses into subdirectories.
func walkConv(ctx context.Context, f fs.Fs, dir string) error {
	entries, err := list.DirSorted(ctx, f, false, dir)
	if err != nil {
		return err
	}
	return walkFunc(dir, entries, nil)
}

// walkFunc renames every entry of one directory listing and recurses into
// each subdirectory. path is the directory the entries came from and err is
// any error from listing it, which is returned unchanged.
//
// It stops at the first error. It works on the remote and context stored in
// Opt rather than on arguments.
func walkFunc(path string, entries fs.DirEntries, err error) error {
	fs.Debugf(path, "walking dir")
	if err != nil {
		return err
	}
	for _, entry := range entries {
		switch x := entry.(type) {
		case fs.Object:
			oldName, newName, skip, err := parseEntry(x)
			if err != nil {
				return err
			}
			if skip {
				continue
			}
			fs.Debugf(x, "renaming to %q", newName)
			err = operations.MoveFile(Opt.ctx, Opt.f, Opt.f, newName, oldName)
			if err != nil {
				return err
			}
		case fs.Directory:
			oldName, newName, skip, err := parseEntry(x)
			if err != nil {
				return err
			}
			if !skip { // still want to recurse during dry-runs to get accurate logs
				err = DirMoveCaseInsensitive(Opt.ctx, Opt.f, oldName, newName)
				if err != nil {
					return err
				}
			} else {
				newName = oldName // otherwise dry-runs won't be able to find it
			}
			// recurse, calling it by its new name
			err = walkConv(Opt.ctx, Opt.f, newName)
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// ConvertPath converts a path string according to the chosen ConvertAlgo.
// Each path segment is converted separately, to preserve path separators.
+// If baseOnly is true, only the base will be converted (useful for renaming while walking a dir tree recursively.) +// for example, "some/nested/path" -> "some/nested/CONVERTEDPATH" +// otherwise, the entire is path is converted. +func ConvertPath(s string, ConvertAlgo Convert, baseOnly bool) (string, error) { + if s == "" || s == "/" || s == "\\" || s == "." { + return "", nil + } + + if baseOnly { + convertedBase, err := ConvertPathSegment(filepath.Base(s), ConvertAlgo) + return filepath.Join(filepath.Dir(s), convertedBase), err + } + + segments := strings.Split(s, string(os.PathSeparator)) + convertedSegments := make([]string, len(segments)) + for _, seg := range segments { + convSeg, err := ConvertPathSegment(seg, ConvertAlgo) + if err != nil { + return "", err + } + convertedSegments = append(convertedSegments, convSeg) + } + return filepath.Join(convertedSegments...), nil +} + +// ConvertPathSegment converts one path segment (or really any string) according to the chosen ConvertAlgo. +// It assumes path separators have already been trimmed. +func ConvertPathSegment(s string, ConvertAlgo Convert) (string, error) { + fs.Debugf(s, "converting") + switch ConvertAlgo { + case ConvNone: + return s, nil + case ConvToNFC: + return norm.NFC.String(s), nil + case ConvToNFD: + return norm.NFD.String(s), nil + case ConvToNFKC: + return norm.NFKC.String(s), nil + case ConvToNFKD: + return norm.NFKD.String(s), nil + case ConvBase64Encode: + return base64.URLEncoding.EncodeToString([]byte(s)), nil // URLEncoding to avoid slashes + case ConvBase64Decode: + if s == ".DS_Store" { + return s, nil + } + b, err := base64.URLEncoding.DecodeString(s) + return string(b), err + case ConvFindReplace: + oldNews := []string{} + for _, pair := range Opt.FindReplace { + split := strings.Split(pair, ",") + oldNews = append(oldNews, split...) + } + replacer := strings.NewReplacer(oldNews...) 
+ return replacer.Replace(s), nil + case ConvPrefix: + return Opt.Prefix + s, nil + case ConvSuffix: + return s + Opt.Suffix, nil + case ConvTrimPrefix: + return strings.TrimPrefix(s, Opt.Prefix), nil + case ConvTrimSuffix: + return strings.TrimSuffix(s, Opt.Suffix), nil + case ConvTruncate: + if Opt.Max <= 0 { + return s, nil + } + if utf8.RuneCountInString(s) <= Opt.Max { + return s, nil + } + runes := []rune(s) + return string(runes[:Opt.Max]), nil + case ConvEncoder: + return Opt.Enc.Encode(s), nil + case ConvDecoder: + return Opt.Enc.Decode(s), nil + case ConvISO8859_1: + return encodeWithReplacement(s, charmap.ISO8859_1), nil + case ConvWindows1252: + return encodeWithReplacement(s, charmap.Windows1252), nil + case ConvMacintosh: + return encodeWithReplacement(s, charmap.Macintosh), nil + case ConvCharmap: + return encodeWithReplacement(s, Opt.Cmap), nil + case ConvLowercase: + return strings.ToLower(s), nil + case ConvUppercase: + return strings.ToUpper(s), nil + case ConvTitlecase: + return strings.ToTitle(s), nil + case ConvASCII: + return toASCII(s), nil + default: + return "", errors.New("this option is not yet implemented") + } +} + +func parseEntry(e fs.DirEntry) (oldName, newName string, skip bool, err error) { + oldName = e.Remote() + newName, err = ConvertPath(oldName, Opt.ConvertAlgo, true) + if err != nil { + fs.Errorf(oldName, "error converting: %v", err) + return oldName, newName, true, err + } + if oldName == newName { + fs.Debugf(oldName, "name is already correct - skipping") + return oldName, newName, true, nil + } + skip = operations.SkipDestructive(Opt.ctx, oldName, "rename to "+newName) + return oldName, newName, skip, nil +} + +// DirMoveCaseInsensitive does DirMove in two steps (to temp name, then real name) +// which is necessary for some case-insensitive backends +func DirMoveCaseInsensitive(ctx context.Context, f fs.Fs, srcRemote, dstRemote string) (err error) { + tmpDstRemote := dstRemote + "-rclone-move-" + random.String(8) + err = 
operations.DirMove(ctx, f, srcRemote, tmpDstRemote) + if err != nil { + return err + } + return operations.DirMove(ctx, f, tmpDstRemote, dstRemote) +} + +func encodeWithReplacement(s string, cmap *charmap.Charmap) string { + return strings.Map(func(r rune) rune { + b, ok := cmap.EncodeRune(r) + if !ok { + return '_' + } + return cmap.DecodeByte(b) + }, s) +} + +func toASCII(s string) string { + return strings.Map(func(r rune) rune { + if r <= 127 { + return r + } + return -1 + }, s) +} + +func sprintList() string { + s := fmt.Sprintln("Conversion modes: ") + for _, v := range Opt.ConvertAlgo.Choices() { + s += fmt.Sprintln(v + " ") + } + s += fmt.Sprintln() + + s += fmt.Sprintln("Char maps: ") + for _, v := range Opt.CmapFlag.Choices() { + s += fmt.Sprintln(v + " ") + } + s += fmt.Sprintln() + + s += fmt.Sprintln("Encoding masks: ") + for _, v := range strings.Split(encoder.ValidStrings(), ",") { + s += fmt.Sprintln(v + " ") + } + s += fmt.Sprintln() + + s += sprintExamples() + + return s +} + +func printList() { + fmt.Println(sprintList()) +} diff --git a/cmd/convmv/convmv_examples.go b/cmd/convmv/convmv_examples.go new file mode 100644 index 0000000000000..1aa9ca5503fc9 --- /dev/null +++ b/cmd/convmv/convmv_examples.go @@ -0,0 +1,83 @@ +package convmv + +import ( + "fmt" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/lib/encoder" +) + +type example struct { + Opt ConvOpt + Path string +} + +var examples = []example{ + {Path: `stories/The Quick Brown Fox!.txt`, Opt: ConvOpt{ConvertAlgo: ConvUppercase}}, + {Path: `stories/The Quick Brown Fox!.txt`, Opt: ConvOpt{ConvertAlgo: ConvFindReplace, FindReplace: []string{"Fox,Turtle", "Quick,Slow"}}}, + {Path: `stories/The Quick Brown Fox!.txt`, Opt: ConvOpt{ConvertAlgo: ConvBase64Encode}}, + {Path: `c3Rvcmllcw==/VGhlIFF1aWNrIEJyb3duIEZveCEudHh0`, Opt: ConvOpt{ConvertAlgo: ConvBase64Decode}}, + {Path: `stories/The Quick Brown 🦊 Fox Went to the Café!.txt`, Opt: ConvOpt{ConvertAlgo: ConvToNFC}}, + {Path: 
`stories/The Quick Brown 🦊 Fox Went to the Café!.txt`, Opt: ConvOpt{ConvertAlgo: ConvToNFD}}, + {Path: `stories/The Quick Brown 🦊 Fox!.txt`, Opt: ConvOpt{ConvertAlgo: ConvASCII}}, + {Path: `stories/The Quick Brown Fox!.txt`, Opt: ConvOpt{ConvertAlgo: ConvTrimSuffix, Suffix: ".txt"}}, + {Path: `stories/The Quick Brown Fox!.txt`, Opt: ConvOpt{ConvertAlgo: ConvPrefix, Prefix: "OLD_"}}, + {Path: `stories/The Quick Brown 🦊 Fox Went to the Café!.txt`, Opt: ConvOpt{ConvertAlgo: ConvCharmap, CmapFlag: 20}}, + {Path: `stories/The Quick Brown Fox: A Memoir [draft].txt`, Opt: ConvOpt{ConvertAlgo: ConvEncoder, Enc: encoder.EncodeColon | encoder.EncodeSquareBracket}}, + {Path: `stories/The Quick Brown 🦊 Fox Went to the Café!.txt`, Opt: ConvOpt{ConvertAlgo: ConvTruncate, Max: 21}}, +} + +func (e example) command() string { + s := fmt.Sprintf(`rclone convmv %q -t %s`, e.Path, e.Opt.ConvertAlgo) + switch e.Opt.ConvertAlgo { + case ConvFindReplace: + for _, r := range e.Opt.FindReplace { + s += fmt.Sprintf(` -r %q`, r) + } + case ConvTrimPrefix, ConvPrefix: + s += fmt.Sprintf(` --prefix %q`, e.Opt.Prefix) + case ConvTrimSuffix, ConvSuffix: + s += fmt.Sprintf(` --suffix %q`, e.Opt.Suffix) + case ConvCharmap: + s += fmt.Sprintf(` --charmap %q`, e.Opt.CmapFlag.String()) + case ConvEncoder: + s += fmt.Sprintf(` --encoding %q`, e.Opt.Enc.String()) + case ConvTruncate: + s += fmt.Sprintf(` --max %d`, e.Opt.Max) + } + return s +} + +func (e example) output() string { + _ = e.Opt.validate() + Opt = e.Opt + s, err := ConvertPath(e.Path, e.Opt.ConvertAlgo, false) + if err != nil { + fs.Errorf(s, "error: %v", err) + } + return s +} + +// go run ./ convmv --help +func sprintExamples() string { + s := "Examples: \n\n" + for _, e := range examples { + s += fmt.Sprintf("```\n%s\n", e.command()) + s += fmt.Sprintf("// Output: %s\n```\n\n", e.output()) + } + Opt = ConvOpt{} // reset + return s +} + +/* func sprintAllCharmapExamples() string { + s := "" + e := example{Path: `stories/The Quick Brown 
🦊 Fox Went to the Café!.txt`, Opt: ConvOpt{ConvertAlgo: ConvCharmap, CmapFlag: 0}}
	for i := range Cmaps {
		e.Opt.CmapFlag++
		_ = e.Opt.validate()
		Opt = e.Opt
		s += fmt.Sprintf("%d Command: %s \n", i, e.command())
		s += fmt.Sprintf("Result: %s \n\n", e.output())
	}
	return s
} */
diff --git a/cmd/convmv/convmv_test.go b/cmd/convmv/convmv_test.go
new file mode 100644
index 0000000000000..105db3a38052d
--- /dev/null
+++ b/cmd/convmv/convmv_test.go
@@ -0,0 +1,202 @@
// Package convmv provides the convmv command.
package convmv

import (
	"cmp"
	"context"
	"fmt"
	"path/filepath"
	"slices"
	"strings"
	"testing"

	_ "github.com/rclone/rclone/backend/all" // import all backends
	"github.com/rclone/rclone/fs/filter"
	"github.com/rclone/rclone/fs/operations"
	"github.com/rclone/rclone/fs/walk"
	"github.com/rclone/rclone/lib/encoder"
	"golang.org/x/text/unicode/norm"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fstest"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Some times used in the tests
var (
	t1 = fstest.Time("2001-02-03T04:05:06.499999999Z")
	// debug, when non-empty, makes makeTestFiles create a single file with
	// this name instead of one file per rune of alphabet.
	debug = ``
)

// TestMain drives the tests
func TestMain(m *testing.M) {
	fstest.TestMain(m)
}

// TestConvmv runs each conversion over a freshly created tree of test files,
// checks the resulting names, then runs the paired "convert back" mode and
// checks the names again. For pairs marked Lossless it finally checks that
// the remote holds the original items.
func TestConvmv(t *testing.T) {
	type args struct {
		ConvertAlgo     fs.Enum[convertChoices]
		ConvertBackAlgo fs.Enum[convertChoices]
		Lossless        bool // whether the ConvertBackAlgo is always losslessly invertible
		ExtraOpt        ConvOpt
	}
	tests := []struct {
		name string
		args args
	}{
		{name: "NFC", args: args{ConvertAlgo: ConvToNFC, ConvertBackAlgo: ConvToNFD, Lossless: false}},
		{name: "NFD", args: args{ConvertAlgo: ConvToNFD, ConvertBackAlgo: ConvToNFC, Lossless: false}},
		{name: "NFKC", args: args{ConvertAlgo: ConvToNFKC, ConvertBackAlgo: ConvToNFKD, Lossless: false}},
		{name: "NFKD", args: args{ConvertAlgo: ConvToNFKD, ConvertBackAlgo: ConvToNFKC, Lossless: false}},
		{name: "base64", args: args{ConvertAlgo: ConvBase64Encode, ConvertBackAlgo: ConvBase64Decode, Lossless: true}},
		{name: "replace", args: args{ConvertAlgo: ConvFindReplace, ConvertBackAlgo: ConvFindReplace, Lossless: true, ExtraOpt: ConvOpt{FindReplace: []string{"bread,banana", "pie,apple", "apple,pie", "banana,bread"}}}},
		{name: "prefix", args: args{ConvertAlgo: ConvPrefix, ConvertBackAlgo: ConvTrimPrefix, Lossless: true, ExtraOpt: ConvOpt{Prefix: "PREFIX"}}},
		{name: "suffix", args: args{ConvertAlgo: ConvSuffix, ConvertBackAlgo: ConvTrimSuffix, Lossless: true, ExtraOpt: ConvOpt{Suffix: "SUFFIX"}}},
		{name: "truncate", args: args{ConvertAlgo: ConvTruncate, ConvertBackAlgo: ConvTruncate, Lossless: false, ExtraOpt: ConvOpt{Max: 10}}},
		{name: "encoder", args: args{ConvertAlgo: ConvEncoder, ConvertBackAlgo: ConvDecoder, Lossless: true, ExtraOpt: ConvOpt{Enc: encoder.OS}}},
		{name: "ISO-8859-1", args: args{ConvertAlgo: ConvISO8859_1, ConvertBackAlgo: ConvISO8859_1, Lossless: false}},
		{name: "charmap", args: args{ConvertAlgo: ConvCharmap, ConvertBackAlgo: ConvCharmap, Lossless: false, ExtraOpt: ConvOpt{CmapFlag: 3}}},
		{name: "lowercase", args: args{ConvertAlgo: ConvLowercase, ConvertBackAlgo: ConvUppercase, Lossless: false}},
		{name: "ascii", args: args{ConvertAlgo: ConvASCII, ConvertBackAlgo: ConvASCII, Lossless: false}},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			r := fstest.NewRun(t)
			defer r.Finalise()

			items := makeTestFiles(t, r, "dir1")
			deleteDSStore(t, r)
			r.CheckRemoteListing(t, items, nil)

			// forward conversion
			Opt = tt.args.ExtraOpt
			Opt.ConvertAlgo = tt.args.ConvertAlgo
			err := Convmv(context.Background(), r.Fremote, "")
			assert.NoError(t, err)
			compareNames(t, r, items)

			// convert back, starting from the names the forward pass produced
			convertedItems := convertItems(t, items)
			Opt.ConvertAlgo = tt.args.ConvertBackAlgo
			err = Convmv(context.Background(), r.Fremote, "")
			assert.NoError(t, err)
			compareNames(t, r, convertedItems)

			if tt.args.Lossless {
				deleteDSStore(t, r)
				r.CheckRemoteItems(t, items...)
			}
		})
	}
}

// alphabet holds the runes that seed the generated test file names; see
// makeTestFiles for how each one is expanded.
const alphabet = "ƀɀɠʀҠԀڀڠݠހ߀ကႠᄀᄠᅀᆀᇠሀሠበዠጠᎠᏀᐠᑀᑠᒀᒠᓀᓠᔀᔠᕀᕠᖀᖠᗀᗠᘀᘠᙀᚠᛀកᠠᡀᣀᦀ᧠ᨠᯀᰀᴀ⇠⋀⍀⍠⎀⎠⏀␀─┠╀╠▀■◀◠☀☠♀♠⚀⚠⛀⛠✀✠❀➀➠⠀⠠⡀⡠⢀⢠⣀⣠⤀⤠⥀⥠⦠⨠⩀⪀⪠⫠⬀⬠⭀ⰀⲀⲠⳀⴀⵀ⺠⻀㇀㐀㐠㑀㑠㒀㒠㓀㓠㔀㔠㕀㕠㖀㖠㗀㗠㘀㘠㙀㙠㚀㚠㛀㛠㜀㜠㝀㝠㞀㞠㟀㟠㠀㠠㡀㡠㢀㢠㣀㣠㤀㤠㥀㥠㦀㦠㧀㧠㨀㨠㩀㩠㪀㪠㫀㫠㬀㬠㭀㭠㮀㮠㯀㯠㰀㰠㱀㱠㲀㲠㳀㳠㴀㴠㵀㵠㶀㶠㷀㷠㸀㸠㹀㹠㺀㺠㻀㻠㼀㼠㽀㽠㾀㾠㿀㿠䀀䀠䁀䁠䂀䂠䃀䃠䄀䄠䅀䅠䆀䆠䇀䇠䈀䈠䉀䉠䊀䊠䋀䋠䌀䌠䍀䍠䎀䎠䏀䏠䐀䐠䑀䑠䒀䒠䓀䓠䔀䔠䕀䕠䖀䖠䗀䗠䘀䘠䙀䙠䚀䚠䛀䛠䜀䜠䝀䝠䞀䞠䟀䟠䠀䠠䡀䡠䢀䢠䣀䣠䤀䤠䥀䥠䦀䦠䧀䧠䨀䨠䩀䩠䪀䪠䫀䫠䬀䬠䭀䭠䮀䮠䯀䯠䰀䰠䱀䱠䲀䲠䳀䳠䴀䴠䵀䵠䶀䷀䷠一丠乀习亀亠什仠伀传佀你侀侠俀俠倀倠偀偠傀傠僀僠儀儠兀兠冀冠净几刀删剀剠劀加勀勠匀匠區占厀厠叀叠吀吠呀呠咀咠哀哠唀唠啀啠喀喠嗀嗠嘀嘠噀噠嚀嚠囀因圀圠址坠垀垠埀埠堀堠塀塠墀墠壀壠夀夠奀奠妀妠姀姠娀娠婀婠媀媠嫀嫠嬀嬠孀孠宀宠寀寠尀尠局屠岀岠峀峠崀崠嵀嵠嶀嶠巀巠帀帠幀幠庀庠廀廠开张彀彠往徠忀忠怀怠恀恠悀悠惀惠愀愠慀慠憀憠懀懠戀戠所扠技抠拀拠挀挠捀捠掀掠揀揠搀搠摀摠撀撠擀擠攀攠敀敠斀斠旀无昀映晀晠暀暠曀曠最朠杀杠枀枠柀柠栀栠桀桠梀梠检棠椀椠楀楠榀榠槀槠樀樠橀橠檀檠櫀櫠欀欠歀歠殀殠毀毠氀氠汀池沀沠泀泠洀洠浀浠涀涠淀淠渀渠湀湠満溠滀滠漀漠潀潠澀澠激濠瀀瀠灀灠炀炠烀烠焀焠煀煠熀熠燀燠爀爠牀牠犀犠狀狠猀猠獀獠玀玠珀珠琀琠瑀瑠璀璠瓀瓠甀甠畀畠疀疠痀痠瘀瘠癀癠皀皠盀盠眀眠着睠瞀瞠矀矠砀砠础硠碀碠磀磠礀礠祀祠禀禠秀秠稀稠穀穠窀窠竀章笀笠筀筠简箠節篠簀簠籀籠粀粠糀糠紀素絀絠綀綠緀締縀縠繀繠纀纠绀绠缀缠罀罠羀羠翀翠耀耠聀聠肀肠胀胠脀脠腀腠膀膠臀臠舀舠艀艠芀芠苀苠茀茠荀荠莀莠菀菠萀萠葀葠蒀蒠蓀蓠蔀蔠蕀蕠薀薠藀藠蘀蘠虀虠蚀蚠蛀蛠蜀蜠蝀蝠螀螠蟀蟠蠀蠠血衠袀袠裀裠褀褠襀襠覀覠觀觠言訠詀詠誀誠諀諠謀謠譀譠讀讠诀诠谀谠豀豠貀負賀賠贀贠赀赠趀趠跀跠踀踠蹀蹠躀躠軀軠輀輠轀轠辀辠迀迠退造遀遠邀邠郀郠鄀鄠酀酠醀醠釀釠鈀鈠鉀鉠銀銠鋀鋠錀錠鍀鍠鎀鎠鏀鏠鐀鐠鑀鑠钀钠铀铠销锠镀镠門閠闀闠阀阠陀陠隀隠雀雠需霠靀靠鞀鞠韀韠頀頠顀顠颀颠飀飠餀餠饀饠馀馠駀駠騀騠驀驠骀骠髀髠鬀鬠魀魠鮀鮠鯀鯠鰀鰠鱀鱠鲀鲠鳀鳠鴀鴠鵀鵠鶀鶠鷀鷠鸀鸠鹀鹠麀麠黀黠鼀鼠齀齠龀龠ꀀꀠꁀꁠꂀꂠꃀꃠꄀꄠꅀꅠꆀꆠꇀꇠꈀꈠꉀꉠꊀꊠꋀꋠꌀꌠꍀꍠꎀꎠꏀꏠꐀꐠꑀꑠ꒠ꔀꔠꕀꕠꖀꖠꗀꗠꙀꚠꛀ꜀꜠ꝀꞀꡀ測試_Русский___ě_áñ"

// extras are additional file names written at the root of the remote.
var extras = []string{"apple", "banana", "appleappleapplebanana", "splitbananasplit"}

// makeTestFiles writes the test tree to the remote and returns the items it
// wrote.
//
// For every rune c of alphabet it creates one file in dir whose name is a
// four-digit counter followed by the 32 consecutive code points starting at c
// (with anything that is not valid UTF-8 removed) and ".txt". Each file's
// content is its own name and its modtime is t1. The names in extras are then
// written at the root.
func makeTestFiles(t *testing.T, r *fstest.Run, dir string) []fstest.Item {
	t.Helper()
	n := 0
	// Create test files
	items := []fstest.Item{}
	for _, c := range alphabet {
		var out strings.Builder
		for i := rune(0); i < 32; i++ {
			out.WriteRune(c + i)
		}
		fileName := filepath.Join(dir, fmt.Sprintf("%04d-%s.txt", n, out.String()))
		fileName = strings.ToValidUTF8(fileName, "")

		if debug != "" {
			fileName = debug
		}

		item := r.WriteObject(context.Background(), fileName, fileName, t1)
		items = append(items, item)
		n++

		// in debug mode only the single debug file is created
		if debug != "" {
			break
		}
	}

	for _, extra := range extras {
		item := r.WriteObject(context.Background(), extra, extra, t1)
		items = append(items, item)
	}

	return items
}

// deleteDSStore deletes from the remote every object matched by the filter
// rule "*.DS_Store", using a filter that excludes everything else.
func deleteDSStore(t *testing.T, r *fstest.Run) {
	ctxDSStore, fi := filter.AddConfig(context.Background())
	err := fi.AddRule(`+ *.DS_Store`)
	assert.NoError(t, err)
	err = fi.AddRule(`- **`)
	assert.NoError(t, err)
	err = operations.Delete(ctxDSStore, r.Fremote)
	assert.NoError(t, err)
}

// compareNames checks that the objects on the remote are exactly items with
// their paths converted by the current Opt.ConvertAlgo.
//
// Both sides are sorted (items by converted path, entries by remote path) and
// then compared pairwise. Note that items is sorted in place.
func compareNames(t *testing.T, r *fstest.Run, items []fstest.Item) {
	var entries fs.DirEntries

	deleteDSStore(t, r)
	err := walk.ListR(context.Background(), r.Fremote, "", true, -1, walk.ListObjects, func(e fs.DirEntries) error {
		entries = append(entries, e...)
		return nil
	})
	assert.NoError(t, err)
	entries = slices.DeleteFunc(entries, func(E fs.DirEntry) bool { // remove those pesky .DS_Store files
		if strings.Contains(E.Remote(), ".DS_Store") {
			err := operations.DeleteFile(context.Background(), E.(fs.Object))
			assert.NoError(t, err)
			return true
		}
		return false
	})
	// require, not assert: the pairwise loop below indexes items by entry position
	require.Equal(t, len(items), entries.Len())

	// sort by CONVERTED name
	slices.SortStableFunc(items, func(a, b fstest.Item) int {
		aConv, err := ConvertPath(a.Path, Opt.ConvertAlgo, false)
		require.NoError(t, err, a.Path)
		bConv, err := ConvertPath(b.Path, Opt.ConvertAlgo, false)
		require.NoError(t, err, b.Path)
		return cmp.Compare(aConv, bConv)
	})
	slices.SortStableFunc(entries, func(a, b fs.DirEntry) int {
		return cmp.Compare(a.Remote(), b.Remote())
	})

	for i, e := range entries {
		expect, err := ConvertPath(items[i].Path, Opt.ConvertAlgo, false)
		assert.NoError(t, err)
		msg := fmt.Sprintf("expected %v, got %v", detectEncoding(expect), detectEncoding(e.Remote()))
		assert.Equal(t, expect, e.Remote(), msg)
	}
}

// convertItems returns a copy of items in which every Path has been converted
// with the current Opt.ConvertAlgo. The input slice is not modified.
func convertItems(t *testing.T, items []fstest.Item) []fstest.Item {
	convertedItems := []fstest.Item{}
	for _, item := range items {
		newPath, err := ConvertPath(item.Path, Opt.ConvertAlgo, false)
		assert.NoError(t, err)
		newItem := item
		newItem.Path = newPath
		convertedItems = append(convertedItems, newItem)
	}
	return convertedItems
}

// detectEncoding names the Unicode normalization form(s) s is already in, for
// use in test failure messages.
func detectEncoding(s string) string {
	if
norm.NFC.IsNormalString(s) && norm.NFD.IsNormalString(s) { + return "BOTH" + } + if !norm.NFC.IsNormalString(s) && norm.NFD.IsNormalString(s) { + return "NFD" + } + if norm.NFC.IsNormalString(s) && !norm.NFD.IsNormalString(s) { + return "NFC" + } + return "OTHER" +} diff --git a/docs/content/commands/rclone_convmv.md b/docs/content/commands/rclone_convmv.md new file mode 100644 index 0000000000000..eefc19b67d836 --- /dev/null +++ b/docs/content/commands/rclone_convmv.md @@ -0,0 +1,299 @@ +--- +title: "rclone convmv" +description: "Convert file and directory names" +slug: rclone_convmv +url: /commands/rclone_convmv/ +groups: Filter,Listing,Important,Copy +versionIntroduced: v1.66 +# autogenerated - DO NOT EDIT, instead edit the source code in cmd/convmv/ and as part of making a release run "make commanddocs" +--- +# rclone convmv + +Convert file and directory names + +## Synopsis + + +Docs todo! + +Conversion modes: +none +nfc +nfd +nfkc +nfkd +replace +prefix +suffix +trimprefix +trimsuffix +index +date +truncate +base64encode +base64decode +encoder +decoder +ISO-8859-1 +Windows-1252 +Macintosh +charmap +lowercase +uppercase +titlecase +ascii +url +mapper + +Char maps: + +IBM-Code-Page-037 +IBM-Code-Page-437 +IBM-Code-Page-850 +IBM-Code-Page-852 +IBM-Code-Page-855 +Windows-Code-Page-858 +IBM-Code-Page-860 +IBM-Code-Page-862 +IBM-Code-Page-863 +IBM-Code-Page-865 +IBM-Code-Page-866 +IBM-Code-Page-1047 +IBM-Code-Page-1140 +ISO-8859-1 +ISO-8859-2 +ISO-8859-3 +ISO-8859-4 +ISO-8859-5 +ISO-8859-6 +ISO-8859-7 +ISO-8859-8 +ISO-8859-9 +ISO-8859-10 +ISO-8859-13 +ISO-8859-14 +ISO-8859-15 +ISO-8859-16 +KOI8-R +KOI8-U +Macintosh +Macintosh-Cyrillic +Windows-874 +Windows-1250 +Windows-1251 +Windows-1252 +Windows-1253 +Windows-1254 +Windows-1255 +Windows-1256 +Windows-1257 +Windows-1258 +X-User-Defined + +Encoding masks: +Asterisk + BackQuote + BackSlash + Colon + CrLf + Ctl + Del + Dollar + Dot + DoubleQuote + Hash + InvalidUtf8 + LeftCrLfHtVt + LeftPeriod + LeftSpace + LeftTilde 
+ LtGt + None + Percent + Pipe + Question + RightCrLfHtVt + RightPeriod + RightSpace + Semicolon + SingleQuote + Slash + SquareBracket + +Examples: + +``` +rclone convmv "stories/The Quick Brown Fox!.txt" -t uppercase +// Output: STORIES/THE QUICK BROWN FOX!.TXT +``` + +``` +rclone convmv "stories/The Quick Brown Fox!.txt" -t replace -r "Fox,Turtle" -r "Quick,Slow" +// Output: stories/The Slow Brown Turtle!.txt +``` + +``` +rclone convmv "stories/The Quick Brown Fox!.txt" -t base64encode +// Output: c3Rvcmllcw==/VGhlIFF1aWNrIEJyb3duIEZveCEudHh0 +``` + +``` +rclone convmv "c3Rvcmllcw==/VGhlIFF1aWNrIEJyb3duIEZveCEudHh0" -t base64decode +// Output: stories/The Quick Brown Fox!.txt +``` + +``` +rclone convmv "stories/The Quick Brown 🦊 Fox Went to the Café!.txt" -t nfc +// Output: stories/The Quick Brown 🦊 Fox Went to the Café!.txt +``` + +``` +rclone convmv "stories/The Quick Brown 🦊 Fox Went to the Café!.txt" -t nfd +// Output: stories/The Quick Brown 🦊 Fox Went to the Café!.txt +``` + +``` +rclone convmv "stories/The Quick Brown 🦊 Fox!.txt" -t ascii +// Output: stories/The Quick Brown Fox!.txt +``` + +``` +rclone convmv "stories/The Quick Brown Fox!.txt" -t trimsuffix --suffix ".txt" +// Output: stories/The Quick Brown Fox! 
+``` + +``` +rclone convmv "stories/The Quick Brown Fox!.txt" -t prefix --prefix "OLD_" +// Output: OLD_stories/OLD_The Quick Brown Fox!.txt +``` + +``` +rclone convmv "stories/The Quick Brown 🦊 Fox Went to the Café!.txt" -t charmap --charmap "ISO-8859-7" +// Output: stories/The Quick Brown _ Fox Went to the Caf_!.txt +``` + +``` +rclone convmv "stories/The Quick Brown Fox: A Memoir [draft].txt" -t encoder --encoding "Colon,SquareBracket" +// Output: stories/The Quick Brown Fox: A Memoir [draft].txt +``` + +``` +rclone convmv "stories/The Quick Brown 🦊 Fox Went to the Café!.txt" -t truncate --max 21 +// Output: stories/The Quick Brown 🦊 Fox +``` + + + + + +``` +rclone convmv source:path [flags] +``` + +## Options + +``` + --charmap string Other character encoding (use --list to see full list) + -t, --conv string Conversion algorithm: none, nfc, nfd, nfkc, nfkd, replace, prefix, suffix, trimprefix, trimsuffix, index, date, truncate, base64encode, base64decode, encoder, decoder, ISO-8859-1, Windows-1252, Macintosh, charmap, lowercase, uppercase, titlecase, ascii, url, mapper (default "none") + --encoding Encoding Custom backend encoding: (use --list to see full list) (default None) + -h, --help help for convmv + --list Print full list of options + -m, --max int In 'truncate' mode, truncate all path segments longer than this many characters (default -1) + --prefix string In 'prefix' or 'trimprefix' mode, append or trim this prefix + -r, --replace stringArray In 'replace' mode, this is a pair of find,replace values (can repeat flag more than once) + --suffix string In 'suffix' or 'trimsuffix' mode, append or trim this suffix +``` + + +## Copy Options + +Flags for anything which can Copy a file. + +``` + --check-first Do all the checks before starting transfers + -c, --checksum Check for changes with size & checksum (if available, or fallback to size only). 
+ --compare-dest stringArray Include additional comma separated server-side paths during comparison + --copy-dest stringArray Implies --compare-dest but also copies files from paths into destination + --cutoff-mode HARD|SOFT|CAUTIOUS Mode to stop transfers when reaching the max transfer limit HARD|SOFT|CAUTIOUS (default HARD) + --ignore-case-sync Ignore case when synchronizing + --ignore-checksum Skip post copy check of checksums + --ignore-existing Skip all files that exist on destination + --ignore-size Ignore size when skipping use modtime or checksum + -I, --ignore-times Don't skip files that match size and time - transfer all files + --immutable Do not modify files, fail if existing files have been modified + --inplace Download directly to destination file instead of atomic download to temp/rename + --max-backlog int Maximum number of objects in sync or check backlog (default 10000) + --max-duration Duration Maximum duration rclone will transfer data for (default 0s) + --max-transfer SizeSuffix Maximum size of data to transfer (default off) + -M, --metadata If set, preserve metadata when copying objects + --modify-window Duration Max time diff to be considered the same (default 1ns) + --multi-thread-chunk-size SizeSuffix Chunk size for multi-thread downloads / uploads, if not set by filesystem (default 64Mi) + --multi-thread-cutoff SizeSuffix Use multi-thread downloads for files above this size (default 256Mi) + --multi-thread-streams int Number of streams to use for multi-thread downloads (default 4) + --multi-thread-write-buffer-size SizeSuffix In memory buffer size for writing when in multi-thread mode (default 128Ki) + --no-check-dest Don't check the destination, copy regardless + --no-traverse Don't traverse destination file system on copy + --no-update-modtime Don't update destination modtime if files identical + --order-by string Instructions on how to order the transfers, e.g. 
'size,descending' + --partial-suffix string Add partial-suffix to temporary file name when --inplace is not used (default ".partial") + --refresh-times Refresh the modtime of remote files + --server-side-across-configs Allow server-side operations (e.g. copy) to work across different configs + --size-only Skip based on size only, not modtime or checksum + --streaming-upload-cutoff SizeSuffix Cutoff for switching to chunked upload if file size is unknown, upload starts after reaching cutoff or when file ends (default 100Ki) + -u, --update Skip files that are newer on the destination +``` + +## Important Options + +Important flags useful for most commands. + +``` + -n, --dry-run Do a trial run with no permanent changes + -i, --interactive Enable interactive mode + -v, --verbose count Print lots more stuff (repeat for more) +``` + +## Filter Options + +Flags for filtering directory listings. + +``` + --delete-excluded Delete files on dest excluded from sync + --exclude stringArray Exclude files matching pattern + --exclude-from stringArray Read file exclude patterns from file (use - to read from stdin) + --exclude-if-present stringArray Exclude directories if filename is present + --files-from stringArray Read list of source-file names from file (use - to read from stdin) + --files-from-raw stringArray Read list of source-file names from file without any processing of lines (use - to read from stdin) + -f, --filter stringArray Add a file filtering rule + --filter-from stringArray Read file filtering patterns from a file (use - to read from stdin) + --ignore-case Ignore case in filters (case insensitive) + --include stringArray Include files matching pattern + --include-from stringArray Read file include patterns from file (use - to read from stdin) + --max-age Duration Only transfer files younger than this in s or suffix ms|s|m|h|d|w|M|y (default off) + --max-depth int If set limits the recursion depth to this (default -1) + --max-size SizeSuffix Only transfer files 
smaller than this in KiB or suffix B|K|M|G|T|P (default off) + --metadata-exclude stringArray Exclude metadatas matching pattern + --metadata-exclude-from stringArray Read metadata exclude patterns from file (use - to read from stdin) + --metadata-filter stringArray Add a metadata filtering rule + --metadata-filter-from stringArray Read metadata filtering patterns from a file (use - to read from stdin) + --metadata-include stringArray Include metadatas matching pattern + --metadata-include-from stringArray Read metadata include patterns from file (use - to read from stdin) + --min-age Duration Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y (default off) + --min-size SizeSuffix Only transfer files bigger than this in KiB or suffix B|K|M|G|T|P (default off) +``` + +## Listing Options + +Flags for listing directories. + +``` + --default-time Time Time to show if modtime is unknown for files and directories (default 2000-01-01T00:00:00Z) + --fast-list Use recursive list if available; uses more memory but fewer transactions +``` + +See the [global flags page](/flags/) for global options not listed here. + +# SEE ALSO + +* [rclone](/commands/rclone/) - Show help for rclone commands, flags and backends. 
+ diff --git a/lib/encoder/encoder.go b/lib/encoder/encoder.go index a14d7c8485b42..746bfd3bd7813 100644 --- a/lib/encoder/encoder.go +++ b/lib/encoder/encoder.go @@ -147,8 +147,8 @@ func init() { alias("Dot", EncodeDot) } -// validStrings returns all the valid MultiEncoder strings -func validStrings() string { +// ValidStrings returns all the valid MultiEncoder strings +func ValidStrings() string { var out []string for k := range nameToEncoding { out = append(out, k) @@ -188,7 +188,7 @@ func (mask *MultiEncoder) Set(in string) error { } else { i, err := strconv.ParseInt(part, 0, 64) if err != nil { - return fmt.Errorf("bad encoding %q: possible values are: %s", part, validStrings()) + return fmt.Errorf("bad encoding %q: possible values are: %s", part, ValidStrings()) } out |= MultiEncoder(i) } @@ -305,8 +305,7 @@ func (mask MultiEncoder) Encode(in string) string { } if mask.Has(EncodeAsterisk) { // * switch r { - case '*', - '＊': + case '*', '＊': return true } } @@ -332,64 +331,55 @@ func (mask MultiEncoder) Encode(in string) string { } if mask.Has(EncodeQuestion) { // ? 
switch r { - case '?', - '？': + case '?', '？': return true } } if mask.Has(EncodeColon) { // : switch r { - case ':', - '：': + case ':', '：': return true } } if mask.Has(EncodePipe) { // | switch r { - case '|', - '｜': + case '|', '｜': return true } } if mask.Has(EncodeDoubleQuote) { // " switch r { - case '"', - '＂': + case '"', '＂': return true } } if mask.Has(EncodeSingleQuote) { // ' switch r { - case '\'', - '＇': + case '\'', '＇': return true } } if mask.Has(EncodeBackQuote) { // ` switch r { - case '`', - '｀': + case '`', '｀': return true } } if mask.Has(EncodeDollar) { // $ switch r { - case '$', - '＄': + case '$', '＄': return true } } if mask.Has(EncodeSlash) { // / switch r { - case '/', - '／': + case '/', '／': return true } } if mask.Has(EncodeBackSlash) { // \ switch r { - case '\\', - '＼': + case '\\', '＼': return true } } @@ -402,15 +392,13 @@ func (mask MultiEncoder) Encode(in string) string { } if mask.Has(EncodeHash) { // # switch r { - case '#', - '＃': + case '#', '＃': return true } } if mask.Has(EncodePercent) { // % switch r { - case '%', - '％': + case '%', '％': return true } } @@ -1137,6 +1125,7 @@ func appendQuotedBytes(w io.Writer, s string) { _, _ = fmt.Fprintf(w, string(QuoteRune)+"%02X", b) } } + func appendUnquotedByte(w io.Writer, s string) bool { if len(s) < 2 { return false @@ -1157,12 +1146,15 @@ func (identity) Decode(in string) string { return in } func (i identity) FromStandardPath(s string) string { return FromStandardPath(i, s) } + func (i identity) FromStandardName(s string) string { return FromStandardName(i, s) } + func (i identity) ToStandardPath(s string) string { return ToStandardPath(i, s) } + func (i identity) ToStandardName(s string) string { return ToStandardName(i, s) }