Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

import into: enable [] glob matching #51643

Merged
merged 4 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 4 additions & 3 deletions pkg/executor/importer/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error {
fileNameKey = strings.Trim(u.Path, "/")
}
// try to find pattern error in advance
_, err2 = filepath.Match(stringutil.EscapeGlobExceptAsterisk(fileNameKey), "")
_, err2 = filepath.Match(stringutil.EscapeGlobQuestionMark(fileNameKey), "")
if err2 != nil {
return exeerrors.ErrLoadDataInvalidURI.GenWithStackByArgs(plannercore.ImportIntoDataSource,
"Glob pattern error: "+err2.Error())
Expand All @@ -1063,7 +1063,8 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error {
s := e.dataStore
var totalSize int64
dataFiles := []*mydump.SourceFileMeta{}
idx := strings.IndexByte(fileNameKey, '*')
// check whether a glob pattern ('*' or '[') is present in the filename.
idx := strings.IndexAny(fileNameKey, "*[")
// simple path when the path represents one file
sourceType := e.getSourceType()
if idx == -1 {
Expand Down Expand Up @@ -1098,7 +1099,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error {
// when importing from server disk, all entries in the parent directory should
// have READ access, otherwise walkDir will fail
// we only support '*' and '[]'; in order to reuse the glob library we manually escape '?' in the path
escapedPath := stringutil.EscapeGlobExceptAsterisk(fileNameKey)
escapedPath := stringutil.EscapeGlobQuestionMark(fileNameKey)
err := s.WalkDir(ctx, &storage.WalkOption{ObjPrefix: commonPrefix, SkipSubDir: true},
func(remotePath string, size int64) error {
// we have checked in LoadDataExec.Next
Expand Down
22 changes: 22 additions & 0 deletions pkg/executor/importer/import_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,28 @@ func TestSupportedSuffixForServerDisk(t *testing.T) {
require.NoError(t, os.Chmod(path.Join(tempDir, "no-perm"), 0o400))
c.Path = path.Join(tempDir, "server-*.csv")
require.NoError(t, c.InitDataFiles(ctx))
// test glob matching pattern [12]
err = os.WriteFile(path.Join(tempDir, "glob-1.csv"), []byte("1,1"), 0o644)
require.NoError(t, err)
err = os.WriteFile(path.Join(tempDir, "glob-2.csv"), []byte("2,2"), 0o644)
require.NoError(t, err)
err = os.WriteFile(path.Join(tempDir, "glob-3.csv"), []byte("3,3"), 0o644)
require.NoError(t, err)
c.Path = path.Join(tempDir, "glob-[12].csv")
require.NoError(t, c.InitDataFiles(ctx))
gotPath := make([]string, 0, len(c.dataFiles))
for _, f := range c.dataFiles {
gotPath = append(gotPath, f.Path)
}
require.ElementsMatch(t, []string{"glob-1.csv", "glob-2.csv"}, gotPath)
// test glob matching pattern [2-3]
c.Path = path.Join(tempDir, "glob-[2-3].csv")
require.NoError(t, c.InitDataFiles(ctx))
gotPath = make([]string, 0, len(c.dataFiles))
for _, f := range c.dataFiles {
gotPath = append(gotPath, f.Path)
}
require.ElementsMatch(t, []string{"glob-2.csv", "glob-3.csv"}, gotPath)
}

func TestGetDataSourceType(t *testing.T) {
Expand Down
14 changes: 3 additions & 11 deletions pkg/executor/test/loadremotetest/multi_file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,6 @@ func (s *mockGCSSuite) TestFilenameAsterisk() {
Content: []byte("5\ttest5\n" +
"6\ttest6"),
})
s.server.CreateObject(fakestorage.Object{
ObjectAttrs: fakestorage.ObjectAttrs{
BucketName: "test-multi-load",
Name: "not.me.[1-9].tsv",
},
Content: []byte("7\ttest7\n" +
"8\ttest8"),
})
s.server.CreateObject(fakestorage.Object{
ObjectAttrs: fakestorage.ObjectAttrs{
BucketName: "not-me",
Expand All @@ -88,14 +80,14 @@ func (s *mockGCSSuite) TestFilenameAsterisk() {
"2 test2", "4 test4", "6 test6",
))

// only '*' is supported in pattern matching
// only `*` and `[]` are supported in pattern matching
s.tk.MustExec("TRUNCATE TABLE multi_load.t;")
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-multi-load/not.me.[1-9].tsv?endpoint=%s'
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-multi-load/db.tbl.00[13].tsv?endpoint=%s'
INTO TABLE multi_load.t with thread=1;`, gcsEndpoint)
s.tk.MustExec(sql)
s.Equal(uint64(0), s.tk.Session().GetSessionVars().StmtCtx.LastInsertID)
s.tk.MustQuery("SELECT * FROM multi_load.t;").Check(testkit.Rows(
"7 test7", "8 test8",
"1 test1", "2 test2", "5 test5", "6 test6",
))
}

Expand Down
7 changes: 3 additions & 4 deletions pkg/util/stringutil/string_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,13 +546,12 @@ func LowerOneStringExcludeEscapeChar(str []byte, escapeChar byte) byte {
return actualEscapeChar
}

// EscapeGlobExceptAsterisk escapes '?', '[', ']' for a glob path pattern.
func EscapeGlobExceptAsterisk(s string) string {
// EscapeGlobQuestionMark escapes '?' for a glob path pattern.
func EscapeGlobQuestionMark(s string) string {
var buf strings.Builder
buf.Grow(len(s))
for _, c := range s {
switch c {
case '?', '[', ']':
if c == '?' {
buf.WriteByte('\\')
}
buf.WriteRune(c)
Expand Down
6 changes: 3 additions & 3 deletions pkg/util/stringutil/string_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,15 +194,15 @@ func TestBuildStringFromLabels(t *testing.T) {
}
}

func TestEscapeGlobExceptAsterisk(t *testing.T) {
func TestEscapeGlobQuestionMark(t *testing.T) {
cases := [][2]string{
{"123", "123"},
{"12*3", "12*3"},
{"12?", `12\?`},
{`[1-2]`, `\[1-2\]`},
{`[1-2]`, `[1-2]`},
}
for _, pair := range cases {
require.Equal(t, pair[1], EscapeGlobExceptAsterisk(pair[0]))
require.Equal(t, pair[1], EscapeGlobQuestionMark(pair[0]))
}
}

Expand Down