diff --git a/cmd/data-usage.go b/cmd/data-usage.go index aaa0e22650f000..3ee1628dc1b9b0 100644 --- a/cmd/data-usage.go +++ b/cmd/data-usage.go @@ -21,7 +21,6 @@ import ( "context" "encoding/json" "errors" - "fmt" "os" "path" "path/filepath" @@ -258,14 +257,12 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo if err != nil { return nil, err } - fmt.Println("Adding", path.Clean(folder.name), thisHash, cache) f.newCache.addHashed(thisHash, folder.parent, cache) } return nextFolders, nil } func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dataUsageEntry, error) { - fmt.Println("deepScanFolder:", folder) var cache dataUsageEntry delayMult := 10.0 if mult := os.Getenv(dataUsageTimeMultEnv); mult != "" { @@ -279,7 +276,6 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat var dirStack = []string{f.root, folder} addDir = func(entName string, typ os.FileMode) error { - fmt.Println("addDir:", entName) select { case <-done: return ctx.Err() @@ -321,6 +317,8 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat return &cache, nil } +const updateDirCycles = 16 + func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) { if cache.Info.Name == "" { cache.Info.Name = "/" @@ -335,8 +333,6 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai existingFolders: nil, } - const updateCycles = 16 - done := ctx.Done() const flattenLevels = 2 // Add root @@ -353,10 +349,8 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai // No useful information... 
return cache, err } - fmt.Println("todo:", todo) } - fmt.Println("new folders", s.newFolders) // Add add new folders first for _, folder := range s.newFolders { select { @@ -381,7 +375,6 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai } } - fmt.Println("existing folders", s.existingFolders) // Do selective scanning of existing folders. for _, folder := range s.existingFolders { @@ -391,10 +384,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai default: } h := stringHash(folder.name) - if !h.mod(s.oldCache.Info.NextCycle, updateCycles) { + if !h.mod(s.oldCache.Info.NextCycle, updateDirCycles) { s.newCache.addHashed(h, folder.parent, s.oldCache.Cache[h]) continue } // Update on this cycle... du, err := s.deepScanFolder(ctx, folder.name) diff --git a/cmd/data-usage_test.go b/cmd/data-usage_test.go index 2a151d17537fe5..972b92267877c4 100644 --- a/cmd/data-usage_test.go +++ b/cmd/data-usage_test.go @@ -8,36 +8,30 @@ import ( "testing" ) +type usageTestFile struct { + name string + size int +} + func Test_updateUsage(t *testing.T) { - type testFile struct { - name string - size int - } base, err := ioutil.TempDir("", "Test_updateUsage") if err != nil { t.Skip(err) } defer os.RemoveAll(base) - var files = []testFile{ + var files = []usageTestFile{ {name: "rootfile", size: 10000}, - {name: "dir1/d1file", size: 20000}, - {name: "dir2/d2file", size: 30000}, + {name: "rootfile2", size: 10000}, + {name: "dir1/d1file", size: 2000}, + {name: "dir2/d2file", size: 300}, {name: "dir1/dira/dafile", size: 100000}, {name: "dir1/dira/dbfile", size: 200000}, {name: "dir1/dira/dirasub/dcfile", size: 1000000}, + {name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10}, } + createUsageTestFiles(t, base, 
files) - for _, f := range files { - err := os.MkdirAll(filepath.Dir(filepath.Join(base, f.name)), os.ModePerm) - if err != nil { - t.Fatal(err) - } - err = ioutil.WriteFile(filepath.Join(base, f.name), make([]byte, f.size), os.ModePerm) - if err != nil { - t.Fatal(err) - } - } - got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, func(item Item) (i int64, err error) { + getSize := func(item Item) (i int64, err error) { if item.Typ&os.ModeDir == 0 { s, err := os.Stat(item.Path) if err != nil { @@ -46,12 +40,264 @@ func Test_updateUsage(t *testing.T) { return s.Size(), nil } return 0, nil - }) + } + got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + + // Test dirs + var want = []struct { + path string + isNil bool + size, objs int + oSizes sizeHistogram + }{ + { + path: "flat", + size: 1322310, + objs: 8, + oSizes: sizeHistogram{0: 2, 1: 6}, + }, + { + path: "/", + size: 20000, + objs: 2, + oSizes: sizeHistogram{1: 2}, + }, + { + path: "/dir1", + size: 2000, + objs: 1, + oSizes: sizeHistogram{1: 1}, + }, + { + path: "/dir1/dira", + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "/dir1/dira/", + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "/nonexistying", + isNil: true, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.path == "flat" { + f := got.flatten(*got.root()) + e = &f + } + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + files = []usageTestFile{ + { + name: "newfolder/afile", + size: 4, 
+ }, + { + name: "newfolder/anotherone", + size: 1, + }, + { + name: "newfolder/anemptyone", + size: 0, + }, + { + name: "dir1/fileindir1", + size: 20000, + }, + { + name: "dir1/dirc/fileindirc", + size: 20000, + }, + { + name: "rootfile3", + size: 1000, + }, + } + createUsageTestFiles(t, base, files) + got, err = updateUsage(context.Background(), base, got, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + + want = []struct { + path string + isNil bool + size, objs int + oSizes sizeHistogram + }{ + { + path: "flat", + size: 1363315, + objs: 14, + oSizes: sizeHistogram{0: 6, 1: 8}, + }, + { + path: "/", + size: 21000, + objs: 3, + oSizes: sizeHistogram{0: 1, 1: 2}, + }, + { + path: "/newfolder", + size: 5, + objs: 3, + oSizes: sizeHistogram{0: 3}, + }, + { + path: "/dir1/dira", + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "/nonexistying", + isNil: true, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.path == "flat" { + f := got.flatten(*got.root()) + e = &f + } + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + files = []usageTestFile{ + { + name: "dir1/dira/dirasub/fileindira2", + size: 200, + }, + } + + createUsageTestFiles(t, base, files) + err = os.RemoveAll(filepath.Join(base, "dir1/dira/dirasub/dcfile")) if err != nil { t.Fatal(err) } + // Changed dir must be picked up in this many cycles. 
+ for i := 0; i < updateDirCycles; i++ { + got, err = updateUsage(context.Background(), base, got, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + } + + want = []struct { + path string + isNil bool + size, objs int + oSizes sizeHistogram + }{ + { + path: "flat", + size: 363515, + objs: 14, + oSizes: sizeHistogram{0: 7, 1: 7}, + }, + { + path: "/dir1/dira", + size: 300210, + objs: 4, + oSizes: sizeHistogram{0: 2, 1: 2}, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.path == "flat" { + f := got.flatten(*got.root()) + e = &f + } + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + t.Log(got.StringAll()) + t.Logf("Root, flat: %+v", got.flatten(*got.root())) t.Logf("Root: %+v", *got.root()) - t.Logf("dir1: %+v", *got.find("/dir1")) - t.Logf("dira: %+v", *got.find("/dir1/dira")) + t.Logf("/dir1/dira: %+v", *got.find("/dir1/dira")) + +} + +func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) { + for _, f := range files { + err := os.MkdirAll(filepath.Dir(filepath.Join(base, f.name)), os.ModePerm) + if err != nil { + t.Fatal(err) + } + err = ioutil.WriteFile(filepath.Join(base, f.name), make([]byte, f.size), os.ModePerm) + if err != nil { + t.Fatal(err) + } + } } diff --git a/pkg/s3select/select_test.go b/pkg/s3select/select_test.go index bbf81e17155059..12004322db8be7 100644 --- a/pkg/s3select/select_test.go +++ b/pkg/s3select/select_test.go @@ -260,6 +260,68 @@ func TestJSONQueries(t *testing.T) { `), wantResult: `"[""foo"",""bar"",""whatever""]"`, }, + { + name: "document", + query: "", + requestXML: []byte(` + + + select * from 
s3object s WHERE '__elem__merfu' IN s.elements[*].element_type + SQL + + NONE + + DOCUMENT + + + + + + + + FALSE + + + `), + withJSON: `{ + "name": "small_pdf1.pdf", + "lume_id": "9507193e-572d-4f95-bcf1-e9226d96be65", + "elements": [ + { + "element_type": "__elem__image", + "element_id": "859d09c4-7cf1-4a37-9674-3a7de8b56abc", + "attributes": { + "__attr__image_dpi": 300, + "__attr__image_size": [ + 2550, + 3299 + ], + "__attr__image_index": 1, + "__attr__image_format": "JPEG", + "__attr__file_extension": "jpg", + "__attr__data": null + } + }, + { + "element_type": "__elem__merfu", + "element_id": "d868aefe-ef9a-4be2-b9b2-c9fd89cc43eb", + "attributes": { + "__attr__image_dpi": 300, + "__attr__image_size": [ + 2550, + 3299 + ], + "__attr__image_index": 2, + "__attr__image_format": "JPEG", + "__attr__file_extension": "jpg", + "__attr__data": null + } + } + ], + "data": "asdascasdc1234e123erdasdas" +}`, + wantResult: `"[{""element_type"":""__elem__merfu"",""element_id"":""d868aefe-ef9a-4be2-b9b2-c9fd89cc43eb"",""attributes"":{""__attr__image_dpi"":300,""__attr__image_size"":[2550,3299],""__attr__image_index"":2,""__attr__image_format"":""JPEG"",""__attr__file_extension"":""jpg"",""__attr__data"":null}}]"`, + }, } defRequest := `