Skip to content

Commit

Permalink
lightning: return 0 early on empty parquet files (#52519) (#52521)
Browse files Browse the repository at this point in the history
close #52518
  • Loading branch information
ti-chi-bot committed Apr 12, 2024
1 parent 29491d4 commit 4ba4bcd
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pkg/lightning/mydump/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store
// SampleParquetDataSize samples the data size of the parquet file.
func SampleParquetDataSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (int64, error) {
totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta)
if err != nil {
if totalRowCount == 0 || err != nil {
return 0, err
}

Expand Down
9 changes: 7 additions & 2 deletions pkg/lightning/mydump/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ func TestSampleFileCompressRatio(t *testing.T) {
require.InDelta(t, ratio, 5000.0/float64(bf.Len()), 1e-5)
}

func TestSampleParquetDataSize(t *testing.T) {
func testSampleParquetDataSize(t *testing.T, count int) {
s := newTestMydumpLoaderSuite(t)
store, err := storage.NewLocalStorage(s.sourceDir)
require.NoError(t, err)
Expand All @@ -1133,7 +1133,7 @@ func TestSampleParquetDataSize(t *testing.T) {
t.Logf("seed: %d. To reproduce the random behaviour, manually set `rand.New(rand.NewSource(seed))`", seed)
rnd := rand.New(rand.NewSource(seed))
totalRowSize := 0
for i := 0; i < 1000; i++ {
for i := 0; i < count; i++ {
kl := rnd.Intn(20) + 1
key := make([]byte, kl)
kl, err = rnd.Read(key)
Expand Down Expand Up @@ -1167,6 +1167,11 @@ func TestSampleParquetDataSize(t *testing.T) {
require.InDelta(t, totalRowSize, size, float64(totalRowSize)/10)
}

// TestSampleParquetDataSize drives the shared testSampleParquetDataSize helper
// as named subtests, once with count=1000 and once with count=0.
func TestSampleParquetDataSize(t *testing.T) {
	cases := []struct {
		name  string
		count int
	}{
		{name: "count=1000", count: 1000},
		{name: "count=0", count: 0},
	}
	for _, tc := range cases {
		tc := tc // keep a per-iteration copy for the closure on pre-1.22 toolchains
		t.Run(tc.name, func(t *testing.T) {
			testSampleParquetDataSize(t, tc.count)
		})
	}
}

func TestSetupOptions(t *testing.T) {
// These functions are only used by other components; reference them here so
// they are not deleted by mistake.
Expand Down

0 comments on commit 4ba4bcd

Please sign in to comment.