From ec2e083b96cabf73cf3d475f2a4c56b0b8e4727c Mon Sep 17 00:00:00 2001 From: shaoting-huang Date: Wed, 3 Jul 2024 16:56:36 +0800 Subject: [PATCH] add ut Signed-off-by: shaoting-huang --- internal/storage/serde.go | 3 ++- internal/storage/serde_test.go | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/internal/storage/serde.go b/internal/storage/serde.go index 5249c2f93f424..3ed966cae81d8 100644 --- a/internal/storage/serde.go +++ b/internal/storage/serde.go @@ -676,9 +676,10 @@ func (sfw *singleFieldRecordWriter) Close() { func newSingleFieldRecordWriter(fieldId FieldID, field arrow.Field, writer io.Writer) (*singleFieldRecordWriter, error) { schema := arrow.NewSchema([]arrow.Field{field}, nil) + + // use the same writer properties as the payload writer for now fw, err := pqarrow.NewFileWriter(schema, writer, parquet.NewWriterProperties( - parquet.WithMaxRowGroupLength(math.MaxInt64), // No additional grouping for now. parquet.WithCompression(compress.Codecs.Zstd), parquet.WithCompressionLevel(3)), pqarrow.DefaultWriterProps()) diff --git a/internal/storage/serde_test.go b/internal/storage/serde_test.go index 87d2aacfa43ae..b9bac7ae7fecd 100644 --- a/internal/storage/serde_test.go +++ b/internal/storage/serde_test.go @@ -21,6 +21,7 @@ import ( "reflect" "testing" + "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" "github.com/stretchr/testify/assert" @@ -100,6 +101,23 @@ func TestSerDe(t *testing.T) { } } +func TestArrowSchema(t *testing.T) { + t.Run("test composite record", func(t *testing.T) { + fields := []arrow.Field{{Name: "1", Type: arrow.BinaryTypes.String, Nullable: true}} + builder := array.NewBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String) + builder.AppendValueFromString("1") + arrays := []arrow.Array{builder.NewArray()} + cr := &compositeRecord{ + recs: make(map[FieldID]arrow.Record, 1), + schema: make(map[FieldID]schemapb.DataType, 
1), + } + cr.recs[0] = array.NewRecord(arrow.NewSchema(fields, nil), arrays, 1) + cr.schema[0] = schemapb.DataType_String + expected := arrow.NewSchema(fields, nil) + assert.Equal(t, expected, cr.ArrowSchema()) + }) +} + func BenchmarkDeserializeReader(b *testing.B) { len := 1000000 blobs, err := generateTestData(len)