Skip to content

Commit

Permalink
apacheGH-34657: [Go] Add ValueString(i int) string to array (apache#3…
Browse files Browse the repository at this point in the history
…4986)

### Rationale for this change

### What changes are included in this PR?

### Are these changes tested?

### Are there any user-facing changes?

* Closes: apache#34657

Authored-by: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
  • Loading branch information
yevgenypats authored and rtpsw committed May 16, 2023
1 parent f9ae180 commit d57710a
Show file tree
Hide file tree
Showing 46 changed files with 825 additions and 33 deletions.
2 changes: 2 additions & 0 deletions go/arrow/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ type Array interface {
// IsValid returns true if value at index is not null.
// NOTE: IsValid will panic if NullBitmapBytes is not empty and i is out of range (i < 0 or i ≥ Len).
IsValid(i int) bool
// ValueStr returns the value at index as a string.
ValueStr(i int) string

// Get single value to be marshalled with `json.Marshal`
GetOneForMarshal(i int) interface{}
Expand Down
1 change: 1 addition & 0 deletions go/arrow/array/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
const (
// UnknownNullCount specifies the NullN should be calculated from the null bitmap buffer.
UnknownNullCount = -1
// NullValueStr is the textual form of a null slot: ValueStr implementations
// return it for null entries, and AppendValueFromString implementations
// treat an input equal to it as a request to append a null.
NullValueStr = "(null)"
)

type array struct {
Expand Down
17 changes: 16 additions & 1 deletion go/arrow/array/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package array

import (
"bytes"
"encoding/base64"
"fmt"
"strings"
"unsafe"
Expand Down Expand Up @@ -56,7 +57,15 @@ func (a *Binary) Value(i int) []byte {
return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]]
}

// ValueStr returns the value at index i rendered as text: NullValueStr when
// the slot is null, otherwise the standard base64 encoding of the raw bytes.
func (a *Binary) ValueStr(i int) string {
	if a.IsNull(i) {
		return NullValueStr
	}
	raw := a.Value(i)
	return base64.StdEncoding.EncodeToString(raw)
}

// ValueString returns the string at index i without performing additional allocations.
// The string is only valid for the lifetime of the Binary array.
func (a *Binary) ValueString(i int) string {
b := a.Value(i)
Expand Down Expand Up @@ -191,6 +200,12 @@ func (a *LargeBinary) Value(i int) []byte {
return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]]
}

// ValueStr returns the value at index i rendered as text: NullValueStr when
// the slot is null, otherwise the standard base64 encoding of the raw bytes.
func (a *LargeBinary) ValueStr(i int) string {
	if a.IsNull(i) {
		return NullValueStr
	}
	raw := a.Value(i)
	return base64.StdEncoding.EncodeToString(raw)
}
func (a *LargeBinary) ValueString(i int) string {
b := a.Value(i)
return *(*string)(unsafe.Pointer(&b))
Expand Down
8 changes: 8 additions & 0 deletions go/arrow/array/binary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ func TestBinary(t *testing.T) {
assert.Equal(t, []byte("AAA"), a.Value(0))
assert.Equal(t, []byte{}, a.Value(1))
assert.Equal(t, []byte("BBBB"), a.Value(2))
assert.Equal(t, "QUFB", a.ValueStr(0))
assert.Equal(t, "(null)", a.ValueStr(1))
a.Release()

// Test builder reset and NewArray API.
Expand All @@ -60,6 +62,8 @@ func TestBinary(t *testing.T) {
assert.Equal(t, []byte("AAA"), a.Value(0))
assert.Equal(t, []byte{}, a.Value(1))
assert.Equal(t, []byte("BBBB"), a.Value(2))
assert.Equal(t, "QUFB", a.ValueStr(0))
assert.Equal(t, "(null)", a.ValueStr(1))
a.Release()

b.Release()
Expand Down Expand Up @@ -92,6 +96,8 @@ func TestLargeBinary(t *testing.T) {
assert.Equal(t, []byte("AAA"), a.Value(0))
assert.Equal(t, []byte{}, a.Value(1))
assert.Equal(t, []byte("BBBB"), a.Value(2))
assert.Equal(t, "QUFB", a.ValueStr(0))
assert.Equal(t, "(null)", a.ValueStr(1))
a.Release()

// Test builder reset and NewArray API.
Expand All @@ -102,6 +108,8 @@ func TestLargeBinary(t *testing.T) {
assert.Equal(t, []byte("AAA"), a.Value(0))
assert.Equal(t, []byte{}, a.Value(1))
assert.Equal(t, []byte("BBBB"), a.Value(2))
assert.Equal(t, "QUFB", a.ValueStr(0))
assert.Equal(t, "(null)", a.ValueStr(1))
a.Release()

b.Release()
Expand Down
20 changes: 20 additions & 0 deletions go/arrow/array/binarybuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,26 @@ func (b *BinaryBuilder) appendNextOffset() {
b.appendOffsetVal(numBytes)
}

// AppendValueFromString appends one element parsed from s.
//
// An input equal to NullValueStr appends a null. For string and large-string
// builders the bytes of s are appended unchanged; for binary and large-binary
// builders s is decoded as standard base64 first. A decode failure, or a
// builder of any other type, yields an error and nothing is appended.
func (b *BinaryBuilder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}

	id := b.dtype.ID()
	if id == arrow.STRING || id == arrow.LARGE_STRING {
		b.Append([]byte(s))
		return nil
	}
	if id != arrow.BINARY && id != arrow.LARGE_BINARY {
		return fmt.Errorf("cannot append string to type %s", b.dtype)
	}

	raw, err := base64.StdEncoding.DecodeString(s)
	if err != nil {
		return fmt.Errorf("could not decode base64 string: %w", err)
	}
	b.Append(raw)
	return nil
}

func (b *BinaryBuilder) UnmarshalOne(dec *json.Decoder) error {
t, err := dec.Token()
if err != nil {
Expand Down
5 changes: 5 additions & 0 deletions go/arrow/array/binarybuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func TestBinaryBuilder(t *testing.T) {
ab.AppendNull()
} else {
ab.Append(v)

}
}

Expand All @@ -50,8 +51,12 @@ func TestBinaryBuilder(t *testing.T) {
}
assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i)
}
// Zm9v is foo in base64
assert.NoError(t, ab.AppendValueFromString("Zm9v"))

ar := ab.NewBinaryArray()
assert.Equal(t, "Zm9v", ar.ValueStr(5))

ab.Release()
ar.Release()

Expand Down
9 changes: 9 additions & 0 deletions go/arrow/array/boolean.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package array

import (
"fmt"
"strconv"
"strings"

"github.com/apache/arrow/go/v12/arrow"
Expand Down Expand Up @@ -55,6 +56,14 @@ func (a *Boolean) Value(i int) bool {
return bitutil.BitIsSet(a.values, a.array.data.offset+i)
}

// ValueStr returns the value at index i rendered as text: NullValueStr when
// the slot is null, otherwise "true" or "false".
func (a *Boolean) ValueStr(i int) string {
	if a.IsNull(i) {
		return NullValueStr
	}
	return strconv.FormatBool(a.Value(i))
}

func (a *Boolean) String() string {
o := new(strings.Builder)
o.WriteString("[")
Expand Down
4 changes: 4 additions & 0 deletions go/arrow/array/boolean_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/stretchr/testify/assert"
)

func TestBooleanSliceData(t *testing.T) {
Expand Down Expand Up @@ -285,4 +286,7 @@ func TestBooleanStringer(t *testing.T) {
if got := out.String(); got != want {
t.Fatalf("invalid stringer:\ngot= %q\nwant=%q", got, want)
}
assert.Equal(t, "true", arr.ValueStr(0))
assert.Equal(t, "false", arr.ValueStr(1))
assert.Equal(t, "(null)", arr.ValueStr(2))
}
13 changes: 13 additions & 0 deletions go/arrow/array/booleanbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,19 @@ func (b *BooleanBuilder) AppendEmptyValue() {
b.UnsafeAppend(false)
}

// AppendValueFromString appends one element parsed from s.
//
// An input equal to NullValueStr appends a null; any other input is parsed
// with strconv.ParseBool. When parsing fails the error is returned as-is and
// nothing is appended.
func (b *BooleanBuilder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}

	parsed, err := strconv.ParseBool(s)
	if err == nil {
		b.Append(parsed)
	}
	return err
}

func (b *BooleanBuilder) UnsafeAppend(v bool) {
bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
if v {
Expand Down
8 changes: 6 additions & 2 deletions go/arrow/array/booleanbuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,20 @@ func TestBooleanBuilder_AppendValues(t *testing.T) {

b := array.NewBooleanBuilder(mem)

exp := tools.Bools(1, 1, 0, 1, 1, 0, 1, 0)
got := make([]bool, len(exp))
exp := tools.Bools(1, 1, 0, 1, 1, 0)
got := make([]bool, len(exp) + 2)

b.AppendValues(exp, nil)
assert.NoError(t, b.AppendValueFromString("true"))
assert.NoError(t, b.AppendValueFromString("false"))
exp = tools.Bools(1, 1, 0, 1, 1, 0, 1, 0)
a := b.NewBooleanArray()
b.Release()
for i := 0; i < a.Len(); i++ {
got[i] = a.Value(i)
}
assert.Equal(t, exp, got)

a.Release()
}

Expand Down
3 changes: 3 additions & 0 deletions go/arrow/array/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ type Builder interface {
// AppendEmptyValue adds a new zero value of the appropriate type
AppendEmptyValue()

// AppendValueFromString adds a new value from a string. Inverse of array.ValueStr(i int) string
AppendValueFromString(string) error

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
Reserve(n int)
Expand Down
21 changes: 21 additions & 0 deletions go/arrow/array/decimal128.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ func NewDecimal128Data(data arrow.ArrayData) *Decimal128 {
}

func (a *Decimal128) Value(i int) decimal128.Num { return a.values[i] }
// ValueStr returns the value at index i rendered as text: NullValueStr when
// the slot is null, otherwise the string produced by GetOneForMarshal.
// The type assertion panics if GetOneForMarshal yields a non-string.
func (a *Decimal128) ValueStr(i int) string {
	if a.IsNull(i) {
		return NullValueStr
	}
	text := a.GetOneForMarshal(i).(string)
	return text
}

func (a *Decimal128) Values() []decimal128.Num { return a.values }

Expand Down Expand Up @@ -260,6 +267,20 @@ func (b *Decimal128Builder) newData() (data *Data) {
return
}

// AppendValueFromString appends one element parsed from s.
//
// An input equal to NullValueStr appends a null. Any other input is parsed
// with decimal128.FromString using the builder type's precision and scale.
// When parsing fails a null is appended and the parse error is returned, so
// the builder grows by one element on every call.
func (b *Decimal128Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}

	num, err := decimal128.FromString(s, b.dtype.Precision, b.dtype.Scale)
	if err == nil {
		b.Append(num)
		return nil
	}

	// Unparseable input still occupies a (null) slot.
	b.AppendNull()
	return err
}

func (b *Decimal128Builder) UnmarshalOne(dec *json.Decoder) error {
t, err := dec.Token()
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions go/arrow/array/decimal128_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ func TestDecimal128Slice(t *testing.T) {
if got, want := v.String(), `[(null) {4 -4}]`; got != want {
t.Fatalf("got=%q, want=%q", got, want)
}
assert.Equal(t, "(null)", v.ValueStr(0))
assert.Equal(t, "-7.378697629e+18", v.ValueStr(1))

if got, want := v.NullN(), 1; got != want {
t.Fatalf("got=%q, want=%q", got, want)
Expand Down
21 changes: 21 additions & 0 deletions go/arrow/array/decimal256.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ func NewDecimal256Data(data arrow.ArrayData) *Decimal256 {
}

func (a *Decimal256) Value(i int) decimal256.Num { return a.values[i] }
// ValueStr returns the value at index i rendered as text: NullValueStr when
// the slot is null, otherwise the string produced by GetOneForMarshal.
// The type assertion panics if GetOneForMarshal yields a non-string.
func (a *Decimal256) ValueStr(i int) string {
	if a.IsNull(i) {
		return NullValueStr
	}
	text := a.GetOneForMarshal(i).(string)
	return text
}

func (a *Decimal256) Values() []decimal256.Num { return a.values }

Expand Down Expand Up @@ -259,6 +266,20 @@ func (b *Decimal256Builder) newData() (data *Data) {
return
}

// AppendValueFromString appends one element parsed from s.
//
// An input equal to NullValueStr appends a null. Any other input is parsed
// with decimal256.FromString using the builder type's precision and scale.
// When parsing fails a null is appended and the parse error is returned, so
// the builder grows by one element on every call.
func (b *Decimal256Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}

	num, err := decimal256.FromString(s, b.dtype.Precision, b.dtype.Scale)
	if err == nil {
		b.Append(num)
		return nil
	}

	// Unparseable input still occupies a (null) slot.
	b.AppendNull()
	return err
}

func (b *Decimal256Builder) UnmarshalOne(dec *json.Decoder) error {
t, err := dec.Token()
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions go/arrow/array/decimal256_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ func TestDecimal256Slice(t *testing.T) {
if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want {
t.Fatalf("got=%q, want=%q", got, want)
}
assert.Equal(t, "(null)", v.ValueStr(0))
assert.Equal(t, "2.510840694e+57", v.ValueStr(1))

if got, want := v.NullN(), 1; got != want {
t.Fatalf("got=%q, want=%q", got, want)
Expand Down
8 changes: 8 additions & 0 deletions go/arrow/array/dictionary.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ func (d *Dictionary) CanCompareIndices(other *Dictionary) bool {
return ArraySliceEqual(d.Dictionary(), 0, minlen, other.Dictionary(), 0, minlen)
}

// ValueStr returns the value at index i rendered as text.
//
// A null slot yields NullValueStr. The null check happens before the index
// is resolved because the index stored in a null slot is not meaningful, and
// looking it up would report an unrelated dictionary entry (or go out of
// range). For a valid slot the result is the dictionary's own ValueStr for
// the entry that slot refers to.
func (d *Dictionary) ValueStr(i int) string {
	if d.IsNull(i) {
		return NullValueStr
	}
	return d.Dictionary().ValueStr(d.GetValueIndex(i))
}

// String renders the array as its dictionary followed by its indices, each
// formatted with %v.
func (d *Dictionary) String() string {
	dict := d.Dictionary()
	idx := d.Indices()
	return fmt.Sprintf("{ dictionary: %v\n indices: %v }", dict, idx)
}
Expand Down Expand Up @@ -737,6 +741,10 @@ func (b *dictionaryBuilder) Unmarshal(dec *json.Decoder) error {
return b.AppendArray(arr)
}

// AppendValueFromString is not supported for dictionary builders yet: it
// appends nothing and always returns an error wrapping
// arrow.ErrNotImplemented.
func (b *dictionaryBuilder) AppendValueFromString(s string) error {
return fmt.Errorf("%w: AppendValueFromString to dictionary not yet implemented", arrow.ErrNotImplemented)
}

// UnmarshalOne is not supported for dictionary builders yet: it reads nothing
// from dec and always returns an error.
// NOTE(review): unlike AppendValueFromString, this error does not wrap
// arrow.ErrNotImplemented — confirm whether that difference is intentional.
func (b *dictionaryBuilder) UnmarshalOne(dec *json.Decoder) error {
return errors.New("unmarshal json to dictionary not yet implemented")
}
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/dictionary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,7 @@ func TestDictionaryGetValueIndex(t *testing.T) {
const offset = 1
slicedDictArr := array.NewSlice(dictArr, offset, int64(dictArr.Len()))
defer slicedDictArr.Release()

assert.EqualValues(t, "10", slicedDictArr.(*array.Dictionary).ValueStr(0))
for i := 0; i < indices.Len(); i++ {
assert.EqualValues(t, i64Index.Value(i), dictArr.GetValueIndex(i))
if i < slicedDictArr.Len() {
Expand Down
15 changes: 15 additions & 0 deletions go/arrow/array/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"math"
"reflect"
"strings"
"sync/atomic"

"github.com/apache/arrow/go/v12/arrow"
Expand Down Expand Up @@ -192,6 +193,15 @@ func (r *RunEndEncoded) GetPhysicalLength() int {
return encoded.GetPhysicalLength(r.data)
}

// ValueStr formats entry i as "{<run end> -> <value>}", taking both parts
// from GetOneForMarshal on the ends and values children. A json.RawMessage
// value is converted to a string so it prints as text rather than as a byte
// slice.
//
// NOTE(review): i is handed to the ends/values children unchanged, so it
// addresses a run rather than a logical element — confirm this is what
// callers expect. Null values are not mapped to NullValueStr here.
func (r *RunEndEncoded) ValueStr(i int) string {
	var shown interface{}
	switch v := r.values.GetOneForMarshal(i).(type) {
	case json.RawMessage:
		shown = string(v)
	default:
		shown = v
	}

	runEnd := r.ends.GetOneForMarshal(i)
	return fmt.Sprintf("{%d -> %v}", runEnd, shown)
}
func (r *RunEndEncoded) String() string {
var buf bytes.Buffer
buf.WriteByte('[')
Expand Down Expand Up @@ -397,6 +407,11 @@ func (b *RunEndEncodedBuilder) newData() (data *Data) {
return
}

// AppendValueFromString treats s as a JSON document and appends the value it
// encodes by delegating to UnmarshalOne; any error from UnmarshalOne is
// returned unchanged.
//
// NOTE(review): RunEndEncoded.ValueStr emits "{end -> value}", which is not
// JSON, so its output may not round-trip through this method — confirm.
func (b *RunEndEncodedBuilder) AppendValueFromString(s string) error {
	return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s)))
}

func (b *RunEndEncodedBuilder) UnmarshalOne(dec *json.Decoder) error {
var value interface{}
if err := dec.Decode(&value); err != nil {
Expand Down
1 change: 1 addition & 0 deletions go/arrow/array/encoded_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ func TestRunEndEncodedBuilder(t *testing.T) {
assert.Equal(t, "of", strValues.Value(3))
assert.Equal(t, "RLE", strValues.Value(4))
assert.True(t, strValues.IsNull(5))
assert.Equal(t, "Hello", strValues.ValueStr(0))
}

func TestREEBuilderOverflow(t *testing.T) {
Expand Down

0 comments on commit d57710a

Please sign in to comment.