-
Notifications
You must be signed in to change notification settings - Fork 1
/
s2db_arrow_reader.go
87 lines (78 loc) · 3.03 KB
/
s2db_arrow_reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package s2db_arrow_driver
import (
"context"
"errors"
"github.com/apache/arrow/go/v12/arrow"
)
// S2DBArrowReader provides an API for reading Arrow data from the SingleStore database.
// Use the NewS2DBArrowReader function to create a new instance of S2DBArrowReader.
type S2DBArrowReader interface {
// GetNextArrowRecordBatch fetches a single arrow.Record from the server.
// It returns nil as the first element of the result tuple when there are no more rows to fetch.
// The returned Record must be Release()'d after use.
GetNextArrowRecordBatch() (arrow.Record, error)
// Close finalizes reading of the query results.
// It releases all acquired resources.
Close() error
}
// S2DBArrowReaderConfig holds the settings passed to NewS2DBArrowReader.
// Conn and Query are required: NewS2DBArrowReader returns an error when
// Conn is nil or Query is empty.
type S2DBArrowReaderConfig struct {
// Conn is the connection wrapper that will be used to communicate with the database.
// Required.
// NOTE(review): presumably wraps a sql.DB - confirm against the S2SqlDbWrapper definition.
Conn S2SqlDbWrapper
// Query is the SQL query that will be executed. Required.
Query string
// Args are the arguments for placeholder parameters in the query.
Args []interface{}
// RecordSize is the maximum number of rows in each resulting record.
// NewS2DBArrowReader uses 10000 when it is left at zero.
RecordSize int64
// UseClientConvesion indicates whether the data should be converted to the
// Arrow Record format on the client. For production use, it should be false.
// NewS2DBArrowReader only consults this flag when ParallelReadConfig is nil:
// true selects NewS2DBArrowReaderImpl, false selects NewS2DBServerArrowReaderImpl.
// The name misspells "Conversion"; it is kept because renaming an exported
// field would break existing callers.
UseClientConvesion bool
// ParallelReadConfig specifies additional configuration for parallel read.
// It is nil by default, which means that parallel read is not used.
// When it is non-nil, NewS2DBArrowReader selects NewS2DBArrowReaderParallelImpl.
ParallelReadConfig *S2DBParallelReadConfig
// EnableQueryLogging controls whether the driver should generate debug logs.
// Debug logs are printed to the standard output.
EnableQueryLogging bool
}
// S2DBParallelReadConfig holds the additional settings for parallel read.
// It is referenced from S2DBArrowReaderConfig.ParallelReadConfig.
type S2DBParallelReadConfig struct {
// DatabaseName is the name of the SingleStore database.
// It is needed to get the number of partitions from the database for parallel read.
// Required: NewS2DBArrowReader returns an error when it is empty.
DatabaseName string
// ChannelSize specifies the size of the channel buffer.
// The channel is used to store references to Arrow Records while reading is
// happening and to transfer them to the main goroutine.
// NewS2DBArrowReader uses 10000 when it is left at zero.
ChannelSize int64
// EnableDebugProfiling controls whether to profile the query.
// The profiling result is printed to the standard output.
EnableDebugProfiling bool
}
// NewS2DBArrowReader creates an instance of S2DBArrowReader.
//
// It validates conf, fills in defaults and delegates to the implementation
// constructor that matches the configuration; that constructor is what sends
// the query to the database server for execution:
//   - ParallelReadConfig == nil and UseClientConvesion: NewS2DBArrowReaderImpl
//   - ParallelReadConfig == nil otherwise: NewS2DBServerArrowReaderImpl
//   - ParallelReadConfig != nil: NewS2DBArrowReaderParallelImpl
//
// An error is returned when conf.Conn is nil, when conf.Query is empty, or
// when parallel read is requested without a DatabaseName. Any error from the
// selected implementation constructor is returned unchanged.
//
// conf is received by value and the parallel read configuration is copied
// before defaults are applied, so the caller's configuration objects are
// never modified.
func NewS2DBArrowReader(ctx context.Context, conf S2DBArrowReaderConfig) (S2DBArrowReader, error) {
	const (
		defaultRecordSize  = 10000
		defaultChannelSize = 10000
	)

	if conf.Conn == nil {
		return nil, errors.New("'Conn' is a required configuration")
	}
	if conf.Query == "" {
		return nil, errors.New("'Query' is a required configuration")
	}
	if conf.RecordSize == 0 {
		conf.RecordSize = defaultRecordSize
	}

	// Sequential read: pick client-side or server-side Arrow conversion.
	if conf.ParallelReadConfig == nil {
		if conf.UseClientConvesion {
			return NewS2DBArrowReaderImpl(ctx, conf)
		}
		return NewS2DBServerArrowReaderImpl(ctx, conf)
	}

	if conf.ParallelReadConfig.DatabaseName == "" {
		return nil, errors.New("'DatabaseName' is a required configuration for parallel read")
	}

	// ParallelReadConfig is a pointer into caller-owned memory. Apply the
	// default to a private copy so that the caller's struct is not silently
	// rewritten (and not raced on when one config is shared between calls).
	parallelConf := *conf.ParallelReadConfig
	if parallelConf.ChannelSize == 0 {
		parallelConf.ChannelSize = defaultChannelSize
	}
	conf.ParallelReadConfig = &parallelConf

	return NewS2DBArrowReaderParallelImpl(ctx, conf)
}