/
transporter_filestore_to_duckDB.go
75 lines (61 loc) · 1.63 KB
/
transporter_filestore_to_duckDB.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package duckdb
import (
"context"
"fmt"
"github.com/rilldata/rill/runtime/drivers"
"github.com/rilldata/rill/runtime/pkg/fileutil"
"go.uber.org/zap"
)
// fileStoreToDuckDB is a transporter that loads files obtained from a
// drivers.FileStore into a table of a drivers.OLAPStore.
type fileStoreToDuckDB struct {
	// from supplies the local file paths to ingest.
	from drivers.FileStore
	// to is the OLAP store in which the destination table is created.
	to drivers.OLAPStore
	// logger is the logger handed to the constructor.
	logger *zap.Logger
}
// NewFileStoreToDuckDB returns a drivers.Transporter backed by a
// fileStoreToDuckDB that reads from the given file store, writes to the given
// OLAP store, and uses the given logger.
func NewFileStoreToDuckDB(from drivers.FileStore, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter {
	transporter := new(fileStoreToDuckDB)
	transporter.from = from
	transporter.to = to
	transporter.logger = logger
	return transporter
}
// Compile-time check that *fileStoreToDuckDB implements drivers.Transporter.
var _ drivers.Transporter = (*fileStoreToDuckDB)(nil)
// Transfer ingests the files exposed by the file store into a table of the
// destination OLAP store.
//
// sinkProps is parsed with parseSinkProperties and determines the destination
// table; srcProps is parsed with parseFileSourceProperties and is also passed
// unmodified to the file store to resolve the local file paths. The combined
// size of those files is checked against the destination's storage limits
// before anything is loaded. The same size is reported through opts.Progress:
// as the byte target before the load and as observed bytes once it succeeds.
//
// The file format is taken from the source configuration when set; otherwise
// it is derived from the extension of the first file.
//
// Transfer returns an error when either property map fails to parse, when the
// file store fails or yields no files, when the reader expression cannot be
// built, or when table creation fails. It returns
// drivers.ErrStorageLimitExceeded when the size check fails. Errors from
// callees are returned unmodified.
func (t *fileStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[string]any, opts *drivers.TransferOptions) error {
	sinkCfg, err := parseSinkProperties(sinkProps)
	if err != nil {
		return err
	}

	srcCfg, err := parseFileSourceProperties(srcProps)
	if err != nil {
		return err
	}

	// Derive a per-call logger rather than overwriting t.logger. Reassigning
	// the field would stack another "source" field onto the shared logger on
	// every call and would be an unsynchronized write to receiver state.
	logger := t.logger.With(zap.String("source", sinkCfg.Table))

	localPaths, err := t.from.FilePaths(ctx, srcProps)
	if err != nil {
		return err
	}
	if len(localPaths) == 0 {
		return fmt.Errorf("no files to ingest")
	}
	logger.Debug("ingesting files", zap.Int("files", len(localPaths)))

	// Refuse to start the load if the files would not fit in the destination.
	size := fileSize(localPaths)
	if !sizeWithinStorageLimits(t.to, size) {
		return drivers.ErrStorageLimitExceeded
	}
	opts.Progress.Target(size, drivers.ProgressUnitByte)

	// An explicitly configured format wins; it is normalized to the
	// dot-prefixed form that the extension-based fallback produces.
	var format string
	if srcCfg.Format != "" {
		format = fmt.Sprintf(".%s", srcCfg.Format)
	} else {
		format = fileutil.FullExt(localPaths[0])
	}

	// Ingest data
	readerExpr, err := sourceReader(localPaths, format, srcCfg.DuckDB)
	if err != nil {
		return err
	}

	// NOTE(review): the third argument is passed as false, unchanged from the
	// previous implementation; presumably it selects a table rather than a
	// view — confirm against drivers.OLAPStore.
	err = t.to.CreateTableAsSelect(ctx, sinkCfg.Table, false, fmt.Sprintf("SELECT * FROM %s", readerExpr))
	if err != nil {
		return err
	}

	// All bytes are reported at once, after the load has completed.
	opts.Progress.Observe(size, drivers.ProgressUnitByte)
	return nil
}