/
dump.go
108 lines (94 loc) · 2.52 KB
/
dump.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package main
import (
"encoding/json"
"fmt"
"github.com/m-mizutani/minerva/pkg/models"
"github.com/pkg/errors"
cli "github.com/urfave/cli/v2"
"github.com/xitongsys/parquet-go-source/local"
"github.com/xitongsys/parquet-go/reader"
)
// dumpArguments holds the flag values collected for the "dump" subcommand.
type dumpArguments struct {
// messageFiles is the destination of the --message-file (-m) flag.
messageFiles cli.StringSlice
// indexFiles is the destination of the --index-file (-i) flag.
indexFiles cli.StringSlice
}
// dumpCommand builds the "dump" subcommand, which prints the records stored
// in the given index and message parquet files as JSON, one record per line.
//
// args points to the shared top-level arguments; it is dereferenced only when
// the action runs, so the value passed on to dumpAction reflects its state at
// that time. The flag destinations live in a local dumpArguments value that
// the returned command's Action closes over.
func dumpCommand(args *arguments) *cli.Command {
	var dumpArgs dumpArguments

	return &cli.Command{
		Name: "dump",
		// The previous text ("Invoke merge process") described a different
		// command; the help output now says what dump actually does.
		Usage: "Dump records of parquet files as JSON",
		Action: func(c *cli.Context) error {
			return dumpAction(*args, dumpArgs)
		},
		Flags: []cli.Flag{
			&cli.StringSliceFlag{
				Name:        "index-file",
				Aliases:     []string{"i"},
				Usage:       "Index parquet file path",
				Destination: &dumpArgs.indexFiles,
			},
			&cli.StringSliceFlag{
				Name:        "message-file",
				Aliases:     []string{"m"},
				Usage:       "Message parquet file path",
				Destination: &dumpArgs.messageFiles,
			},
		},
	}
}
// Callback types that let dumpParquetFile work with different record types.
type (
	// newRecord returns a record value; dumpParquetFile passes the result
	// to reader.NewParquetReader when opening a file.
	newRecord func() models.Record

	// readRecord reads from pr and returns a single record, or an error.
	// dumpParquetFile calls it once per row.
	readRecord func(pr *reader.ParquetReader) (models.Record, error)
)
// newIndexRecord returns a pointer to a zero-valued models.IndexRecord as a
// models.Record. It satisfies the newRecord callback type.
func newIndexRecord() models.Record {
	return new(models.IndexRecord)
}
// newMessageRecord returns a pointer to a zero-valued models.MessageRecord as
// a models.Record. It satisfies the newRecord callback type.
func newMessageRecord() models.Record {
	return new(models.MessageRecord)
}
// readIndexRecord reads one models.IndexRecord from pr and returns a pointer
// to it as a models.Record.
//
// It returns the error reported by pr.Read, or an error of its own when the
// read succeeds but yields no record.
func readIndexRecord(pr *reader.ParquetReader) (models.Record, error) {
	// A one-element slice asks the reader for a single row.
	records := make([]models.IndexRecord, 1)
	if err := pr.Read(&records); err != nil {
		return nil, err
	}
	// The slice is handed over by pointer, so Read is able to replace it
	// with a shorter one (e.g. when no rows remain). Guard the index below
	// instead of panicking with an out-of-range access.
	if len(records) == 0 {
		return nil, fmt.Errorf("no index record read from parquet file")
	}
	return &records[0], nil
}
// readMessageRecord reads one models.MessageRecord from pr and returns a
// pointer to it as a models.Record.
//
// It returns the error reported by pr.Read, or an error of its own when the
// read succeeds but yields no record.
func readMessageRecord(pr *reader.ParquetReader) (models.Record, error) {
	// A one-element slice asks the reader for a single row.
	records := make([]models.MessageRecord, 1)
	if err := pr.Read(&records); err != nil {
		return nil, err
	}
	// The slice is handed over by pointer, so Read is able to replace it
	// with a shorter one (e.g. when no rows remain). Guard the index below
	// instead of panicking with an out-of-range access.
	if len(records) == 0 {
		return nil, fmt.Errorf("no message record read from parquet file")
	}
	return &records[0], nil
}
// dumpAction dumps every file named in dumpArgs: all message files first,
// then all index files, each in the order given. It stops at the first
// failure and returns that error unchanged. args is currently unused.
func dumpAction(args arguments, dumpArgs dumpArguments) error {
	// One entry per file kind, pairing its paths with the matching
	// record constructor and reader. Entry order fixes the dump order.
	targets := []struct {
		paths  []string
		create newRecord
		read   readRecord
	}{
		{dumpArgs.messageFiles.Value(), newMessageRecord, readMessageRecord},
		{dumpArgs.indexFiles.Value(), newIndexRecord, readIndexRecord},
	}

	for _, target := range targets {
		for _, path := range target.paths {
			if err := dumpParquetFile(path, target.create, target.read); err != nil {
				return err
			}
		}
	}

	return nil
}
// dumpParquetFile prints every row of the parquet file at filepath to
// standard output as JSON, one record per line.
//
// newRec supplies the record value handed to the parquet reader when the file
// is opened; read fetches one record per call. The function returns the first
// error hit while opening the file, creating the reader, reading a row, or
// marshalling it. Every error is wrapped with the file path (and the row
// index where one applies) so a failure can be traced to its source.
func dumpParquetFile(filepath string, newRec newRecord, read readRecord) error {
	fr, err := local.NewLocalFileReader(filepath)
	if err != nil {
		return errors.Wrapf(err, "Failed to open %s", filepath)
	}
	defer fr.Close()

	pr, err := reader.NewParquetReader(fr, newRec(), 1)
	if err != nil {
		return errors.Wrapf(err, "Failed to create parquet reader for %s", filepath)
	}
	defer pr.ReadStop()

	// Call read exactly once per row reported by the reader.
	num := int(pr.GetNumRows())
	for i := 0; i < num; i++ {
		rec, err := read(pr)
		if err != nil {
			return errors.Wrapf(err, "Failed to read row %d of %s", i, filepath)
		}

		raw, err := json.Marshal(rec)
		if err != nil {
			return errors.Wrapf(err, "Failed to marshal row %d of %s", i, filepath)
		}

		fmt.Println(string(raw))
	}

	return nil
}