-
Notifications
You must be signed in to change notification settings - Fork 65
/
dump.go
58 lines (51 loc) · 1.63 KB
/
dump.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
package cmd
import (
"fmt"
"os"
"strings"
"github.com/dustin/go-humanize"
"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
)
// dumpCmd is the cobra command behind "parquet-tool dump". It requires exactly
// one positional argument and hands it to dump as the file to read.
//
// NOTE(review): Short says "database" although dump prints details of a single
// Parquet file — confirm the wording is intentional.
var dumpCmd = &cobra.Command{
	Use:     "dump",
	Short:   "dump the database",
	Example: "parquet-tool dump <file.parquet>",
	Args:    cobra.ExactArgs(1),
	RunE: func(_ *cobra.Command, positional []string) error {
		// Args above guarantees exactly one element, so index 0 is safe.
		path := positional[0]
		return dump(path)
	},
}
// dump prints a human-readable summary of the Parquet file at path file to
// standard output: the schema, the total row count and, for every row group,
// its row count, its byte size and a table with one line per column chunk.
//
// The two percentage columns of the table are computed as follows:
//   - "Compression": (uncompressed - compressed) / compressed * 100
//   - "%":           compressed size of the column / row group TotalByteSize * 100
//
// When the denominator of a percentage is zero the cell shows "n/a" rather
// than the "NaN"/"+Inf" that a floating-point division by zero would print.
//
// It returns the error from openParquetFile if the file cannot be opened and
// nil otherwise.
func dump(file string) error {
	pf, closer, err := openParquetFile(file)
	if err != nil {
		return err
	}
	defer closer.Close()

	// percent formats num/den*100 with two decimals, guarding against a zero
	// denominator (e.g. an empty column chunk or an empty row group).
	percent := func(num, den float64) string {
		if den == 0 {
			return "n/a"
		}
		return fmt.Sprintf("%.2f", num/den*100)
	}

	fmt.Println("schema:", pf.Schema())
	meta := pf.Metadata()
	fmt.Println("Num Rows:", meta.NumRows)

	for i, rg := range meta.RowGroups {
		fmt.Println("\t Row group:", i)
		fmt.Println("\t\t Row Count:", rg.NumRows)
		fmt.Println("\t\t Row size:", humanize.Bytes(uint64(rg.TotalByteSize)))
		fmt.Println("\t\t Columns:")

		// One table per row group, written straight to stdout.
		table := tablewriter.NewWriter(os.Stdout)
		table.SetHeader([]string{"Col", "Type", "NumVal", "Encoding", "TotalCompressedSize", "TotalUncompressedSize", "Compression", "%"})
		for _, ds := range rg.Columns {
			compressed := ds.MetaData.TotalCompressedSize
			uncompressed := ds.MetaData.TotalUncompressedSize
			table.Append(
				[]string{
					strings.Join(ds.MetaData.PathInSchema, "/"),
					ds.MetaData.Type.String(),
					fmt.Sprintf("%d", ds.MetaData.NumValues),
					fmt.Sprintf("%s", ds.MetaData.Encoding),
					humanize.Bytes(uint64(compressed)),
					humanize.Bytes(uint64(uncompressed)),
					percent(float64(uncompressed-compressed), float64(compressed)),
					percent(float64(compressed), float64(rg.TotalByteSize)),
				})
		}
		table.Render()
	}
	return nil
}