/
compression.go
126 lines (104 loc) · 2.9 KB
/
compression.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet
import (
"bytes"
"fmt"
"github.com/golang/snappy"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/zstd"
"github.com/mindhash/arrow-parquet-go/gen-go/parquet"
"github.com/pierrec/lz4"
"io/ioutil"
"sync"
)
// compressionCodec is a package-local named type over the generated
// parquet.CompressionCodec enum, so that compression helpers can be
// attached to it as methods.
type compressionCodec parquet.CompressionCodec

// Package-wide zstd state. The encoder and decoder are created at most once,
// guarded by zstdOnce, and then reused by every call that needs them.
var (
	zstdOnce sync.Once
	zstdEnc  *zstd.Encoder
	zstdDec  *zstd.Decoder
)
func initZstd() {
zstdOnce.Do(func() {
zstdEnc, _ = zstd.NewWriter(nil, zstd.WithZeroFrames(true))
zstdDec, _ = zstd.NewReader(nil)
})
}
// compress encodes buf with the codec c and returns the compressed bytes.
//
// Behaviour per codec:
//   - UNCOMPRESSED: buf is returned as-is (the same slice, not a copy).
//   - SNAPPY: block-encoded with snappy.Encode into a fresh slice.
//   - GZIP / LZ4: streamed through the respective writer into an in-memory
//     buffer, see compressStream.
//   - ZSTD: encoded with the shared package-level encoder.
//
// Any other codec value yields an "invalid compression codec" error. An error
// is also returned when the underlying writer fails, or when the shared zstd
// encoder could not be created.
func (c compressionCodec) compress(buf []byte) ([]byte, error) {
	switch parquet.CompressionCodec(c) {
	case parquet.CompressionCodec_UNCOMPRESSED:
		return buf, nil
	case parquet.CompressionCodec_SNAPPY:
		return snappy.Encode(nil, buf), nil
	case parquet.CompressionCodec_GZIP:
		out := new(bytes.Buffer)
		return compressStream(out, gzip.NewWriter(out), buf)
	case parquet.CompressionCodec_LZ4:
		// NOTE(review): lz4.NewWriter presumably emits the LZ4 frame format,
		// while the Parquet LZ4 codec is specified with a different framing;
		// confirm interoperability with other Parquet readers.
		out := new(bytes.Buffer)
		return compressStream(out, lz4.NewWriter(out), buf)
	case parquet.CompressionCodec_ZSTD:
		initZstd()
		// initZstd discards constructor errors, so guard against a nil
		// encoder instead of panicking on the method call below.
		if zstdEnc == nil {
			return nil, fmt.Errorf("zstd encoder is not initialized")
		}
		return zstdEnc.EncodeAll(buf, nil), nil
	}
	return nil, fmt.Errorf("invalid compression codec %v", c)
}

// streamCompressor is the subset of the gzip and lz4 writer APIs that
// compressStream needs.
type streamCompressor interface {
	Write(p []byte) (int, error)
	Flush() error
	Close() error
}

// compressStream writes all of buf through w, flushes and closes w, and
// returns the bytes accumulated in out. out must be the buffer w writes into.
func compressStream(out *bytes.Buffer, w streamCompressor, buf []byte) ([]byte, error) {
	n, err := w.Write(buf)
	if err != nil {
		return nil, err
	}
	if n != len(buf) {
		return nil, fmt.Errorf("short writes")
	}
	// Flush before Close is kept so the emitted bytes stay identical to what
	// this code produced before the helper was extracted.
	if err := w.Flush(); err != nil {
		return nil, err
	}
	if err := w.Close(); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// uncompress decodes buf, which must have been compressed with codec c, and
// returns the decompressed bytes.
//
// Behaviour per codec:
//   - UNCOMPRESSED: buf is returned as-is (the same slice, not a copy).
//   - SNAPPY: block-decoded with snappy.Decode into a fresh slice.
//   - GZIP / LZ4: the whole stream is read into memory via the respective
//     reader.
//   - ZSTD: decoded with the shared package-level decoder.
//
// Any other codec value yields an "invalid compression codec" error. Decoder
// errors are returned unchanged, and an error is returned when the shared
// zstd decoder could not be created.
func (c compressionCodec) uncompress(buf []byte) ([]byte, error) {
	switch parquet.CompressionCodec(c) {
	case parquet.CompressionCodec_UNCOMPRESSED:
		return buf, nil
	case parquet.CompressionCodec_SNAPPY:
		return snappy.Decode(nil, buf)
	case parquet.CompressionCodec_GZIP:
		reader, err := gzip.NewReader(bytes.NewReader(buf))
		if err != nil {
			return nil, err
		}
		// The source is an in-memory reader; Close's error is not inspected.
		defer reader.Close()
		return ioutil.ReadAll(reader)
	case parquet.CompressionCodec_LZ4:
		return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(buf)))
	case parquet.CompressionCodec_ZSTD:
		initZstd()
		// initZstd discards constructor errors, so guard against a nil
		// decoder instead of panicking on the method call below.
		if zstdDec == nil {
			return nil, fmt.Errorf("zstd decoder is not initialized")
		}
		return zstdDec.DecodeAll(buf, nil)
	}
	return nil, fmt.Errorf("invalid compression codec %v", c)
}