-
Notifications
You must be signed in to change notification settings - Fork 0
/
stringcol.go
97 lines (82 loc) · 1.84 KB
/
stringcol.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package cof
import (
"encoding/binary"
"fmt"
"io"
)
// MaxSize is an untyped constant with the value 1e9 (one billion).
// NOTE(review): nothing in the visible part of this file refers to it;
// presumably it is a size limit enforced elsewhere in the package — confirm.
const MaxSize = 1e9
// StringCol is a column of strings kept in dictionary-encoded form: each
// distinct string is given an integer id, and every row stores only that id.
type StringCol struct {
	// dict maps each distinct string added so far to its id.
	dict map[string]int64
	// values holds one dictionary id per row, in the order rows were added.
	values []int64
}
// NewStringCol returns a pointer to an empty StringCol whose dictionary and
// row storage are both allocated (non-nil) and hold no entries.
func NewStringCol() *StringCol {
	col := new(StringCol)
	col.dict = make(map[string]int64)
	col.values = make([]int64, 0)
	return col
}
// Type reports the name of this column's type, which is always "string".
func (c *StringCol) Type() string {
	const typeName = "string"
	return typeName
}
// Rows returns the number of rows currently stored in the column, i.e. the
// length of the slice of dictionary ids.
func (c *StringCol) Rows() int {
	return len(c.values)
}
// Add appends one row to the column.
//
// ival must hold a string; any other dynamic type is rejected with
// ErrBadType and the column is left unchanged. A string that has not been
// seen before is given the next dictionary id (the current size of the
// dictionary); a repeated string reuses its existing id. The id is then
// appended to the rows. On success Add returns nil.
//
// Add also works on a zero-value StringCol: the dictionary is created on
// first use rather than panicking on a write to a nil map.
func (c *StringCol) Add(ival interface{}) error {
	str, ok := ival.(string)
	if !ok {
		return ErrBadType
	}
	if c.dict == nil {
		c.dict = make(map[string]int64)
	}
	id, seen := c.dict[str]
	if !seen {
		id = int64(len(c.dict))
		c.dict[str] = id
	}
	c.values = append(c.values, id)
	return nil
}
// WriteTo writes every row's dictionary id to w, in row order, each encoded
// as a signed varint with binary.PutVarint. Only the ids are written; the
// dictionary itself is not.
//
// It returns the total number of bytes w reported as written, and the first
// error returned by w, if any. Bytes accepted by the failing Write call are
// included in the count.
func (c *StringCol) WriteTo(w io.Writer) (int64, error) {
	var written int64
	buf := make([]byte, binary.MaxVarintLen64)
	for _, val := range c.values {
		n := binary.PutVarint(buf, val)
		nwritten, err := w.Write(buf[:n])
		// Count before checking err: a Write may consume some bytes and
		// still fail, and those bytes did reach the writer.
		written += int64(nwritten)
		if err != nil {
			return written, err
		}
	}
	// NOTE(review): this prints the dictionary size to stdout on every
	// successful call; it looks like leftover debug output — confirm whether
	// any caller relies on it before removing.
	fmt.Println("Dict Len", len(c.dict))
	return written, nil
}
// Dict returns the column's string-to-id dictionary. The result is the
// column's own map, not a copy, so changes made through it are visible to
// the column.
func (c *StringCol) Dict() map[string]int64 {
	dictionary := c.dict
	return dictionary
}
// implement dictionary encoding
// or prefix removal
// and add needles
// func (c *StringCol) WriteTo2(w io.Writer) (int64, error) {
// written := int64(0)
// for _, val := range c.values {
// // writing the length of the string
// err := binary.Write(w, binary.LittleEndian, uint32(len(val)))
// if err != nil {
// return written, err
// }
// written += int64(binary.Size(uint32(len(val))))
// // writing the string values {length, bytes}
// nwritten, err := w.Write([]byte(val))
// if err != nil {
// return written, err
// }
// // update the total written bytes counter
// written += int64(nwritten)
// }
// return written, nil
// }
// ReadFrom is currently a placeholder: it reads nothing from rdr, leaves the
// column untouched, and always returns an empty, non-nil slice.
func (c *StringCol) ReadFrom(rdr io.Reader) []string {
	return make([]string, 0)
}
// Reset drops all rows by installing a fresh, empty (non-nil) slice. The
// dictionary is left as it is, so strings added earlier keep their ids.
// Reset always returns nil.
func (c *StringCol) Reset() error {
	c.values = make([]int64, 0)
	return nil
}