/
csv.go
143 lines (114 loc) · 2.77 KB
/
csv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package formats
import (
	"context"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"log"
	"os"
	"regexp"
	"sort"
	"sync"

	"github.com/olivere/elastic/v7"
	"golang.org/x/sync/errgroup"
	"gopkg.in/cheggaaa/pb.v2"

	"github.com/pteich/elastic-query-export/flags"
)
// CSV writes ElasticSearch search hits to a file as CSV rows.
type CSV struct {
// Conf holds the export settings; Run reads Conf.Fields to decide which
// document fields become CSV columns.
Conf *flags.Flags
// Outfile is the file the CSV output is written to.
Outfile *os.File
// Workers is the number of goroutines Run starts to convert hits into rows;
// it is also used as the buffer size of the channel feeding the CSV writer.
Workers int
// ProgessBar is incremented by Run as rows are written to Outfile.
ProgessBar *pb.ProgressBar
}
// Run converts every search hit received on hits into one CSV row and writes
// the rows to c.Outfile, preceded by a single header row.
//
// c.Workers goroutines decode and flatten the hits concurrently; one extra
// goroutine owns the csv.Writer so that rows are never interleaved. The
// columns are c.Conf.Fields when that list is non-empty; otherwise they are
// the flattened keys of the first successfully decoded document, sorted so
// that the column order is the same on every run.
//
// Run returns only after all queued rows have been flushed to c.Outfile. It
// returns an error if writing the CSV output fails, or ctx.Err() if ctx is
// cancelled before hits is closed. Hits whose source cannot be decoded as
// JSON are logged and skipped.
func (c CSV) Run(ctx context.Context, hits <-chan *elastic.SearchHit) error {
	// A private cancel lets the writer stop the workers once the output is
	// known to be broken.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	g, ctx := errgroup.WithContext(ctx)

	csvout := make(chan []string, c.Workers)

	// writeErr is written only by the writer goroutine and read only after
	// writerDone is closed, so it needs no further synchronisation.
	var writeErr error
	writerDone := make(chan struct{})

	go func() {
		defer close(writerDone)

		w := csv.NewWriter(c.Outfile)
		for row := range csvout {
			if writeErr != nil {
				// Keep draining so that no worker blocks on a send.
				continue
			}
			if err := w.Write(row); err != nil {
				writeErr = err
				cancel()
				continue
			}
			c.ProgessBar.Increment()
		}

		if writeErr == nil {
			// Flush once at the end; Error reports any failure that occurred
			// during a previous Write or Flush.
			w.Flush()
			writeErr = w.Error()
		}
	}()

	var sendHeader sync.Once
	fields := c.Conf.Fields

	for i := 0; i < c.Workers; i++ {
		g.Go(func() error {
			for {
				var hit *elastic.SearchHit
				select {
				case <-ctx.Done():
					return ctx.Err()
				case h, ok := <-hits:
					if !ok {
						return nil
					}
					hit = h
				}

				var document map[string]interface{}
				if err := json.Unmarshal(hit.Source, &document); err != nil {
					// Skip the hit: an undecodable document must not define
					// the header or produce an all-empty row.
					log.Printf("Error unmarshal JSON from ElasticSearch - %v", err)
					continue
				}
				document = flatten(document)

				// Once.Do returns in every goroutine only after the function
				// has completed, so the header is queued (and fields is
				// final) before any data row is built.
				sendHeader.Do(func() {
					if len(fields) == 0 {
						// Build a fresh slice instead of appending to the
						// configured one, and sort it because map iteration
						// order is random.
						fields = make([]string, 0, len(document))
						for key := range document {
							fields = append(fields, key)
						}
						sort.Strings(fields)
					}
					csvout <- fields
				})

				csvdata := make([]string, 0, len(fields))
				for _, field := range fields {
					val, ok := document[field]
					if !ok || val == nil {
						csvdata = append(csvdata, "")
						continue
					}

					var outdata string
					// json.Unmarshal into interface{} yields float64 for
					// every JSON number, so the int64 case is not reached
					// for values decoded above.
					switch val := val.(type) {
					case int64:
						outdata = fmt.Sprintf("%d", val)
					case float64:
						// Print whole numbers without a fractional part.
						d := int64(val)
						if val == float64(d) {
							outdata = fmt.Sprintf("%d", d)
						} else {
							outdata = fmt.Sprintf("%f", val)
						}
					default:
						outdata = removeLBR(fmt.Sprintf("%v", val))
					}
					csvdata = append(csvdata, outdata)
				}

				// The writer drains csvout until it is closed below, so this
				// send cannot block forever.
				csvout <- csvdata
			}
		})
	}

	err := g.Wait()

	// All senders have returned; let the writer finish and wait for it so
	// that every row is on disk before the caller can close the file.
	close(csvout)
	<-writerDone

	if writeErr != nil {
		return fmt.Errorf("writing CSV data: %w", writeErr)
	}
	return err
}
// flatten returns a new map holding every entry of document plus, for each
// value that is itself a map[string]interface{}, the entries of that nested
// map under "parent.child" keys (recursively). The nested map also stays
// available under its own key. The input map is not modified.
func flatten(document map[string]interface{}) map[string]interface{} {
	flat := make(map[string]interface{})

	// walk copies node into flat, prepending prefix to every key.
	var walk func(prefix string, node map[string]interface{})
	walk = func(prefix string, node map[string]interface{}) {
		for name, v := range node {
			path := prefix + name
			flat[path] = v
			if nested, isMap := v.(map[string]interface{}); isMap {
				walk(path+".", nested)
			}
		}
	}

	walk("", document)
	return flat
}
// lineBreakPattern matches a CRLF pair or any single line-break character
// (LF, VT, FF, CR, NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR). It is compiled
// once at package initialisation instead of on every removeLBR call.
var lineBreakPattern = regexp.MustCompile(`\x{000D}\x{000A}|[\x{000A}\x{000B}\x{000C}\x{000D}\x{0085}\x{2028}\x{2029}]`)

// removeLBR returns text with every line break removed, so that the value
// stays on a single line.
func removeLBR(text string) string {
	return lineBreakPattern.ReplaceAllString(text, ``)
}