// Copyright (c) 2016 Paul Jolly <paul@myitcv.org.uk>, all rights reserved.
// Use of this source code is governed by a license found in the LICENSE file.

package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"os"
)

// Assumes that the files have been provided in order from oldest to newest.
//
// Read the files last first, end to beginning, taking unique history
// entries (i.e. the most recent history entries are considered first),
// then output the reverse of that: oldest first, newest last.
func main() {
	x := os.Args[1:]
	var out []string
	seen := make(map[string]struct{})
	for i := len(x) - 1; i >= 0; i-- {
		fn := x[i]
		fh, err := os.Open(fn)
		if err != nil {
			panic(fmt.Sprintf("could not open %v: %v", fn, err))
		}
		scanner := NewScanner(fh)
		for scanner.Scan() {
			line := scanner.Text()
			if _, ok := seen[line]; !ok {
				seen[line] = struct{}{}
				out = append(out, line)
			}
		}
		if err := scanner.Err(); err != nil {
			panic(fmt.Sprintf("could not scan %v: %v", fn, err))
		}
		err = fh.Close()
		if err != nil {
			panic(fmt.Sprintf("could not close %v: %v", fn, err))
		}
	}
	for i := len(out) - 1; i >= 0; i-- {
		fmt.Println(out[i])
	}
}
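
// A sketch of the intended invocation, assuming the binary is built as
// "histmerge" (name hypothetical) and the history files are listed oldest
// to newest:
//
//	histmerge ~/.bash_history.1 ~/.bash_history > merged
//
// Each command survives once, at the position of its most recent use, and
// the merged output is printed oldest first.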

// What follows is a copy of https://github.com/rogpeppe/rog-go/blob/master/reverse/scan.go.

const maxBufSize = 64 * 1024

// Scanner presents the same interface as bufio.Scanner
// except it scans tokens in reverse from the end of
// a file instead of forwards from the beginning.
//
// It may not work correctly if the split function can return an
// error from reading halfway through a token.
// That is not the case for any of the Scan* functions
// in bufio.
type Scanner struct {
	r     io.ReadSeeker
	split bufio.SplitFunc

	// buf holds currently buffered data.
	buf []byte

	// offset holds the file offset of the data in buf.
	offset int64

	// atEOF reports whether the buffer is located at the end of the file.
	atEOF bool

	// tokens holds any currently unscanned tokens in buf.
	tokens [][]byte

	// partialToken holds the size of the partial token at the start of buf.
	partialToken int

	// err holds any error encountered.
	err error
}
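
// A minimal sketch of driving the Scanner directly; lastLines is an
// illustrative helper, not part of the original program.

// lastLines returns up to n trailing lines of r, most recent first.
func lastLines(r io.ReadSeeker, n int) ([]string, error) {
	s := NewScanner(r)
	var lines []string
	for len(lines) < n && s.Scan() {
		lines = append(lines, s.Text())
	}
	return lines, s.Err()
}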

// TODO make NewScanner take a ReaderAt rather
// than a ReadSeeker.
// TODO provide a FileOffset method that returns
// the offset of the token in the file.

// NewScanner returns a new Scanner to read tokens
// in reverse from r. The split function defaults to bufio.ScanLines.
func NewScanner(r io.ReadSeeker) *Scanner {
	b := &Scanner{
		r:     r,
		buf:   make([]byte, 4096),
		atEOF: true,
		split: bufio.ScanLines,
	}
	// Start at the end of the file; any seek error surfaces via Err.
	b.offset, b.err = r.Seek(0, io.SeekEnd)
	return b
}

// fillbuf reads the chunk of the file preceding the current offset into
// buf and splits it into tokens, leaving any partial token at the start
// of buf for the next fill.
func (b *Scanner) fillbuf() error {
	b.tokens = b.tokens[:0]
	if b.offset == 0 {
		return io.EOF
	}
	space := len(b.buf) - b.partialToken
	if space == 0 {
		// Partial token fills the buffer, so expand it.
		if len(b.buf) >= maxBufSize {
			return errors.New("token too long")
		}
		n := len(b.buf) * 2
		if n > maxBufSize {
			n = maxBufSize
		}
		newBuf := make([]byte, n)
		copy(newBuf, b.buf[0:b.partialToken])
		b.buf = newBuf
		space = len(b.buf) - b.partialToken
	}
	if int64(space) > b.offset {
		// We have less than the given buffer's space
		// left to read, so shrink the buffer to simplify
		// the remaining logic by preserving the
		// invariants that b.partialToken+space == len(buf)
		// and the data is read into the start of buf.
		b.buf = b.buf[0 : b.partialToken+int(b.offset)]
		space = len(b.buf) - b.partialToken
	}
	newOffset := b.offset - int64(space)
	// Copy the old partial token to the end of the buffer.
	copy(b.buf[space:], b.buf[0:b.partialToken])
	_, err := b.r.Seek(newOffset, io.SeekStart)
	if err != nil {
		return err
	}
	b.offset = newOffset
	if _, err := io.ReadFull(b.r, b.buf[0:space]); err != nil {
		// TODO Cope better with io.ErrUnexpectedEOF at the
		// end of a file - historically some versions of NFS
		// can report a file size that's larger than the actual
		// file size.
		return err
	}
	// Populate b.tokens with the tokens in the buffer.
	if b.offset > 0 {
		// We're not at the start of the file - read the first
		// token to find out where the token boundary is, but
		// don't treat it as an actual token, because we're
		// probably not at its start.
		advance, _, err := b.split(b.buf, b.atEOF)
		if err != nil {
			// If the split function can return an error
			// when starting at a non-token boundary, this
			// will happen and there's not much we can do
			// about it other than scanning forward a byte
			// at a time in a horribly inefficient manner,
			// so just return the error.
			return err
		}
		b.partialToken = advance
		if advance == 0 || advance == len(b.buf) {
			// There are no more tokens in the buffer,
			// or the single token fills the entire buffer,
			// so try again (the buffer will expand if
			// necessary).
			return b.fillbuf()
		}
	} else {
		b.partialToken = 0
	}
	for i := b.partialToken; i < len(b.buf); {
		advance, token, err := b.split(b.buf[i:], b.atEOF)
		if err != nil {
			// We've got an error - avoid returning
			// any tokens encountered earlier in this
			// buffer scan, otherwise we risk skipping
			// tokens before returning the error.
			b.tokens = b.tokens[:0]
			return err
		}
		if advance == 0 {
			// There's no remaining token in the buffer.
			break
		}
		b.tokens = append(b.tokens, token)
		i += advance
	}
	b.atEOF = false
	if len(b.tokens) == 0 {
		// There were no tokens found - that means that
		// although we did find a partial token at the start,
		// there were no other tokens, so we need
		// to scan back further.
		return b.fillbuf()
	}
	return nil
}

// Scan advances the Scanner to the next token, which will then be
// available through the Bytes or Text method. It returns false when the
// scan stops, either by reaching the end of the input or an error.
// After Scan returns false, the Err method will return any error that
// occurred during scanning, except that if it was io.EOF, Err
// will return nil.
func (b *Scanner) Scan() bool {
	// Tokens are held in buffer order and consumed from the end,
	// so discard the token returned by the previous call to Scan.
	if len(b.tokens) > 0 {
		b.tokens = b.tokens[0 : len(b.tokens)-1]
	}
	if len(b.tokens) > 0 {
		return true
	}
	if b.err != nil {
		return false
	}
	b.err = b.fillbuf()
	return len(b.tokens) > 0
}

// Split sets the split function for the Scanner. If called, it must be
// called before Scan. The default split function is bufio.ScanLines.
func (b *Scanner) Split(split bufio.SplitFunc) {
	b.split = split
}
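
// For example, a sketch of scanning words in reverse: each token is a
// whole word, so only the word order is reversed, not the bytes within
// a word.
//
//	s := NewScanner(f)
//	s.Split(bufio.ScanWords)
//	for s.Scan() {
//		fmt.Println(s.Text()) // words, last first
//	}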

// Bytes returns the most recent token generated by a call to Scan.
// The underlying array may point to data that will be overwritten
// by a subsequent call to Scan. It does no allocation.
func (b *Scanner) Bytes() []byte {
	return b.tokens[len(b.tokens)-1]
}

// Text returns the most recent token generated by a call to Scan
// as a newly allocated string holding its bytes.
func (b *Scanner) Text() string {
	return string(b.Bytes())
}

// Err returns the first non-EOF error that was encountered by the Scanner.
func (b *Scanner) Err() error {
	if len(b.tokens) > 0 {
		return nil
	}
	if b.err == io.EOF {
		return nil
	}
	return b.err
}
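
// The helper below is an illustrative sketch (printReversed is not part
// of the original program) showing the full Scan/Err pattern for this
// Scanner, mirroring how bufio.Scanner is normally driven.
func printReversed(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()
	s := NewScanner(f)
	for s.Scan() {
		fmt.Println(s.Text()) // lines are produced newest first
	}
	return s.Err()
}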