/*
Copyright 2017 Luke Granger-Brown
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package client
import (
"context"
"crypto/md5"
"encoding/binary"
"io"
"sort"
"golang.org/x/sync/errgroup"
"github.com/lukegb/snowstorm/ngdp"
)
const (
	// archiveConcurrentIndexFetches caps how many archive index files
	// NewArchiveMapper downloads in parallel.
	archiveConcurrentIndexFetches = 20
	// archiveIndexChunkSize is the size in bytes of one chunk of an
	// archive index file; indices are read one chunk at a time.
	archiveIndexChunkSize = 4096
	// archiveEntriesPerChunk is how many 0x18-byte entries fit in a
	// chunk (170 * 0x18 = 4080; the remainder of the chunk is unused).
	archiveEntriesPerChunk = 170
)
// archiveIndexEntry records where one file lives inside one archive:
// the file's CDN hash, the containing archive's CDN hash, and the file's
// size and byte offset within that archive. The hashes are held by
// pointer so every entry from the same archive shares a single copy.
type archiveIndexEntry struct {
	file    *ngdp.CDNHash
	archive *ngdp.CDNHash
	size    uint32
	offset  uint32
}
// asArchiveEntry converts this internal index entry into the exported
// ArchiveEntry form, dereferencing the shared hash pointer.
func (ade archiveIndexEntry) asArchiveEntry() ArchiveEntry {
	var out ArchiveEntry
	out.Archive = *ade.archive
	out.Size = ade.size
	out.Offset = ade.offset
	return out
}
// archiveIndexEntries implements sort.Interface, ordering entries by
// file CDN hash so the slice can be binary-searched (see ArchiveMapper.Map).
type archiveIndexEntries []archiveIndexEntry

func (s archiveIndexEntries) Len() int           { return len(s) }
func (s archiveIndexEntries) Less(i, j int) bool { return s[i].file.Less(*s[j].file) }
func (s archiveIndexEntries) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
// An ArchiveMapper maps file CDN hashes to their location within the set of archives.
type ArchiveMapper struct {
	// m is kept sorted by file CDN hash so Map can binary-search it.
	m []archiveIndexEntry
}
// An ArchiveEntry contains the location of a given file within the archive set.
type ArchiveEntry struct {
	// Archive is the CDN hash of the archive containing the file.
	Archive ngdp.CDNHash
	// Size is the file's length in bytes.
	Size uint32
	// Offset is the file's byte offset within the archive.
	Offset uint32
}
// Map takes a CDNHash of a desired file and returns the CDNHash of the containing archive, as well as the size and offset within the archive.
//
// If the file does not exist in any known archives, then ok will be false.
func (e *ArchiveMapper) Map(in ngdp.CDNHash) (entry ArchiveEntry, ok bool) {
	// Binary-search for the first entry whose file hash is >= in;
	// e.m is kept sorted by file hash.
	idx := sort.Search(len(e.m), func(k int) bool {
		return !e.m[k].file.Less(in)
	})
	if idx >= len(e.m) || !e.m[idx].file.Equal(in) {
		// Not present in any known archive.
		return ArchiveEntry{}, false
	}
	return e.m[idx].asArchiveEntry(), true
}
// buildArchiveMap downloads and parses the index for a single archive,
// returning a map from each contained file's CDN hash to its location
// entry within that archive.
//
// The index is read as a sequence of archiveIndexChunkSize-byte chunks,
// each holding up to archiveEntriesPerChunk 0x18-byte records: a 16-byte
// MD5 CDN hash, a big-endian uint32 size, and a big-endian uint32 offset.
// An all-zero record marks the end of a chunk's useful data.
func buildArchiveMap(ctx context.Context, llc *LowLevelClient, cdnInfo ngdp.CDNInfo, archiveHash ngdp.CDNHash) (map[ngdp.CDNHash]archiveIndexEntry, error) {
	// Retrieve the archive index.
	resp, err := llc.get(ctx, cdnInfo, ngdp.ContentTypeData, archiveHash, ".index")
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	chunk := make([]byte, archiveIndexChunkSize)
	m := make(map[ngdp.CDNHash]archiveIndexEntry)
	for {
		// Read each chunk, one at a time. A short final read (the index
		// trailer) terminates the loop.
		if _, err := io.ReadFull(resp.Body, chunk); err != nil {
			if err == io.ErrUnexpectedEOF || err == io.EOF {
				// We've reached the end of this archive index.
				break
			}
			return nil, err
		}
		// Parse out each archive entry in this chunk.
		for i := 0; i < archiveEntriesPerChunk; i++ {
			entry := chunk[i*0x18 : (i+1)*0x18]
			// An all-zero entry means the rest of the chunk is padding;
			// skip ahead to the next chunk.
			isAllZeros := true
			for _, b := range entry {
				if b != 0 {
					isAllZeros = false
					break
				}
			}
			if isAllZeros {
				break
			}
			// cdnHash is declared fresh each iteration, so taking its
			// address below yields a distinct pointer per entry.
			var cdnHash ngdp.CDNHash
			copy(cdnHash[:], entry[:md5.Size])
			size := binary.BigEndian.Uint32(entry[0x10:0x14])
			offset := binary.BigEndian.Uint32(entry[0x14:0x18])
			m[cdnHash] = archiveIndexEntry{
				file: &cdnHash,
				// archiveHash is shared by every entry from this archive.
				archive: &archiveHash,
				size:    size,
				offset:  offset,
			}
		}
	}
	return m, nil
}
// NewArchiveMapper creates a new archive mapper from the provided set of archives.
//
// It fetches every archive's index concurrently (at most
// archiveConcurrentIndexFetches in flight), merges all per-archive
// entries into one slice, and sorts it by file hash so that
// ArchiveMapper.Map can binary-search it.
func (llc *LowLevelClient) NewArchiveMapper(ctx context.Context, cdnInfo ngdp.CDNInfo, archives []ngdp.CDNHash) (*ArchiveMapper, error) {
	// Don't spin up more workers than there is work to hand out.
	workerCount := archiveConcurrentIndexFetches
	if workerCount > len(archives) {
		workerCount = len(archives)
	}

	workChan := make(chan ngdp.CDNHash)
	resultChan := make(chan map[ngdp.CDNHash]archiveIndexEntry)
	g, ctx := errgroup.WithContext(ctx)

	// Producer: enqueue each archive hash into workChan, bailing out if
	// the group's context is cancelled (e.g. a worker failed).
	g.Go(func() error {
		defer close(workChan)
		for _, archiveHash := range archives {
			select {
			case workChan <- archiveHash:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	})

	// Workers: fetch and parse archive indices.
	for n := 0; n < workerCount; n++ {
		g.Go(func() error {
			for archiveHash := range workChan {
				m, err := buildArchiveMap(ctx, llc, cdnInfo, archiveHash)
				if err != nil {
					return err
				}
				select {
				case resultChan <- m:
				case <-ctx.Done():
					return ctx.Err()
				}
			}
			return nil
		})
	}

	// Close resultChan once the producer and all workers are done so the
	// drain loop below terminates; the group's error is collected by the
	// g.Wait call after the drain.
	go func() {
		g.Wait()
		close(resultChan)
	}()

	// Drain every result. This must run to completion even on failure so
	// that workers blocked sending into resultChan can exit.
	var slcs [][]archiveIndexEntry
	count := 0
	for miniMap := range resultChan {
		slc := make([]archiveIndexEntry, 0, len(miniMap))
		for _, v := range miniMap {
			slc = append(slc, v)
		}
		slcs = append(slcs, slc)
		count += len(miniMap)
	}

	// Check for errors before spending time merging and sorting what may
	// be partial data. The group has fully completed by this point
	// (resultChan only closes after g.Wait returns), so Wait here does
	// not block.
	if err := g.Wait(); err != nil {
		return nil, err
	}

	// Merge the per-archive slices and sort by file hash.
	final := make(archiveIndexEntries, 0, count)
	for _, slc := range slcs {
		final = append(final, slc...)
	}
	slcs = nil
	sort.Sort(final)
	return &ArchiveMapper{final}, nil
}