/
index.go
160 lines (142 loc) · 5.2 KB
/
index.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
package index
import (
"encoding/binary"
"fmt"
"io"
internalio "github.com/sevenrats/boxo/ipld/car/v2/internal/io"
"github.com/ipfs/go-cid"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-multihash"
"github.com/multiformats/go-varint"
)
// CarIndexNone is a sentinel value used in place of a multicodec code for the
// index, indicating that no index is present.
//
// It is declared as an untyped constant, so it takes on the type of whatever
// value it is compared with or assigned to.
//
// NOTE(review): New has no case for this value, so presumably callers must
// check for it before attempting to construct an index — confirm.
const CarIndexNone = 0x300000
// Record is a pre-processed entry describing one CAR item: the CID of the
// item together with the offset at which it is located.
type Record struct {
	cid.Cid
	Offset uint64
}

// Index provides an interface for looking up the byte offset of a given CID.
//
// Each indexing mechanism is free to match CIDs however it sees fit. For
// example, multicodec.CarIndexSorted only indexes multihash digests, meaning
// that Get and GetAll will find matching blocks even if the CID's encoding
// multicodec differs. Other index implementations might index the entire
// CID, the entire multihash, or just part of a multihash's digest.
//
// See: multicodec.CarIndexSorted, multicodec.CarMultihashIndexSorted
type Index interface {
	// Codec provides the multicodec code that the index implements.
	//
	// This may be a reserved code if the index implementation is not
	// defined in a spec.
	Codec() multicodec.Code

	// Marshal encodes the index in serial form.
	Marshal(w io.Writer) (uint64, error)

	// Unmarshal decodes the index from its serial form.
	// Note that this copies the entire index into memory.
	//
	// Do not unmarshal an index from untrusted CARv2 files. Instead, the
	// index should be regenerated from the CARv2 data payload.
	Unmarshal(r io.Reader) error

	// Load inserts a number of records into the index.
	//
	// All given records are loaded. Any filtering of the records, such as
	// exclusion of CIDs with the multihash.IDENTITY code, must occur prior
	// to calling this function.
	//
	// The actual information extracted and indexed from the given records
	// depends entirely on the concrete index implementation. For example,
	// some index implementations may only store partial multihashes.
	Load([]Record) error

	// GetAll looks up all blocks matching a given CID, calling the given
	// function once for each of their offsets.
	//
	// GetAll stops if the given function returns false or there are no
	// more offsets, whichever happens first.
	//
	// If no error occurred and the CID isn't indexed, meaning that no
	// callbacks happen, ErrNotFound is returned.
	GetAll(cid.Cid, func(uint64) bool) error
}

// IterableIndex is an index which supports iterating over its elements.
type IterableIndex interface {
	Index

	// ForEach takes a callback function that will be called on each entry
	// in the index. The arguments to the callback are the multihash of the
	// element and the offset in the car file where the element appears.
	//
	// If the callback returns a non-nil error, the iteration is aborted
	// and ForEach returns that error to the user.
	//
	// An index may contain multiple offsets corresponding to the same
	// multihash, e.g. via duplicate blocks. In such cases, the given
	// function may be called multiple times with the same multihash but a
	// different offset.
	//
	// The order of calls to the given function is deterministic, but
	// entirely index-specific.
	ForEach(func(multihash.Multihash, uint64) error) error
}
// GetFirst returns the offset of the first indexed entry matching key.
//
// It is a thin wrapper over Index.GetAll: the callback records the first
// offset it is handed and then returns false to stop the lookup. The error
// from GetAll is returned unchanged; if the callback was never invoked the
// returned offset is zero.
func GetFirst(idx Index, key cid.Cid) (uint64, error) {
	var offset uint64
	stopAtFirst := func(o uint64) bool {
		offset = o
		return false
	}
	err := idx.GetAll(key, stopAtFirst)
	return offset, err
}
// New constructs a new index corresponding to the given CAR index codec.
//
// The supported codecs are multicodec.CarIndexSorted and
// multicodec.CarMultihashIndexSorted. Any other codec yields a nil Index and
// a non-nil error that names the rejected codec.
func New(codec multicodec.Code) (Index, error) {
	switch codec {
	case multicodec.CarIndexSorted:
		return newSorted(), nil
	case multicodec.CarMultihashIndexSorted:
		return NewMultihashSorted(), nil
	default:
		return nil, fmt.Errorf("unknown index codec: %v", codec)
	}
}
// WriteTo writes the given idx into w, prefixed with its codec.
//
// The codec reported by idx.Codec is written first as an unsigned varint,
// followed by the output of idx.Marshal. The returned count is the number of
// bytes written for the codec prefix plus the count reported by Marshal. If
// writing the prefix fails, Marshal is not called and only the prefix count
// is returned along with the error.
//
// The written bytes can be read back using ReadFrom.
func WriteTo(idx Index, w io.Writer) (uint64, error) {
	var scratch [binary.MaxVarintLen64]byte
	size := varint.PutUvarint(scratch[:], uint64(idx.Codec()))

	prefixLen, err := w.Write(scratch[:size])
	if err != nil {
		return uint64(prefixLen), err
	}

	bodyLen, err := idx.Marshal(w)
	return uint64(prefixLen) + bodyLen, err
}
// ReadFrom reads an index from r.
//
// The codec is decoded from the leading varint of r via ReadCodec, a matching
// index is constructed with New, and the remainder of r is handed to that
// index's Unmarshal. A nil Index and the error are returned if any of these
// steps fails, including when the codec is not known.
//
// Attempting to read index data from untrusted sources is not recommended.
// Instead, the index should be regenerated from the CARv2 data payload.
func ReadFrom(r io.Reader) (Index, error) {
	code, err := ReadCodec(r)
	if err != nil {
		return nil, err
	}

	decoded, err := New(code)
	if err != nil {
		return nil, err
	}

	err = decoded.Unmarshal(r)
	if err != nil {
		return nil, err
	}
	return decoded, nil
}
// ReadCodec reads the codec of the index by decoding the first unsigned
// varint read from r.
//
// r is adapted with internalio.ToByteReader before decoding. On failure the
// zero code is returned together with the decoding error.
func ReadCodec(r io.Reader) (multicodec.Code, error) {
	byteReader := internalio.ToByteReader(r)
	raw, err := varint.ReadUvarint(byteReader)
	if err != nil {
		return 0, err
	}
	return multicodec.Code(raw), nil
}