/
bitstore.py
263 lines (231 loc) · 9.79 KB
/
bitstore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/usr/bin/env python
"""
This modules is used by the Bits and BitString classes and does not form
part of the public interface. Please do not use this module directly as
it is largely undocumented and could change without warning.
"""
import copy
class ConstByteStore(object):
"""Stores raw bytes together with a bit offset and length."""
__slots__ = ('offset', '_rawarray', 'bitlength')
def __init__(self, data, bitlength=None, offset=None):
"""data is either a bytearray or a MmapByteArray"""
self._rawarray = data
if offset is None:
offset = 0
if bitlength is None:
bitlength = 8 * len(data) - offset
self.offset = offset
self.bitlength = bitlength
def getbit(self, pos):
assert 0 <= pos < self.bitlength
byte, bit = divmod(self.offset + pos, 8)
return bool(self._rawarray[byte] & (128 >> bit))
def getbyte(self, pos):
"""Direct access to byte data."""
return self._rawarray[pos]
def getbyteslice(self, start, end):
"""Direct access to byte data."""
c = self._rawarray[start:end]
return c
@property
def bytelength(self):
if not self.bitlength:
return 0
sb = self.offset // 8
eb = (self.offset + self.bitlength - 1) // 8
return eb - sb + 1
def __copy__(self):
return ByteStore(self._rawarray[:], self.bitlength, self.offset)
def _appendstore(self, store):
"""Join another store on to the end of this one."""
if not store.bitlength:
return
# Set new array offset to the number of bits in the final byte of current array.
store = offsetcopy(store, (self.offset + self.bitlength) % 8)
if store.offset:
# first do the byte with the join.
joinval = (self._rawarray.pop() & (255 ^ (255 >> store.offset)) |
(store.getbyte(0) & (255 >> store.offset)))
self._rawarray.append(joinval)
self._rawarray.extend(store._rawarray[1:])
else:
self._rawarray.extend(store._rawarray)
self.bitlength += store.bitlength
@property
def byteoffset(self):
return self.offset // 8
@property
def rawbytes(self):
return self._rawarray
class ByteStore(ConstByteStore):
__slots__ = ()
def setbit(self, pos):
assert 0 <= pos < self.bitlength
byte, bit = divmod(self.offset + pos, 8)
self._rawarray[byte] |= (128 >> bit)
def unsetbit(self, pos):
assert 0 <= pos < self.bitlength
byte, bit = divmod(self.offset + pos, 8)
self._rawarray[byte] &= ~(128 >> bit)
def invertbit(self, pos):
assert 0 <= pos < self.bitlength
byte, bit = divmod(self.offset + pos, 8)
self._rawarray[byte] ^= (128 >> bit)
def setbyte(self, pos, value):
self._rawarray[pos + self.byteoffset] = value
def setbyteslice(self, start, end, value):
self._rawarray[start + self.byteoffset:end + self.byteoffset] = value
def appendstore(self, store):
"""Join another store on to the end of this one."""
self._appendstore(store)
def prependstore(self, store):
"""Join another store on to the start of this one."""
if not store.bitlength:
return
# Set the offset of copy of store so that it's final byte
# ends in a position that matches the offset of self,
# then join self on to the end of it.
store = offsetcopy(store, (self.offset - store.bitlength) % 8)
assert (store.offset + store.bitlength) % 8 == self.offset
if self.offset:
# first do the byte with the join.
store.setbyte(-1, (store.getbyte(-1) & (255 ^ (255 >> self.offset)) |\
(self._rawarray[0] & (255 >> self.offset))))
store._rawarray.extend(self._rawarray[1: self.bytelength])
else:
store._rawarray.extend(self._rawarray[0: self.bytelength])
self._rawarray = store._rawarray
self.offset = store.offset
self.bitlength += store.bitlength
def offsetcopy(s, newoffset):
"""Return a copy of s with the newoffset."""
assert 0 <= newoffset < 8
if not s.bitlength:
return copy.copy(s)
else:
if newoffset == s.offset % 8:
return ByteStore(s.getbyteslice(0, s.bytelength), s.bitlength, newoffset)
newdata = []
d = s._rawarray
assert newoffset != s.offset % 8
if newoffset < s.offset % 8:
# We need to shift everything left
shiftleft = s.offset % 8 - newoffset
# First deal with everything except for the final byte
for x in range(s.byteoffset, s.byteoffset + s.bytelength - 1):
newdata.append(((d[x] << shiftleft) & 0xff) +\
(d[x + 1] >> (8 - shiftleft)))
bits_in_last_byte = (s.offset + s.bitlength) % 8
if not bits_in_last_byte:
bits_in_last_byte = 8
if bits_in_last_byte > shiftleft:
newdata.append((d[s.byteoffset + s.bytelength - 1] << shiftleft) & 0xff)
else: # newoffset > s._offset % 8
shiftright = newoffset - s.offset % 8
newdata.append(s.getbyte(0) >> shiftright)
for x in range(1, s.bytelength):
newdata.append(((d[x - 1] << (8 - shiftright)) & 0xff) +\
(d[x] >> shiftright))
bits_in_last_byte = (s.offset + s.bitlength) % 8
if not bits_in_last_byte:
bits_in_last_byte = 8
if bits_in_last_byte + shiftright > 8:
newdata.append((d[s.byteoffset + s.bytelength - 1] << (8 - shiftright)) & 0xff)
new_s = ByteStore(bytearray(newdata), s.bitlength, newoffset)
assert new_s.offset == newoffset
return new_s
def equal(a, b):
"""Return True if a == b."""
# We want to return False for inequality as soon as possible, which
# means we get lots of special cases.
# First the easy one - compare lengths:
a_bitlength = a.bitlength
b_bitlength = b.bitlength
if a_bitlength != b_bitlength:
return False
if not a_bitlength:
assert b_bitlength == 0
return True
# Make 'a' the one with the smaller offset
if (a.offset % 8) > (b.offset % 8):
a, b = b, a
# and create some aliases
a_bitoff = a.offset % 8
b_bitoff = b.offset % 8
a_byteoffset = a.byteoffset
b_byteoffset = b.byteoffset
a_bytelength = a.bytelength
b_bytelength = b.bytelength
da = a._rawarray
db = b._rawarray
# If they are pointing to the same data, they must be equal
if da is db and a.offset == b.offset:
return True
if a_bitoff == b_bitoff:
bits_spare_in_last_byte = 8 - (a_bitoff + a_bitlength) % 8
if bits_spare_in_last_byte == 8:
bits_spare_in_last_byte = 0
# Special case for a, b contained in a single byte
if a_bytelength == 1:
a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength)
return a_val == b_val
# Otherwise check first byte
if da[a_byteoffset] & (0xff >> a_bitoff) != db[b_byteoffset] & (0xff >> b_bitoff):
return False
# then everything up to the last
b_a_offset = b_byteoffset - a_byteoffset
for x in range(1 + a_byteoffset, a_byteoffset + a_bytelength - 1):
if da[x] != db[b_a_offset + x]:
return False
# and finally the last byte
return (da[a_byteoffset + a_bytelength - 1] >> bits_spare_in_last_byte ==
db[b_byteoffset + b_bytelength - 1] >> bits_spare_in_last_byte)
assert a_bitoff != b_bitoff
# This is how much we need to shift a to the right to compare with b:
shift = b_bitoff - a_bitoff
# Special case for b only one byte long
if b_bytelength == 1:
assert a_bytelength == 1
a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength)
return a_val == b_val
# Special case for a only one byte long
if a_bytelength == 1:
assert b_bytelength == 2
a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
b_val = ((db[b_byteoffset] << 8) + db[b_byteoffset + 1]) << b_bitoff
b_val &= 0xffff
b_val >>= 16 - b_bitlength
return a_val == b_val
# Compare first byte of b with bits from first byte of a
if (da[a_byteoffset] & (0xff >> a_bitoff)) >> shift != db[b_byteoffset] & (0xff >> b_bitoff):
return False
# Now compare every full byte of b with bits from 2 bytes of a
for x in range(1, b_bytelength - 1):
# Construct byte from 2 bytes in a to compare to byte in b
b_val = db[b_byteoffset + x]
a_val = ((da[a_byteoffset + x - 1] << 8) + da[a_byteoffset + x]) >> shift
a_val &= 0xff
if a_val != b_val:
return False
# Now check bits in final byte of b
final_b_bits = (b.offset + b_bitlength) % 8
if not final_b_bits:
final_b_bits = 8
b_val = db[b_byteoffset + b_bytelength - 1] >> (8 - final_b_bits)
final_a_bits = (a.offset + a_bitlength) % 8
if not final_a_bits:
final_a_bits = 8
if b.bytelength > a_bytelength:
assert b_bytelength == a_bytelength + 1
a_val = da[a_byteoffset + a_bytelength - 1] >> (8 - final_a_bits)
a_val &= 0xff >> (8 - final_b_bits)
return a_val == b_val
assert a_bytelength == b_bytelength
a_val = da[a_byteoffset + a_bytelength - 2] << 8
a_val += da[a_byteoffset + a_bytelength - 1]
a_val >>= (8 - final_a_bits)
a_val &= 0xff >> (8 - final_b_bits)
return a_val == b_val