-
Notifications
You must be signed in to change notification settings - Fork 298
/
poptrie_lookup.dasl
178 lines (164 loc) · 4.98 KB
/
poptrie_lookup.dasl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
-- Use of this source code is governed by the Apache 2.0 license; see COPYING.
module(..., package.seeall)
local debug = false
local lib = require("core.lib")
local ffi = require("ffi")
local dasm = require("dasm")
|.arch x64
|.actionlist actions
|.globalnames globalnames
-- Table keeping machine code alive to the GC.
local anchor = {}
-- leaf_t poptrie_lookup
-- (leaf_t *leaves, node_t *nodes, uint8_t *key, base_t *directmap)
-- NB: this type is hardcoded here to avoid filling up the ctype table
local prototype = ffi.typeof(
"uint32_t (*) (void *, void *, uint8_t *, void *)"
)
-- Assemble a lookup routine
function generate (Poptrie, keysize)
-- Assert assumptions about lib.poptrie
assert(Poptrie.k == 6)
if Poptrie.direct_pointing then
assert(Poptrie.s <= 32)
assert(Poptrie.leaf_tag == bit.lshift(1, 31))
end
assert(ffi.sizeof(Poptrie.vector_t) == 8)
assert(ffi.sizeof(Poptrie.base_t) == 4)
assert(ffi.offsetof(Poptrie.node_t, 'leafvec') == 0)
assert(ffi.offsetof(Poptrie.node_t, 'vector') == 8)
assert(ffi.offsetof(Poptrie.node_t, 'base0') == 16)
assert(ffi.offsetof(Poptrie.node_t, 'base1') == 20)
local name = "poptrie_lookup(k="..Poptrie.k..", keysize="..keysize..")"
local Dst = dasm.new(actions)
lookup(Dst, Poptrie, keysize)
local mcode, size = Dst:build()
table.insert(anchor, mcode)
if debug then
print("mcode dump: "..name)
dasm.dump(mcode, size)
end
return ffi.cast(prototype, mcode)
end
-- Do we have BMI2?
local BMI2 = (assert(lib.readfile("/proc/cpuinfo", "*a"),
"failed to read /proc/cpuinfo for hardware check")
:match("bmi2"))
|.define leaves, rdi -- pointer to leaves array
|.define nodes, rsi -- pointer to nodes array
|.define key, rdx -- key to look up
|.define key_dw, edx -- (key as dword)
|.define dmap, rcx -- pointer to directmap
|.define index, r8d -- index into node array
|.define node, r8 -- pointer into node array
|.define key_x, r9 -- key extension (for 128 bit keys)
|.define v, r10 -- k or s bits extracted from key
|.define v_dw, r10d -- (v as dword)
|.define vec, r11 -- 64-bit vector or leafvec
-- lookup(leaf_t *leaves, node_t *nodes, key) -> leaf_t
function lookup (Dst, Poptrie, keysize)
if keysize == 32 then
| mov key_dw, dword [key]
| bswap key
elseif keysize == 64 then
| mov key, [key]
| bswap key
elseif keysize == 128 then
| mov key_x, [key+8]
| bswap key_x
| mov key, [key]
| bswap key
else error("NYI") end
if Poptrie.direct_pointing then
-- v = extract(key, 0, Poptrie.s)
| mov v, key
| shr v, (64 - Poptrie.s)
if keysize <= 64 then
| shl key, Poptrie.s
else
| shld key, key_x, Poptrie.s
| shl key_x, Poptrie.s
end
-- index = dmap[v]
| mov index, dword [dmap+v*4]
-- eax = band(index, leaf_tag - 1) (tag inverted)
| mov eax, index
-- is leaf_tag set? (unsets bit)
| btr eax, 31
| jnc >1 -- leaf_tag not set, index is a node
| ret
-- node, offset = nodes[index], s
|1:
| imul index, 24 -- multiply by node size
| lea node, [nodes+index]
else
-- index, node = 0, nodes[index]
| xor index, index
| lea node, [nodes+0] -- nodes[0]
end
-- while band(vec, lshift(1ULL, v)) ~= 0
|2:
-- v = extract(key, offset, k=6)
| mov v, key
| shr v, (64 - Poptrie.k)
if keysize <= 64 then
| shl key, Poptrie.k
else
| shld key, key_x, Poptrie.k
| shl key_x, Poptrie.k
end
-- vec = nodes[index].vector
| mov vec, qword [node+8]
-- is bit v set in vec?
| bt vec, v
| jnc >4 -- reached leaf, exit loop
-- rax = band(vec, lshift(2ULL, v) - 1)
if BMI2 then
| lea rcx, [v+1]
| bzhi rax, vec, rcx
else
| mov eax, 2
| mov ecx, v_dw
| shl rax, cl
| sub rax, 1
| and rax, vec
end
-- rax = popcnt(rax)
| popcnt rax, rax
-- index = base + bc - 1
| mov index, dword [node+20] -- nodes[index].base1
| sub index, 1
| add index, eax
-- node = nodes[index]
| imul index, 24 -- multiply by node size
| lea node, [nodes+index]
| jmp <2 -- loop
-- end while
|4:
if Poptrie.leaf_compression then
-- vec = nodes[index].leafvec
| mov vec, qword [node+0]
else error("NYI") end
-- rax = band(vec, lshift(2ULL, v) - 1)
if BMI2 then
| lea rcx, [v+1]
| bzhi rax, vec, rcx
else
| mov eax, 2
| mov ecx, v_dw
| shl rax, cl
| sub rax, 1
| and rax, vec
end
-- rax = popcnt(rax)
| popcnt rax, rax
-- return leaves[base + bc - 1]
| mov index, dword [node+16] -- nodes[index].base0
| add index, eax -- index = base + bc
if ffi.sizeof(Poptrie.leaf_t) == 2 then
| movzx eax, word [leaves+index*2-2] -- leaves[index - 1]
elseif ffi.sizeof(Poptrie.leaf_t) == 4 then
| mov eax, dword [leaves+index*4-4] -- leaves[index - 1]
else error("NYI") end
| ret
end