Skip to content

Commit

Permalink
lib.poptrie: optimize x86 lookup
Browse files Browse the repository at this point in the history
Do not maintain an offset counter, repeatedly reloading the key
and shifting it by that offset.

Instead, load the key once at the beginning and shift it further
on each iteration.

This way we only have to do load/bswap once.
  • Loading branch information
eugeneia committed Feb 27, 2023
1 parent 2fbc94b commit 67b3d9c
Showing 1 changed file with 31 additions and 48 deletions.
79 changes: 31 additions & 48 deletions src/lib/poptrie_lookup.dasl
Expand Up @@ -60,23 +60,39 @@ local BMI2 = (assert(lib.readfile("/proc/cpuinfo", "*a"),
|.define leaves, rdi -- pointer to leaves array
|.define nodes, rsi -- pointer to nodes array
|.define key, rdx -- key to look up
|.define key_dw, edx -- (key as dword)
|.define dmap, rcx -- pointer to directmap
|.define index, r8d -- index into node array
|.define node, r8 -- pointer into node array
|.define offset, r9d -- offset into key
|.define offsetx, r9 -- (offset as qword)
|.define key_x, r9 -- key extension (for 128 bit keys)
|.define v, r10 -- k or s bits extracted from key
|.define v_dw, r10d -- (v as dword)
|.define vec, r11 -- 64-bit vector or leafvec

-- lookup(leaf_t *leaves, node_t *nodes, key) -> leaf_t
function lookup (Dst, Poptrie, keysize)
if keysize == 32 then
| mov key_dw, dword [key]
| bswap key
elseif keysize == 64 then
| mov key, [key]
| bswap key
elseif keysize == 128 then
| mov key_x, [key+8]
| bswap key_x
| mov key, [key]
| bswap key
else error("NYI") end
if Poptrie.direct_pointing then
-- v = extract(key, 0, Poptrie.s)
local direct_shift = 32 - Poptrie.s
| mov v_dw, dword [key]
| bswap v_dw
| shr v_dw, direct_shift
| mov v, key
| shr v, (64 - Poptrie.s)
if keysize <= 64 then
| shl key, Poptrie.s
else
| shld key, key_x, Poptrie.s
| shl key_x, Poptrie.s
end
-- index = dmap[v]
| mov index, dword [dmap+v*4]
-- eax = band(index, leaf_tag - 1) (tag inverted)
Expand All @@ -89,53 +105,22 @@ function lookup (Dst, Poptrie, keysize)
|1:
| imul index, 24 -- multiply by node size
| lea node, [nodes+index]
-- offset = s
| mov offset, Poptrie.s
else
-- index, node, offset = 0, nodes[index], 0
-- index, node = 0, nodes[index]
| xor index, index
| lea node, [nodes+0] -- nodes[0]
| xor offset, offset
end
-- while band(vec, lshift(1ULL, v)) ~= 0
|2:
-- v = extract(key, offset, k=6)
if keysize == 32 then
| mov v_dw, dword [key]
| bswap v_dw
if BMI2 then
| shlx v_dw, v_dw, offset
else
| mov ecx, offset
| shl v_dw, cl
end
local shift = 32 - 6
| shr v_dw, shift
elseif keysize == 64 then
| mov v, [key]
| bswap v
if BMI2 then
| shlx v, v, offsetx
else
| mov ecx, offset
| shl v, cl
end
local shift = 64 - 6
| shr v, shift
elseif keysize == 128 then
| mov ecx, offset
| mov v, [key]
| bswap v
| mov vec, [key+8]
| bswap vec
| test cl, 64
| cmovnz v, vec
| shld v, vec, cl
local shift = 64 - 6
| shr v, shift
else error("NYI") end
-- v = band(v, lshift(1, k=6) - 1)
| and v_dw, 0x3F
| mov v, key
| shr v, (64 - Poptrie.k)
if keysize <= 64 then
| shl key, Poptrie.k
else
| shld key, key_x, Poptrie.k
| shl key_x, Poptrie.k
end
-- vec = nodes[index].vector
| mov vec, qword [node+8]
-- is bit v set in vec?
Expand All @@ -161,8 +146,6 @@ function lookup (Dst, Poptrie, keysize)
-- node = nodes[index]
| imul index, 24 -- multiply by node size
| lea node, [nodes+index]
-- offset = offset + k
| add offset, 6
| jmp <2 -- loop
-- end while
|4:
Expand Down

0 comments on commit 67b3d9c

Please sign in to comment.