From 67b3d9c66ede5a726033f4619734ac1f39d3596d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 27 Feb 2023 12:41:43 +0100 Subject: [PATCH] lib.poptrie: optimize x86 lookup Stop maintaining an offset counter and reloading the key on every iteration only to shift it by that offset. Instead, load the key once at the beginning and shift it further on each iteration. This way the load/bswap has to be done only once. --- src/lib/poptrie_lookup.dasl | 79 +++++++++++++++---------------------- 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/src/lib/poptrie_lookup.dasl b/src/lib/poptrie_lookup.dasl index dab200440f..1aea871025 100644 --- a/src/lib/poptrie_lookup.dasl +++ b/src/lib/poptrie_lookup.dasl @@ -60,23 +60,39 @@ local BMI2 = (assert(lib.readfile("/proc/cpuinfo", "*a"), |.define leaves, rdi -- pointer to leaves array |.define nodes, rsi -- pointer to nodes array |.define key, rdx -- key to look up +|.define key_dw, edx -- (key as dword) |.define dmap, rcx -- pointer to directmap |.define index, r8d -- index into node array |.define node, r8 -- pointer into node array -|.define offset, r9d -- offset into key -|.define offsetx, r9 -- (offset as qword) +|.define key_x, r9 -- key extension (for 128 bit keys) |.define v, r10 -- k or s bits extracted from key |.define v_dw, r10d -- (v as dword) |.define vec, r11 -- 64-bit vector or leafvec -- lookup(leaf_t *leaves, node_t *nodes, key) -> leaf_t function lookup (Dst, Poptrie, keysize) + if keysize == 32 then + | mov key_dw, dword [key] + | bswap key + elseif keysize == 64 then + | mov key, [key] + | bswap key + elseif keysize == 128 then + | mov key_x, [key+8] + | bswap key_x + | mov key, [key] + | bswap key + else error("NYI") end if Poptrie.direct_pointing then -- v = extract(key, 0, Poptrie.s) - local direct_shift = 32 - Poptrie.s - | mov v_dw, dword [key] - | bswap v_dw - | shr v_dw, direct_shift + | mov v, key + | shr v, (64 - Poptrie.s) + if keysize <= 64 then + | shl key, Poptrie.s + else + | shld key, key_x, Poptrie.s + | shl key_x, 
Poptrie.s + end -- index = dmap[v] | mov index, dword [dmap+v*4] -- eax = band(index, leaf_tag - 1) (tag inverted) @@ -89,53 +105,22 @@ function lookup (Dst, Poptrie, keysize) |1: | imul index, 24 -- multiply by node size | lea node, [nodes+index] - -- offset = s - | mov offset, Poptrie.s else - -- index, node, offset = 0, nodes[index], 0 + -- index, node = 0, nodes[index] | xor index, index | lea node, [nodes+0] -- nodes[0] - | xor offset, offset end -- while band(vec, lshift(1ULL, v)) ~= 0 |2: -- v = extract(key, offset, k=6) - if keysize == 32 then - | mov v_dw, dword [key] - | bswap v_dw - if BMI2 then - | shlx v_dw, v_dw, offset - else - | mov ecx, offset - | shl v_dw, cl - end - local shift = 32 - 6 - | shr v_dw, shift - elseif keysize == 64 then - | mov v, [key] - | bswap v - if BMI2 then - | shlx v, v, offsetx - else - | mov ecx, offset - | shl v, cl - end - local shift = 64 - 6 - | shr v, shift - elseif keysize == 128 then - | mov ecx, offset - | mov v, [key] - | bswap v - | mov vec, [key+8] - | bswap vec - | test cl, 64 - | cmovnz v, vec - | shld v, vec, cl - local shift = 64 - 6 - | shr v, shift - else error("NYI") end - -- v = band(v, lshift(1, k=6) - 1) - | and v_dw, 0x3F + | mov v, key + | shr v, (64 - Poptrie.k) + if keysize <= 64 then + | shl key, Poptrie.k + else + | shld key, key_x, Poptrie.k + | shl key_x, Poptrie.k + end -- vec = nodes[index].vector | mov vec, qword [node+8] -- is bit v set in vec? @@ -161,8 +146,6 @@ function lookup (Dst, Poptrie, keysize) -- node = nodes[index] | imul index, 24 -- multiply by node size | lea node, [nodes+index] - -- offset = offset + k - | add offset, 6 | jmp <2 -- loop -- end while |4: