Skip to content

Commit

Permalink
Merge PR #1059 (lib.cltable: hash table w/ FFI keys and Lua vals) int…
Browse files Browse the repository at this point in the history
…o max-next
  • Loading branch information
eugeneia committed Nov 8, 2016
2 parents aafedf0 + d64e70d commit 944028a
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 3 deletions.
65 changes: 65 additions & 0 deletions src/lib/README.cltable.md
@@ -0,0 +1,65 @@
### cltable (lib.cltable)

Ever been annoyed that you can't create a hash table where the keys are
FFI values, like raw IPv4 addresses, but the values are Lua objects?
Well of course you can key a normal Lua table by any Lua value, but the
key is looked up by identity and not by value, which is rarely what you
want. `foo[lib.protocol.ipv4:pton('1.2.3.4')]` will not be the same as
`foo[lib.protocol.ipv4:pton('1.2.3.4')]`, as the `pton` call produces a
fresh value every time. What you usually want with FFI-keyed tables is
to be able to look up the entry by value, not by identity.

Well never fear, *cltable* is here. A cltable is a data type that
associates FFI keys with any old Lua value. When you look up a key in a
cltable, the key is matched by-value.

Externally, a cltable provides the same interface as a Lua table, with
the exception that to iterate over the table's values, you need to use
the `cltable.pairs` function instead of `pairs`.

Internally, cltable uses a [`ctable`](./README.ctable.md) to map the key
to an index, then if an entry is found, looks up that index in a side
table of Lua objects. See the ctable documentation for more performance
characteristics.

To create a cltable, pass an appropriate parameter table to
`cltable.new`, like this:

```lua
local cltable = require('lib.cltable')
local ffi = require('ffi')
local params = { key_type = ffi.typeof('uint8_t[4]') }
local cltab = cltable.new(params)
```

— Function **cltable.new** *parameters*

Create a new cltable. *parameters* is a table of key/value pairs. The
following key is required:

* `key_type`: An FFI type (LuaJIT "ctype") for keys in this table.

Optional entries that may be present in the *parameters* table are
`hash_fn`, `initial_size`, `max_occupancy_rate`, and
`min_occupancy_rate`. See the ctable documentation for their meanings.

— Function **cltable.build** *keys* *values*

Given the ctable *keys* that maps keys to indexes, and a corresponding
Lua table *values* containing the index->value associations, return a
cltable.

— Property **.keys**

A cltable's `keys` property holds the table's keys, as a ctable. If you
modify it, you get to keep both pieces.

— Property **.values**

Likewise, a cltable's `values` property holds the table's values, as a
Lua array (table). If you break it, you buy it!

— Function **cltable.pairs** *cltable*

Return an iterator over the keys and values in *cltable*. Use this when
you would use `pairs` on a regular Lua table.
10 changes: 9 additions & 1 deletion src/lib/README.ctable.md
Expand Up @@ -62,7 +62,6 @@ following keys are required:

* `key_type`: An FFI type (LuaJIT "ctype") for keys in this table.
* `value_type`: An FFI type (LuaJIT "ctype") for values in this table.
* `hash_fn`: A function that takes a key and returns a hash value.

Hash values are unsigned 32-bit integers in the range `[0,
0xFFFFFFFF)`. That is to say, `0xFFFFFFFF` is the only unsigned 32-bit
Expand All @@ -71,6 +70,9 @@ hash value in the correct range.

Optional entries that may be present in the *parameters* table include:

* `hash_fn`: A function that takes a key and returns a hash value.
If not given, defaults to the result of calling `compute_hash_fn`
on the key type.
* `initial_size`: The initial size of the hash table, including free
space. Defaults to 8 slots.
* `max_occupancy_rate`: The maximum ratio of `occupancy/size`, where
Expand Down Expand Up @@ -198,3 +200,9 @@ Hash the first 48 bits of a byte sequence.
— Function **ctable.hashv_64** *ptr*

Hash the first 64 bits of a byte sequence.

— Function **ctable.compute_hash_fn** *ctype*

Return a `hashv_`-like hash function over the bytes in instances of
*ctype*. Note that the same reservations apply as for `hash_32`
above.
72 changes: 72 additions & 0 deletions src/lib/cltable.lua
@@ -0,0 +1,72 @@
module(..., package.seeall)

local ffi = require("ffi")
local ctable = require("lib.ctable")

-- Wrap an existing key index and value store as a cltable.
--
-- keys:   kept as the `keys` field; the accessors in this module call
--         lookup_ptr, add, remove_ptr and iterate on it.
-- values: kept as the `values` field; indexed by the number that `keys`
--         associates with each key.
--
-- Reads and writes of fields that are absent from the returned table
-- are dispatched to this module's get and set functions.
function build(keys, values)
   local cltable = { keys = keys, values = values }
   local accessors = { __index = get, __newindex = set }
   return setmetatable(cltable, accessors)
end

-- Create an empty cltable from a ctable-style parameter table.
--
-- params is copied rather than modified, so the caller's table is left
-- untouched.  The copy must not carry a value_type: this function sets
-- it to uint32_t, because the ctable values are used as indexes into
-- the Lua-side values table.
function new(params)
   local ctable_params = {}
   -- _G.pairs is spelled out because this module defines its own pairs.
   for name, setting in _G.pairs(params) do
      ctable_params[name] = setting
   end
   assert(not ctable_params.value_type)
   ctable_params.value_type = ffi.typeof('uint32_t')
   local keys = ctable.new(ctable_params)
   return build(keys, {})
end

-- Look up key in cltable.keys and return the Lua value stored at the
-- index recorded in the matching entry.  Returns nil when lookup_ptr
-- finds no entry for key.
function get(cltable, key)
   local found = cltable.keys:lookup_ptr(key)
   if found then
      return cltable.values[found.value]
   end
   return nil
end

-- Associate value with key in cltable, or remove key when value is nil.
--
-- * key already present: its slot in cltable.values is overwritten; if
--   value is nil the key is also dropped from cltable.keys.
-- * key absent and value non-nil: the value is stored at a free index
--   and the key is mapped to that index.
-- * key absent and value nil: nothing to do.
function set(cltable, key, value)
   local entry = cltable.keys:lookup_ptr(key)
   if entry then
      cltable.values[entry.value] = value
      if value == nil then cltable.keys:remove_ptr(entry) end
   elseif value ~= nil then
      -- Removals leave nil holes in cltable.values, so `#` may return any
      -- border.  Evaluate it exactly once: slot border+1 is nil by
      -- definition, so it is free, and the same index has to be the one
      -- recorded for the key.
      local index = #cltable.values + 1
      cltable.values[index] = value
      cltable.keys:add(key, index)
   end
end

-- Return an iterator function for use in a generic `for` over cltable.
-- Each step advances the iteration obtained from cltable.keys:iterate()
-- and yields the entry's key together with the Lua value stored at the
-- entry's index.  The iterator returns nothing once the underlying
-- iteration yields no entry.
function pairs(cltable)
   local step, limit, cursor = cltable.keys:iterate()
   local function advance()
      cursor = step(limit, cursor)
      if cursor then
         return cursor.key, cltable.values[cursor.value]
      end
   end
   return advance
end

-- Exercise insertion, by-value lookup, removal and iteration on a
-- cltable keyed by 4-byte IPv4 addresses.
function selftest()
   print("selftest: cltable")

   local ipv4 = require('lib.protocol.ipv4')
   local cltab = new({ key_type = ffi.typeof('uint8_t[4]') })

   -- Parse the address anew on every use, so that hits below can only
   -- come from by-value matching.
   local function address(prefix, i)
      return ipv4:pton(prefix..i)
   end

   -- Populate 256 distinct keys.
   for i=0,255 do
      cltab[address('1.2.3.', i)] = 'hello, '..i
   end

   -- Each key must be found again through a separately parsed address.
   for i=0,255 do
      assert(cltab[address('1.2.3.', i)] == 'hello, '..i)
   end

   for i=0,255 do
      -- Remove value that is present.
      cltab[address('1.2.3.', i)] = nil
      -- Remove value that is not present.
      cltab[address('2.3.4.', i)] = nil
   end

   -- Everything was removed, so iteration must not yield anything.
   for k,v in pairs(cltab) do
      error('not reachable')
   end

   print("selftest: ok")
end
25 changes: 23 additions & 2 deletions src/lib/ctable.lua
Expand Up @@ -91,8 +91,9 @@ end
-- FIXME: For now the value_type option is required, but in the future
-- we should allow for a nil value type to create a set instead of a
-- map.
local required_params = set('key_type', 'value_type', 'hash_fn')
local required_params = set('key_type', 'value_type')
local optional_params = {
hash_fn = false,
initial_size = 8,
max_occupancy_rate = 0.9,
min_occupancy_rate = 0.0
Expand All @@ -103,7 +104,7 @@ function new(params)
local params = parse_params(params, required_params, optional_params)
ctab.entry_type = make_entry_type(params.key_type, params.value_type)
ctab.type = make_entries_type(ctab.entry_type)
ctab.hash_fn = params.hash_fn
ctab.hash_fn = params.hash_fn or compute_hash_fn(params.key_type)
ctab.equal_fn = make_equal_fn(params.key_type)
ctab.size = 0
ctab.occupancy = 0
Expand Down Expand Up @@ -391,6 +392,7 @@ function hash_32(i32)
return uint32_cast[0]
end

local cast = ffi.cast
-- Hash the 32-bit word read through key: key is cast to uint32_ptr_t,
-- element 0 is loaded, and that word is passed to hash_32.
function hashv_32(key)
   local word = cast(uint32_ptr_t, key)[0]
   return hash_32(word)
end
Expand All @@ -410,6 +412,22 @@ function hashv_64(key)
return hash_32(bxor(hi, hash_32(lo)))
end

-- Cache of hash functions indexed by key size in bytes.  Sizes 4 and 8
-- use the dedicated hashv_32 / hashv_64; other sizes are filled in on
-- demand by compute_hash_fn.
local hash_fns_by_size = { [4]=hashv_32, [8]=hashv_64 }

-- Return a hash function over the bytes of instances of ctype.
--
-- The returned function takes a key (anything ffi.cast can turn into a
-- pointer) and folds it through hash_32: first every complete 32-bit
-- word, then the remaining size%4 bytes one at a time.  Functions are
-- memoized per size, so two ctypes of equal size share one function.
function compute_hash_fn(ctype)
   local size = ffi.sizeof(ctype)
   local hash_fn = hash_fns_by_size[size]
   if not hash_fn then
      local tail_bytes = size % 4
      -- Number of complete 32-bit words in the key.
      local whole_words = (size - tail_bytes) / 4
      hash_fn = function(key)
         local h = 0
         local words = cast(uint32_ptr_t, key)
         local bytes = cast('uint8_t*', key)
         -- Stop at whole_words-1: reading words[whole_words] would load
         -- up to four bytes beyond the end of the key, making the hash
         -- depend on whatever happens to follow it in memory.
         for i=0,whole_words-1 do h = hash_32(bxor(h, words[i])) end
         -- Trailing bytes are consumed from the last byte backwards.
         for i=1,tail_bytes do h = hash_32(bxor(h, bytes[size-i])) end
         return h
      end
      hash_fns_by_size[size] = hash_fn
   end
   return hash_fn
end

function selftest()
print("selftest: ctable")

Expand Down Expand Up @@ -457,8 +475,11 @@ function selftest()

-- Check that make_equal_fn and compute_hash_fn for `type` treat two
-- separately allocated objects built from initializer `a` as the same,
-- and objects built from `a` and `b` as different.
local function check_bytes_equal(type, a, b)
   -- Allocate a fresh object per operand so identity can never be what
   -- makes a comparison succeed.
   local function make(init) return ffi.new(type, init) end
   local equal_fn = make_equal_fn(type)
   local hash_fn = compute_hash_fn(type)

   local same = equal_fn(make(a), make(a))
   assert(same)
   local differ = not equal_fn(make(a), make(b))
   assert(differ)

   assert(hash_fn(make(a)) == hash_fn(make(a)))
   assert(hash_fn(make(a)) ~= hash_fn(make(b)))
end
check_bytes_equal(ffi.typeof('uint16_t[1]'), {1}, {2}) -- 2 byte
check_bytes_equal(ffi.typeof('uint32_t[1]'), {1}, {2}) -- 4 byte
Expand Down

0 comments on commit 944028a

Please sign in to comment.