From f4e9e1ad48f536ca9ab70cc77d12595bfa2a78eb Mon Sep 17 00:00:00 2001 From: Asumu Takikawa Date: Tue, 14 Feb 2017 16:30:49 -0800 Subject: [PATCH] Use dynasm to optimize ndpi binding functions Dynasm is used to construct a wrapper function that avoids a struct return (that LuaJIT does not support for traced FFI calls) by using a struct pointer. We use dynasm to avoid having to compile a C wrapper which complicates the build process. The wrapper isn't expressible in Lua. --- src/apps/wall/scanner/ndpi.lua | 21 +++--- src/apps/wall/scanner/ndpi_opt.dasl | 103 ++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 9 deletions(-) create mode 100644 src/apps/wall/scanner/ndpi_opt.dasl diff --git a/src/apps/wall/scanner/ndpi.lua b/src/apps/wall/scanner/ndpi.lua index adf76478ab..1a1943d874 100644 --- a/src/apps/wall/scanner/ndpi.lua +++ b/src/apps/wall/scanner/ndpi.lua @@ -1,5 +1,6 @@ local scanner = require("apps.wall.scanner") local const = require("apps.wall.constants") +local opt = require("apps.wall.scanner.ndpi_opt") local util = require("apps.wall.util") local ndpi = require("ndpi") @@ -103,12 +104,13 @@ function NdpiScanner:scan_packet(p, time) end flow.proto_master, flow.protocol = - self._ndpi:process_packet(flow._ndpi_flow, - p.data + ip_offset, - p.length - ip_offset, - time, - src_id, - dst_id) + opt.process_packet(self._ndpi, + flow._ndpi_flow, + p.data + ip_offset, + p.length - ip_offset, + time, + src_id, + dst_id) if flow.protocol ~= ndpi.protocol.PROTOCOL_UNKNOWN then return true, flow @@ -121,9 +123,10 @@ function NdpiScanner:scan_packet(p, time) (flow.key.ip_proto == IPv4_PROTO_TCP and flow.packets > 10) then flow.proto_master, flow.protocol = - self._ndpi:guess_undetected_protocol(flow.key.ip_proto, - rd32(src_addr), src_port, - rd32(dst_addr), dst_port) + opt.guess_undetected_protocol(self._ndpi, + flow.key.ip_proto, + rd32(src_addr), src_port, + rd32(dst_addr), dst_port) -- TODO: Check whether we should check again for PROTOCOL_UNKNOWN 
return true, flow end
diff --git a/src/apps/wall/scanner/ndpi_opt.dasl b/src/apps/wall/scanner/ndpi_opt.dasl
new file mode 100644
index 0000000000..526bf3592e
--- /dev/null
+++ b/src/apps/wall/scanner/ndpi_opt.dasl
@@ -0,0 +1,103 @@
+-- This module creates a wrapper around nDPI's packet processing
+-- functions in order to help LuaJIT do its optimizations
+--
+-- Specifically, it avoids a struct value return by creating wrappers
+-- that pass a struct pointer in instead. LuaJIT can handle that
+-- better than a struct return (which is not supported).
+
+module(..., package.seeall)
+
+local dasm = require("dasm")
+local ffi = require("ffi")
+local ndpi = require("ndpi.c").lib
+
+-- Set to true to print a dump of every generated wrapper's machine code.
+-- Not named "debug" on purpose: a local with that name would shadow the
+-- standard debug library for the rest of this file.
+local dump_mcode = false
+
+|.arch x64
+|.actionlist actions
+
+-- the definitions here (anchor, assemble, gen) are borrowed from lwaftr
+-- (see multi_copy.lua)
+--
+-- Every machine code buffer built by assemble() is appended here.
+-- NOTE(review): presumably this keeps the buffers referenced for as long
+-- as the function pointers cast from them are in use -- confirm against
+-- the lwaftr original.
+__anchor = {}
+
+-- Build one machine code routine and return it as a callable FFI value.
+--
+--   name       label printed in front of the dump when dump_mcode is set
+--   prototype  C function pointer ctype the machine code is cast to
+--   generator  function(Dst) that emits the instructions into the
+--              DynASM state Dst
+--
+-- Returns ffi.cast(prototype, mcode) for the freshly built code.
+local function assemble (name, prototype, generator)
+   local Dst = dasm.new(actions)
+   generator(Dst)
+   local mcode, size = Dst:build()
+   table.insert(__anchor, mcode)
+   if dump_mcode then
+      print("mcode dump: "..name)
+      dasm.dump(mcode, size)
+   end
+   return ffi.cast(prototype, mcode)
+end
+
+-- Return a generator (for assemble) that emits a wrapper around orig_f
+-- for the case where orig_f itself takes one stack argument.
+--
+-- The wrapper expects two stack arguments on entry: the one destined for
+-- orig_f at [rsp+8] and a result pointer at [rsp+16].  Only rax and rsp
+-- are touched before the call, so whatever the caller placed in other
+-- registers reaches orig_f unchanged.
+local function gen_nddpw(orig_f)
+   local function gen(Dst)
+      -- pass the first stack argument onto the original function
+      | mov rax, [rsp+8]
+      | push rax
+
+      -- call the original function, do stack cleanup
+      | mov64 rax, orig_f
+      | call rax
+      | add rsp, 8
+
+      -- at this point, rax and rdx have struct
+      -- fields in them, which we want to write into
+      -- the struct pointer (2nd stack arg)
+      --
+      -- NOTE(review): both stores use 64-bit registers, so together they
+      -- write bytes 0..11 behind the pointer and the second store
+      -- overwrites the upper half of the first.  Confirm this matches
+      -- the size and layout of ndpi_protocol_t and the way the C ABI
+      -- returns it.
+      | mov rcx, [rsp+16]
+      | mov [rcx], rax
+      | mov [rcx+4], rdx
+
+      | ret
+   end
+
+   return gen
+end
+
+-- Return a generator (for assemble) that emits a wrapper around orig_f
+-- for the case where orig_f takes no stack arguments.
+--
+-- The wrapper expects a single stack argument on entry, the result
+-- pointer, at [rsp+8].
+local function gen_nupw(orig_f)
+   local function gen(Dst)
+      -- call the original function, aligning on 16
+      | sub rsp, 8
+      | mov64 rax, orig_f
+      | call rax
+      | add rsp, 8
+
+      -- write rax and rdx through the result pointer
+      --
+      -- NOTE(review): these are 64-bit stores covering bytes 0..11
+      -- behind the pointer; confirm against sizeof(ndpi_protocol_t).
+      | mov rcx, [rsp+8]
+      | mov [rcx], rax
+      | mov [rcx+4], rdx
+
+      | ret
+   end
+
+   return gen
+end
+
+-- see ljndpi/ndpi/c.lua for the corresponding
headers for these functions
+-- these have an extra void* argument at the end for the struct pointer
+
+-- Build the Lua-facing replacement for the process-packet call.
+--
+--   f  the C function to wrap
+--
+-- Returns function(self, flow, data, len, tick, src, dst).  Each call
+-- allocates a fresh ndpi_protocol_t, passes it as the trailing pointer
+-- argument of the assembled wrapper, and returns the struct's
+-- master_protocol and protocol fields (in that order).
+local function make_nddp_wrapper(f)
+   local wrap = assemble("ndpi_detection_process_packet_wrapper",
+                         ffi.typeof("void (*)(void*, void*, void*, unsigned short, uint64_t, void*, void*, void*)"),
+                         gen_nddpw(f))
+   return function(self, flow, data, len, tick, src, dst)
+      local proto = ffi.new("ndpi_protocol_t")
+      wrap(self, flow, data, len, tick, src, dst, proto)
+      return proto.master_protocol, proto.protocol
+   end
+end
+
+-- Build the Lua-facing replacement for the guess-undetected-protocol call.
+--
+--   f  the C function to wrap
+--
+-- Returns function(self, prot_n, src_h, src_p, dst_h, dst_p).  Each call
+-- allocates a fresh ndpi_protocol_t, passes it as the trailing pointer
+-- argument of the assembled wrapper, and returns the struct's
+-- master_protocol and protocol fields (in that order).
+local function make_ngup_wrapper(f)
+   -- The label only shows up in the optional machine code dump; it names
+   -- this wrapper rather than reusing the process-packet label, so the
+   -- two dumps can be told apart.
+   local wrap = assemble("ndpi_guess_undetected_protocol_wrapper",
+                         ffi.typeof("void (*)(void*, uint8_t, uint32_t, uint16_t, uint32_t, uint16_t, void*)"),
+                         gen_nupw(f))
+   return function(self, prot_n, src_h, src_p, dst_h, dst_p)
+      local proto = ffi.new("ndpi_protocol_t")
+      wrap(self, prot_n, src_h, src_p, dst_h, dst_p, proto)
+      return proto.master_protocol, proto.protocol
+   end
+end
+
+-- Module exports: both take the nDPI handle as their first argument and
+-- return two values, master protocol first.
+process_packet = make_nddp_wrapper(ndpi.ndpi_detection_process_packet)
+guess_undetected_protocol = make_ngup_wrapper(ndpi.ndpi_guess_undetected_protocol)