From 045f48ec3df4d604b70f728cf38e930dec098eb8 Mon Sep 17 00:00:00 2001
From: Alexander Gall
Date: Thu, 19 Oct 2017 16:52:56 +0200
Subject: [PATCH] Add templates for HTTP

The templates v{4,6}_HTTP extract the host name, request method and
URL from HTTP GET requests using the standard IPFIX elements
httpRequestHost, httpRequestMethod and httpRequestTarget. Currently,
only the GET method is supported.
---
 src/apps/ipfix/ipfix.lua    |   4 +-
 src/apps/ipfix/strings.lua  | 113 +++++++++++++++++++++++++++++++
 src/apps/ipfix/template.lua | 129 ++++++++++++++++++++++++++++++++++++
 src/core/clib.h             |   3 +
 4 files changed, 247 insertions(+), 2 deletions(-)
 create mode 100644 src/apps/ipfix/strings.lua

diff --git a/src/apps/ipfix/ipfix.lua b/src/apps/ipfix/ipfix.lua
index 3d11245ca2..232ed7efd0 100644
--- a/src/apps/ipfix/ipfix.lua
+++ b/src/apps/ipfix/ipfix.lua
@@ -228,13 +228,13 @@ function FlowSet:record_flows(timestamp)
       local pkt = link.receive(self.incoming)
       counter.add(self.shm.packets_in)
       self.template:extract(pkt, timestamp, entry)
-      packet.free(pkt)
       local lookup_result = self.table:lookup_ptr(entry.key)
       if lookup_result == nil then
          self.table:add(entry.key, entry.value)
       else
-         self.template:accumulate(lookup_result, entry)
+         self.template:accumulate(lookup_result, entry, pkt)
       end
+      packet.free(pkt)
    end
 end
 
diff --git a/src/apps/ipfix/strings.lua b/src/apps/ipfix/strings.lua
new file mode 100644
index 0000000000..90d2947a1e
--- /dev/null
+++ b/src/apps/ipfix/strings.lua
@@ -0,0 +1,113 @@
+module(..., package.seeall)
+
+local ffi = require("ffi")
+
+-- Cursor over a byte buffer: "text" points to the first byte,
+-- "length" is the number of bytes in the buffer and "pos" is the
+-- current offset.  "length" and "pos" are 16 bits wide, which limits
+-- the size of the buffer to 65535 bytes.
+ct_t = ffi.typeof([[
+   struct {
+      uint8_t *text;
+      uint16_t length;
+      uint16_t pos;
+   }
+]])
+
+-- Set the current offset of the cursor.
+function ct_set(ct, pos)
+   ct.pos = pos
+end
+
+-- Return the current offset of the cursor.
+function ct_get(ct)
+   return ct.pos
+end
+
+-- Return a pointer to the byte at the current offset.
+function ct_at(ct)
+   return ct.text + ct.pos
+end
+
+-- Point the cursor at the buffer "text" of "length" bytes, starting
+-- at offset "pos" (default 0).
+function ct_init(ct, text, length, pos)
+   ct.text = text
+   ct.length = length
+   ct.pos = pos or 0
+end
+
+-- Case-insensitive search for "needle" (a table with the fields "buf"
+-- and "len" as created by strings_to_buf()), starting at the current
+-- offset of the cursor.  On success, the cursor is moved to the
+-- first byte of the match or, if "tail" is true, to the first byte
+-- after the match and the new offset is returned.  Otherwise, nil is
+-- returned and the cursor is left untouched.
+function search(needle, ct, tail)
+   local slen = needle.len
+   local pos = ct.pos
+   -- Use "<=" rather than "<": a match that ends exactly at the end
+   -- of the buffer is still entirely within bounds.
+   while (pos + slen <= ct.length) do
+      if ffi.C.strncasecmp(needle.buf, ct.text + pos, slen) == 0 then
+         if tail then pos = pos + slen end
+         ct.pos = pos
+         return pos
+      end
+      pos = pos + 1
+   end
+   return nil
+end
+
+-- Advance the cursor to the next space (32) or carriage return (13)
+-- or to the end of the buffer, whichever comes first.  Returns the
+-- new offset and the number of bytes skipped.
+function upto_space_or_cr(ct)
+   local text = ct.text
+   local pos = ct.pos
+   local pos_start = pos
+   while (pos < ct.length and text[pos] ~= 32 and text[pos] ~= 13) do
+      pos = pos + 1
+   end
+   ct.pos = pos
+   return pos, pos - pos_start
+end
+
+-- Advance the cursor past any spaces (32) at the current offset.
+-- Returns the new offset and the number of bytes skipped.
+function skip_space(ct)
+   local text = ct.text
+   local pos = ct.pos
+   local pos_start = pos
+   while (pos < ct.length and text[pos] == 32) do
+      pos = pos + 1
+   end
+   ct.pos = pos
+   return pos, pos - pos_start
+end
+
+-- Return a new uint8_t array that holds the bytes of the Lua string
+-- "s" (without a terminating NUL).
+function string_to_buf(s)
+   -- Using ffi.new("uint8_t[?]", #s) results in trace aborts due to
+   -- "bad argument type" in ffi.sizeof()
+   local buf = ffi.new("uint8_t["..#s.."]")
+   for i = 1, #s do
+      buf[i-1] = s:byte(i,i)
+   end
+   return buf
+end
+
+-- Convert a table of Lua strings to a table with the same keys whose
+-- values are tables with the fields "buf" (see string_to_buf()) and
+-- "len" (the length of the string in bytes).
+function strings_to_buf(t)
+   local result = {}
+   for k, v in pairs(t) do
+      result[k] = {
+         buf = string_to_buf(v),
+         len = #v
+      }
+   end
+   return result
+end
diff --git a/src/apps/ipfix/template.lua b/src/apps/ipfix/template.lua
index ceecd1d4df..1c5aa37b4b 100644
--- a/src/apps/ipfix/template.lua
+++ b/src/apps/ipfix/template.lua
@@ -9,9 +9,11 @@ local pf = require("pf")
 local consts = require("apps.lwaftr.constants")
 local lib = require("core.lib")
 local ctable = require("lib.ctable")
+local counter = require("core.counter")
 local ethernet = require("lib.protocol.ethernet")
 local ipv4 = require("lib.protocol.ipv4")
 local metadata = require("apps.ipfix.packet_metadata")
+local strings = require("apps.ipfix.strings")
 
 local ntohs = lib.ntohs
 local htonl, htons = lib.htonl, lib.htons
@@ -342,6 +344,83 @@ local function v6_extract (self, pkt, timestamp, entry)
    end
 end
 
+--- Helper functions for HTTP templates
+
+-- We want to be able to find a "Host:" header even if it is not in
+-- the same TCP segment as the GET request, which requires keeping
+-- state.
+local HTTP_state_t = ffi.typeof([[
+   struct {
+      uint8_t have_GET;
+      uint8_t have_host;
+      uint8_t examined;
+   } __attribute__((packed))
+]])
+-- The number of TCP segments to scan for the first GET request
+-- (including the SYN segment, which is skipped). Most requests are
+-- found in the first non-handshake packet (segment #3 from the
+-- client). Empirical evidence shows a strong peak there with a long
+-- tail. A cutoff of 10 is expected to find at least 80% of the GET
+-- requests.
+local HTTP_scan_threshold = 10
+-- HTTP-specific statistics counters
+local function HTTP_counters()
+   return {
+      HTTP_flows_examined = 0,
+      HTTP_GET_matches = 0,
+      HTTP_host_matches = 0
+   }
+end
+
+local HTTP_strings = strings.strings_to_buf({
+   GET = 'GET ',
+   Host = 'Host:'
+})
+
+local HTTP_ct = strings.ct_t()
+
+-- Scan the TCP segment in pkt for a GET request and, once one has
+-- been seen on the flow, for a "Host:" header.  The method, target
+-- and host name are copied to the flow record dst, truncated to the
+-- size of the respective field minus one.  SYN segments and records
+-- whose packetDeltaCount has reached HTTP_scan_threshold are skipped.
+local function HTTP_accumulate(self, dst, new, pkt)
+   local md = metadata_get(pkt)
+   if ((dst.value.packetDeltaCount >= HTTP_scan_threshold or
+        -- TCP SYN
+        bit.band(new.value.tcpControlBitsReduced, 0x02) == 0x02)) then
+      return
+   end
+   local state = dst.value.state
+   if state.examined == 0 then
+      self.counters.HTTP_flows_examined =
+         self.counters.HTTP_flows_examined + 1
+      state.examined = 1
+   end
+   strings.ct_init(HTTP_ct, pkt.data, pkt.length, md.l4 - pkt.data)
+   if (state.have_GET == 0 and
+       strings.search(HTTP_strings.GET, HTTP_ct, true)) then
+      ffi.copy(dst.value.httpRequestMethod, 'GET')
+      state.have_GET = 1
+      strings.skip_space(HTTP_ct)
+      local start = strings.ct_at(HTTP_ct)
+      local _, length = strings.upto_space_or_cr(HTTP_ct)
+      length = math.min(length, ffi.sizeof(dst.value.httpRequestTarget) - 1)
+      ffi.copy(dst.value.httpRequestTarget, start, length)
+      self.counters.HTTP_GET_matches = self.counters.HTTP_GET_matches + 1
+   end
+   if (state.have_GET == 1 and state.have_host == 0 and
+       strings.search(HTTP_strings.Host, HTTP_ct, true)) then
+      state.have_host = 1
+      strings.skip_space(HTTP_ct)
+      local start = strings.ct_at(HTTP_ct)
+      local _, length = strings.upto_space_or_cr(HTTP_ct)
+      length = math.min(length, ffi.sizeof(dst.value.httpRequestHost) - 1)
+      ffi.copy(dst.value.httpRequestHost, start, length)
+      self.counters.HTTP_host_matches = self.counters.HTTP_host_matches + 1
+   end
+end
+
 templates = {
    v4 = {
       id = 256,
@@ -375,6 +454,31 @@ templates = {
             protos[key.protocolIdentifier] or tostring(key.protocolIdentifier))
       end
    },
+   v4_HTTP = {
+      id = 257,
+      filter = "ip and tcp dst port 80",
+      keys = { "sourceIPv4Address",
+               "destinationIPv4Address",
+               "protocolIdentifier",
+               "sourceTransportPort",
+               "destinationTransportPort" },
+      values = { "flowStartMilliseconds",
+                 "flowEndMilliseconds",
+                 "packetDeltaCount",
+                 "octetDeltaCount",
+                 "tcpControlBitsReduced",
+                 "httpRequestMethod=8",
+                 "httpRequestHost=32",
+                 "httpRequestTarget=64" },
+      state_t = HTTP_state_t,
+      counters = HTTP_counters(),
+      extract = v4_extract,
+      accumulate = function (self, dst, new, pkt)
+         accumulate_generic(dst, new)
+         accumulate_tcp_flags_reduced(dst, new)
+         HTTP_accumulate(self, dst, new, pkt)
+      end
+   },
    v6 = {
       id = 512,
       filter = "ip6",
@@ -407,6 +511,31 @@ templates = {
             protos[key.protocolIdentifier] or tostring(key.protocolIdentifier))
       end
    },
+   v6_HTTP = {
+      id = 513,
+      filter = "ip6 and tcp dst port 80",
+      keys = { "sourceIPv6Address",
+               "destinationIPv6Address",
+               "protocolIdentifier",
+               "sourceTransportPort",
+               "destinationTransportPort" },
+      values = { "flowStartMilliseconds",
+                 "flowEndMilliseconds",
+                 "packetDeltaCount",
+                 "octetDeltaCount",
+                 "tcpControlBitsReduced",
+                 "httpRequestMethod=8",
+                 "httpRequestHost=32",
+                 "httpRequestTarget=64" },
+      state_t = HTTP_state_t,
+      counters = HTTP_counters(),
+      extract = v6_extract,
+      accumulate = function (self, dst, new, pkt)
+         accumulate_generic(dst, new)
+         accumulate_tcp_flags_reduced(dst, new)
+         HTTP_accumulate(self, dst, new, pkt)
+      end
+   }
 }
 
 function selftest()
diff --git a/src/core/clib.h b/src/core/clib.h
index 33906c3b49..de2496fbb3 100644
--- a/src/core/clib.h
+++ b/src/core/clib.h
@@ -27,6 +27,9 @@ void *memmove(void *dest, const void *src, int n);
 // strncpy(3) - copy a string
 char *strncpy(char *dest, const char *src, size_t n);
 
+// strncasecmp(3) - compare two strings ignoring case
+int strncasecmp(const char *s1, const char *s2, size_t n);
+
 // read(2) - read from a file descriptor
 int read(int fd, void *buf, size_t count);
 