Permalink
Browse files

Merge PR #1250 (v2017.11 release) into master

  • Loading branch information...
eugeneia committed Dec 12, 2017
2 parents 2121080 + 6c0f065 commit 248aae7392a088dfdbacdc6139d51b005e6e6ec6
Showing with 1,910 additions and 479 deletions.
  1. +1 −1 .version
  2. +32 −2 lib/ljsyscall/syscall/linux/syscalls.lua
  3. +6 −0 lib/luajit/src/lib_jit.c
  4. +2 −2 lib/luajit/src/lj_record.c
  5. +2 −3 src/apps/config/leader.lua
  6. +2 −6 src/apps/intel/intel10g.lua
  7. +8 −3 src/apps/intel/intel_app.lua
  8. +106 −2 src/apps/intel_mp/README.md
  9. +684 −70 src/apps/intel_mp/intel_mp.lua
  10. +96 −0 src/apps/intel_mp/loadgen.lua
  11. +13 −8 src/apps/intel_mp/selftest.sh
  12. BIN src/apps/intel_mp/source-vlan.pcap
  13. BIN src/apps/intel_mp/source2.pcap
  14. +9 −0 src/apps/intel_mp/test_10g_1q_blast_vmdq.sh
  15. +19 −0 src/apps/intel_mp/test_10g_2q_blast_vlan.sh
  16. +20 −0 src/apps/intel_mp/test_10g_2q_blast_vmdq.sh
  17. +20 −0 src/apps/intel_mp/test_10g_2q_blast_vmdq_auto.sh
  18. +3 −1 src/apps/intel_mp/test_10g_come_and_go.sh
  19. +43 −0 src/apps/intel_mp/test_10g_rate_limit.snabb
  20. +9 −5 src/apps/intel_mp/test_10g_rss_tab.snabb
  21. +48 −0 src/apps/intel_mp/test_10g_rxq_disable.snabb
  22. +5 −1 src/apps/intel_mp/test_10g_sw_sem.snabb
  23. +29 −0 src/apps/intel_mp/test_10g_txq_stop.snabb
  24. +5 −0 src/apps/intel_mp/test_10g_vlan.sh
  25. +75 −0 src/apps/intel_mp/test_10g_vmdq_mirror.snabb
  26. +69 −0 src/apps/intel_mp/test_10g_vmdq_pool_sel.snabb
  27. +68 −0 src/apps/intel_mp/test_10g_vmdq_race.snabb
  28. +80 −0 src/apps/intel_mp/test_10g_vmdq_reconfig_mac.snabb
  29. +5 −0 src/apps/intel_mp/test_10g_vmdq_tx.sh
  30. +9 −5 src/apps/intel_mp/test_1g_rss_tab.snabb
  31. +5 −1 src/apps/intel_mp/test_1g_sw_sem.snabb
  32. +88 −0 src/apps/intel_mp/testrecv.lua
  33. +3 −67 src/apps/intel_mp/testrecv.snabb
  34. +2 −1 src/apps/intel_mp/testsend.snabb
  35. +6 −1 src/apps/intel_mp/testup.snabb
  36. +55 −0 src/apps/intel_mp/testvlan.snabb
  37. +12 −0 src/apps/intel_mp/testvmdqrecv.snabb
  38. +59 −0 src/apps/intel_mp/testvmdqtx.snabb
  39. +1 −1 src/apps/pcap/pcap.lua
  40. +13 −2 src/core/memory.c
  41. +5 −1 src/lib/ctable.lua
  42. +18 −7 src/lib/hardware/pci.lua
  43. +2 −1 src/lib/hash/siphash.dasl
  44. +6 −3 src/lib/io/virtual_ether_mux.lua
  45. +5 −1 src/lib/protocol/gre.lua
  46. +2 −2 src/lib/timers/ingress_drop_monitor.lua
  47. +1 −1 src/lib/virtio/net_device.lua
  48. +12 −6 src/program/firehose/firehose.lua
  49. +2 −2 src/program/ipfix/probe/README
  50. +7 −6 src/program/ipfix/probe/probe.lua
  51. +9 −7 src/program/lisper/dev-env-perftest/baseline.lua
  52. +6 −4 src/program/lisper/dev-env-perftest/count.lua
  53. +12 −8 src/program/lisper/lisper.lua
  54. +0 −1 src/program/lwaftr/README
  55. +10 −0 src/program/lwaftr/csv_stats.lua
  56. +30 −14 src/program/lwaftr/loadtest/loadtest.lua
  57. +4 −2 src/program/lwaftr/run/run.lua
  58. +28 −20 src/program/lwaftr/setup.lua
  59. +0 −35 src/program/lwaftr/transient/README
  60. +0 −1 src/program/lwaftr/transient/README.inc
  61. +0 −136 src/program/lwaftr/transient/transient.lua
  62. +2 −2 src/program/packetblaster/lwaftr/lwaftr.lua
  63. +4 −5 src/program/packetblaster/packetblaster.lua
  64. +2 −2 src/program/snabbmark/snabbmark.lua
  65. +1 −1 src/program/snabbnfv/fuzz/fuzz.lua
  66. +4 −2 src/program/snabbnfv/traffic/traffic.lua
  67. +1 −3 src/program/snabbvmx/lwaftr/lwaftr.lua
  68. +15 −11 src/program/snabbvmx/lwaftr/setup.lua
  69. +7 −8 src/program/wall/common.lua
  70. +1 −2 src/program/wall/filter/README
  71. +1 −2 src/program/wall/spy/README
  72. +1 −1 src/program/wall/tests/bench.sh
View
@@ -1 +1 @@
2017.09
2017.11
@@ -2,7 +2,7 @@
local require, error, assert, tonumber, tostring,
setmetatable, pairs, ipairs, unpack, rawget, rawset,
pcall, type, table, string =
pcall, type, table, string =
require, error, assert, tonumber, tostring,
setmetatable, pairs, ipairs, unpack, rawget, rawset,
pcall, type, table, string
@@ -457,10 +457,40 @@ function S.sched_setaffinity(pid, mask, len) -- note len last as rarely used
return retbool(C.sched_setaffinity(pid or 0, len or s.cpu_set, mktype(t.cpu_set, mask)))
end
-- Determine the kernel's maximum number of NUMA memory nodes by parsing
-- the Mems_allowed bitmask out of /proc/self/status.
-- Returns an integer node count, or nil when no Mems_allowed line is found.
local function get_maxnumnodes()
   -- Slurp a whole file via raw syscalls (no stdio), returning its
   -- contents as a Lua string. Raises the errno object on open failure.
   local function slurp (path)
      local chunks, chunksz = {}, 1024
      local chunk = ffi.new("uint8_t[?]", chunksz)
      local fd, errno = S.open(path, 0)
      if not fd then error(errno) end
      repeat
         local nread = S.read(fd, chunk, chunksz)
         chunks[#chunks+1] = ffi.string(chunk, nread)
      until nread ~= chunksz
      fd:close()
      return table.concat(chunks)
   end
   for line in slurp("/proc/self/status"):gmatch("[^\n]+") do
      if line:match("^Mems_allowed:") then
         local masks = line:gsub("^Mems_allowed:%s+", "")
         -- Mems_allowed is a comma-separated list of 8-hex-digit groups,
         -- each group being one 32-bit mask (9 characters including the
         -- comma). The final group has no trailing comma, hence the +1.
         return math.floor(((#masks+1)/9)*32)
      end
   end
end
-- Query the NUMA memory policy of the calling process via the Linux
-- get_mempolicy(2) syscall.
-- mode:  optional out-buffer (t.int1); allocated here when nil.
-- mask:  optional node bitmask; coerced through mktype(t.bitmask, ...).
-- addr:  optional address argument; defaults to 0.
-- flags: optional MPOL_F_* flag, resolved through c.MPOL_FLAG.
-- Returns { mode = <int>, mask = <bitmask> } on success, or nil plus an
-- error object on failure.
function S.get_mempolicy(mode, mask, addr, flags)
mode = mode or t.int1()
mask = mktype(t.bitmask, mask)
-- NOTE(review): the next line is immediately shadowed by the second
-- C.get_mempolicy call below -- this looks like a pre-change/post-change
-- pair left over from a rendered diff rather than an intentional double
-- syscall; confirm against the upstream ljsyscall source.
local ret, err = C.get_mempolicy(mode, mask.mask, mask.size, addr or 0, c.MPOL_FLAG[flags])
-- Size passed to the kernel should be at least equal to maxnumnodes.
local size = ffi.cast("uint64_t", math.max(tonumber(mask.size), get_maxnumnodes()))
local ret, err = C.get_mempolicy(mode, mask.mask, size, addr or 0, c.MPOL_FLAG[flags])
if ret == -1 then return nil, t.error(err or errno()) end
return { mode=mode[0], mask=mask }
end
View
@@ -141,6 +141,12 @@ LJLIB_CF(jit_attach)
return 0;
}
/* Calling this forces a trace stitch. */
/* lib_jit: jit.tracebarrier(). Deliberately empty at the C level -- the
** useful side effect is the call itself (presumably because calling an
** uncompiled C function interrupts the current trace -- confirm against
** LuaJIT trace-recorder semantics). Pushes no results for the caller. */
LJLIB_CF(jit_tracebarrier)
{
return 0;
}
LJLIB_PUSH(top-5) LJLIB_SET(os)
LJLIB_PUSH(top-4) LJLIB_SET(arch)
LJLIB_PUSH(top-3) LJLIB_SET(version_num)
@@ -2411,8 +2411,6 @@ void lj_record_ins(jit_State *J)
case BC_IFORL:
case BC_IITERL:
case BC_ILOOP:
case BC_IFUNCF:
case BC_IFUNCV:
lj_trace_err(J, LJ_TRERR_BLACKL);
break;
@@ -2424,13 +2422,15 @@ void lj_record_ins(jit_State *J)
/* -- Function headers -------------------------------------------------- */
case BC_FUNCF:
case BC_IFUNCF:
rec_func_lua(J);
break;
case BC_JFUNCF:
rec_func_jit(J, rc);
break;
case BC_FUNCV:
case BC_IFUNCV:
rec_func_vararg(J);
rec_func_lua(J);
break;
@@ -680,9 +680,8 @@ function Leader:send_messages_to_followers()
for _,follower in ipairs(self.followers) do
if not follower.channel then
local name = '/'..tostring(follower.pid)..'/config-follower-channel'
-- local success, channel = pcall(channel.open, name)
--if success then follower.channel = channel end
follower.channel = channel.open(name)
local success, channel = pcall(channel.open, name)
if success then follower.channel = channel end
end
local channel = follower.channel
if channel then
@@ -189,9 +189,7 @@ do
end
end
function M_sf:ingress_packet_drops ()
return self.qs.QPRDC[0]()
end
function M_sf:rxdrop () return self.qs.QPRDC[0]() end
function M_sf:global_reset ()
local reset = bits{LinkReset=3, DeviceReset=26}
@@ -1004,9 +1002,7 @@ function M_vf:set_tx_rate (limit, priority)
return self
end
function M_vf:ingress_packet_drops ()
return self.pf.qs.QPRDC[self.rxstats]()
end
function M_vf:rxdrop () return self.pf.qs.QPRDC[self.rxstats]() end
rxdesc_t = ffi.typeof [[
union {
@@ -25,7 +25,8 @@ Intel82599 = {
rxcounter = {default=0},
txcounter = {default=0},
rate_limit = {default=0},
priority = {default=1.0}
priority = {default=1.0},
ring_buffer_size = {default=intel10g.ring_buffer_size()}
}
}
Intel82599.__index = Intel82599
@@ -52,6 +53,10 @@ end
function Intel82599:new (conf)
local self = {}
-- FIXME: ring_buffer_size is really a global variable for this
-- driver; taking the parameter as an initarg is just to make the
-- intel_mp transition easier.
intel10g.ring_buffer_size(conf.ring_buffer_size)
if conf.vmdq then
if devices[conf.pciaddr] == nil then
local pf = intel10g.new_pf(conf):open()
@@ -155,8 +160,8 @@ function Intel82599:pull ()
end
end
function Intel82599:ingress_packet_drops ()
return self.dev:ingress_packet_drops()
function Intel82599:rxdrop ()
return self.dev:rxdrop()
end
function Intel82599:add_receive_buffers ()
View
@@ -6,13 +6,37 @@ be attached to separate instances of the app on different processes.
The links are named `input` and `output`.
DIAGRAM: Intel
+-------+
| |
input ---->* Intel *----> output
| |
+-------+
## Caveats
If attaching multiple processes to a single NIC, performance appears
better with `engine.busywait = false`.
The `intel_mp.Intel` app can drive an Intel 82599 NIC at 14 million pps.
— Method **Intel:get_rxstats**
Returns a table with the following keys:
* `counter_id` - Counter id
* `packets` - Number of packets received
* `dropped` - Number of packets dropped
* `bytes` - Total bytes received
— Method **Intel:get_txstats**
Returns a table with the following keys:
* `counter_id` - Counter id
* `packets` - Number of packets sent
* `bytes` - Total bytes sent
## Configuration
— Key **pciaddr**
@@ -27,11 +51,87 @@ specified but assumed to be broadly applicable.
— Key **rxq**
*Optional*. The receive queue to attach to, numbered from 0. The default is 0.
When VMDq is enabled, this number is used to index a queue (0 or 1)
for the selected pool. Passing `"off"` will disable the receive queue.
— Key **txq**
*Optional*. The transmit queue to attach to, numbered from 0. The default is 0.
Passing `"off"` will disable the transmit queue.
— Key **vmdq**
*Optional*. A boolean parameter that specifies whether VMDq (Virtual Machine
Device Queues) is enabled. When VMDq is enabled, each instance of the driver
is associated with a *pool* that can be assigned a MAC address or VLAN tag.
Packets are delivered to pools that match the corresponding MACs or VLAN tags.
Each pool may be associated with several receive and transmit queues.
For a given NIC, all driver instances should have this parameter either
enabled or disabled uniformly. If this is enabled, *macaddr* must be
specified.
— Key **poolnum**
*Optional*. The VMDq pool to associate with, numbered from 0. The default
is to select a pool number automatically.
— Key **macaddr**
*Optional*. The MAC address to use as a string. The default is a wild-card
(i.e., accept all packets).
— Key **vlan**
*Optional*. A twelve-bit integer (0-4095). If set, incoming packets from
other VLANs are dropped and outgoing packets are tagged with a VLAN header.
— Key **mirror**
*Optional*. A table. If set, this app will receive copies of all selected
packets on the physical port. The selection is configured by setting keys
of the *mirror* table. Either *mirror.pool* or *mirror.port* may be set.
If *mirror.pool* is `true` all pools defined on this physical port are
mirrored. If *mirror.pool* is an array of pool numbers then the specified
pools are mirrored.
If *mirror.port* is one of "in", "out" or "inout" all incoming and/or
outgoing packets on the port are mirrored respectively. Note that this
does not include internal traffic which does not enter or exit through
the physical port.
— Key **rxcounter**
— Key **txcounter**
*Optional*. Four bit integers (0-15). If set, incoming/outgoing packets
will be counted in the selected statistics counter respectively. Multiple
apps can share a counter. To retrieve counter statistics use
`Intel:get_rxstats()` and `Intel:get_txstats()`.
— Key **rate_limit**
*Optional*. Number. Limits the maximum Mbit/s to transmit. Default is 0
which means no limit. Only applies to outgoing traffic.
— Key **priority**
*Optional*. Floating point number. Weight for the *round-robin* algorithm
used to arbitrate transmission when *rate_limit* is not set or adds up to
more than the line rate of the physical port. Default is 1.0 (scaled to
the geometric middle of the scale which goes from 1/128 to 128). The
absolute value is not relevant, instead only the ratio between competing
apps controls their respective bandwidths. Only applies to outgoing
traffic.
For example, if two apps without *rate_limit* set have the same
*priority*, both get the same output bandwidth. If the priorities are
3.0/1.0, the output bandwidth is split 75%/25%. Likewise, 1.0/0.333 or
1.5/0.5 yield the same result.
Note that even a low-priority app can use the whole line rate unless other
(higher priority) apps are using up the available bandwidth.
— Key **rsskey**
@@ -92,3 +192,7 @@ Each chipset supports a differing number of receive / transmit queues:
* Intel82599 supports 16 receive and 16 transmit queues, 0-15
* Intel1g i350 supports 8 receive and 8 transmit queues, 0-7
* Intel1g i210 supports 4 receive and 4 transmit queues, 0-3
The Intel82599 supports both VMDq and RSS with 32/64 pools and 4/2 RSS queues for
each pool. This driver only supports configurations with 64 pools/2 queues.
While the i350 supports VMDq, this driver does not currently support it.
Oops, something went wrong.

0 comments on commit 248aae7

Please sign in to comment.