Skip to content

Commit

Permalink
intel_mp app instantiations make stats symlink counter forest
Browse files Browse the repository at this point in the history
This commit changes the intel_mp driver to make a forest of symlinks to
the shared stats counters, rather than a single link to the shared
counters directory.  This allows the app to indicate which queue it's
on, as well as allowing ptree and top to naturally track the set of
counters used by the app so they can manage associated .rrd files.

As part of this change, we remove the instance-local "pci" directory,
instead using per-app "pci" directories.  This allows us to more nicely
record which app is on which device.

This commit also updates "snabb top" to get PCI interfaces from within
apps' SHM frames.
  • Loading branch information
wingo committed Sep 12, 2018
1 parent 9cd0edb commit 44ef127
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 82 deletions.
144 changes: 117 additions & 27 deletions src/apps/intel_mp/intel_mp.lua
Expand Up @@ -317,6 +317,36 @@ byPciID = {
-- order to interchangeably use NIC drivers.
driver = Intel

-- Build an shm object type whose objects are symlinks into a shared
-- stats directory instead of freshly created counter files.
--
-- srcdir:    directory component under which the symlink is placed
-- targetdir: shm directory holding the real (shared) counter files
--
-- Returns a table with type = "counter" plus create(name) and
-- delete(name) functions; it is used as the type element of shm frame
-- specs, e.g. `speed = {shared_counter}`.
local function shared_counter(srcdir, targetdir)
   local mod = { type = "counter" }
   -- Split "a/b/c" into "a/b" and "c".  Yields nil when there is no '/'
   -- or when name ends in '/'.
   local function dirsplit(name)
      return name:match("^(.*)/([^/]+)$")
   end
   -- Where the symlink lives: srcdir is inserted just before the last
   -- path component ("x/y" -> "x/<srcdir>/y", "y" -> "<srcdir>/y").
   local function source(name)
      if name:match('/') then
         local head, tail = dirsplit(name)
         return head..'/'..srcdir..'/'..tail
      else
         return srcdir..'/'..name
      end
   end
   -- What the symlink points at: only the last path component of name
   -- is kept, placed under targetdir.
   local function target(name)
      if name:match('/') then
         local _, tail = dirsplit(name)
         return targetdir..'/'..tail
      else
         return targetdir..'/'..name
      end
   end
   -- Create the symlink source(name) -> target(name).
   function mod.create(name)
      shm.alias(source(name), target(name))
   end
   -- Remove the symlink created by mod.create.  shm.resolve() yields a
   -- path relative to shm.root (shm.alias prepends the root to it as
   -- well), so the root has to be prepended here; otherwise the unlink
   -- would be attempted relative to the current working directory.
   -- A failed unlink is ignored (best effort).
   function mod.delete(name)
      S.unlink(shm.root..'/'..shm.resolve(source(name)))
   end
   return mod
end

function Intel:new (conf)
local self = {
r = {},
Expand Down Expand Up @@ -377,54 +407,81 @@ function Intel:new (conf)
self:set_txstats()
self:set_tx_rate()

-- Initialize per app statistics
self.shm = {
mtu = {counter, self.mtu},
rxcounter = {counter, self.rxcounter},
txcounter = {counter, self.txcounter},
txdrop = {counter}
}

-- Figure out if we are supposed to collect device statistics
self.run_stats = conf.run_stats or (self.master and conf.master_stats)

-- Expose per-device statistics from master
if self.run_stats then
local frame = {
dtime = {counter, C.get_unix_time()},
self.sync_timer = lib.throttle(0.01)
local stats_frame = {
-- Keep a copy of the mtu here to have all
-- data available in a single shm frame
mtu = {counter, self.mtu},
type = {counter, 0x1000}, -- ethernetCsmacd
macaddr = {counter, self.r.RAL64[0]:bits(0,48)},
speed = {counter},
status = {counter, 2}, -- Link down
type = {counter, 0x1000}, -- ethernetCsmacd
promisc = {counter},
macaddr = {counter, self.r.RAL64[0]:bits(0,48)},
rxbytes = {counter},
rxpackets = {counter},
rxmcast = {counter},
rxbcast = {counter},
rxdrop = {counter},
rxerrors = {counter},
rxdmapackets = {counter},
txbytes = {counter},
txpackets = {counter},
txmcast = {counter},
txbcast = {counter},
txdrop = {counter},
txerrors = {counter},
rxdmapackets = {counter}
}
self:init_queue_stats(frame)
self.stats = shm.create_frame(self.shm_root.."stats", frame)
self.sync_timer = lib.throttle(0.01)
self:init_queue_stats(stats_frame)
self.stats = shm.create_frame(self.shm_root.."stats", stats_frame)
end

-- Alias to the shared stats frame in each process's pci dir
-- The conditional checks if the symlink exists with lstat since
-- shm.exists requires the target exist, and the run_stats process
-- could go down and make the target cease to exist
if not S.lstat(shm.root.."/"..S.getpid().."/pci/"..self.pciaddress) then
shm.alias("pci/"..self.pciaddress, self.shm_root.."stats")
-- Expose per-device statistics from master
local shared_counter = shared_counter(
'pci/'..self.pciaddress, self.shm_root..'stats')
self.shm = {
dtime = {counter, C.get_unix_time()},
-- Keep a copy of the mtu here to have all
-- data available in a single shm frame
mtu = {counter, self.mtu},
type = {counter, 0x1000}, -- ethernetCsmacd
macaddr = {counter, self.r.RAL64[0]:bits(0,48)},
speed = {shared_counter},
status = {shared_counter},
promisc = {shared_counter}
}
if self.rxq then
self.shm.rxcounter = {counter, self.rxcounter}
self.shm.rxbytes = {shared_counter}
self.shm.rxpackets = {shared_counter}
self.shm.rxmcast = {shared_counter}
self.shm.rxbcast = {shared_counter}
self.shm.rxdrop = {shared_counter}
self.shm.rxerrors = {shared_counter}
self.shm.rxdmapackets = {shared_counter}
if self.rxcounter then
for _,k in pairs { 'drops', 'packets', 'bytes' } do
local name = "q" .. self.rxcounter .. "_rx" .. k
self.shm[name] = {shared_counter}
end
end
end
if self.txq then
self.shm.txcounter = {counter, self.txcounter}
self.shm.txbytes = {shared_counter}
self.shm.txpackets = {shared_counter}
self.shm.txmcast = {shared_counter}
self.shm.txbcast = {shared_counter}
self.shm.txdrop = {shared_counter}
self.shm.txerrors = {shared_counter}
if self.txcounter then
for _,k in pairs { 'packets', 'bytes' } do
local name = "q" .. self.txcounter .. "_tx" .. k
self.shm[name] = {shared_counter}
end
end
end

alarms.add_to_inventory(
Expand Down Expand Up @@ -452,6 +509,41 @@ function Intel:new (conf)
return self
end

-- Create one symlink per entry of self.shm_frame under
-- "pci/<pciaddress>/", each pointing at the file of the same name in
-- the shared "<shm_root>stats/" directory.  Entries named like
-- "q<N>_rx..." / "q<N>_tx..." are only linked when <N> equals this
-- app's rxq / txq.
function Intel:create_stats_symlinks ()
   -- Reverse lookup of shm.types: type module -> file extension.
   local ext_of = {}
   for ext, mod in pairs(shm.types) do ext_of[mod] = ext end
   local link_dir = 'pci/'..self.pciaddress..'/'

   for name, spec in pairs(self.shm_frame) do
      local mod = unpack(spec)
      local ext = assert(ext_of[mod])
      local q, dir = name:match('^q(%d+)_([rt]x)')
      -- Counters for queues not used by this app are not symlinked.
      if not q or q == tostring(self[dir..'q']) then
         local file = name..'.'..ext
         -- Best effort: this may fail if there are multiple apps that
         -- use this NIC, e.g. one RX app and one TX app.
         pcall(shm.alias, link_dir..file, self.shm_root.."stats/"..file)
      end
   end
end

-- Remove the per-app symlinks under "pci/<pciaddress>/" for every
-- entry of self.shm_frame.  Unlink failures (e.g. a link that was
-- never created) are ignored.
function Intel:remove_stats_symlinks ()
   -- Reverse lookup of shm.types: type module -> file extension.
   local exts = {}
   for ext, mod in pairs(shm.types) do exts[mod] = ext end
   local head = 'pci/'..self.pciaddress..'/'

   for k, v in pairs(self.shm_frame) do
      local mod = unpack(v)
      local ext = assert(exts[mod])
      local tail = k..'.'..ext
      -- shm.resolve() yields a path relative to shm.root (shm.alias
      -- prepends the root to it as well), so prepend the root here;
      -- otherwise the unlink would be attempted relative to the
      -- current working directory and never hit the symlink.
      S.unlink(shm.root..'/'..shm.resolve(head..tail))
   end
   -- Leave the empty dir; there could be other files there (e.g. RRD
   -- files).
end

-- Disable interrupts by calling the EIMC register accessor with an
-- all-ones 32-bit value.
-- NOTE(review): EIMC is presumably the NIC's interrupt mask clear
-- register -- confirm against the datasheet.
function Intel:disable_interrupts ()
   local eimc = self.r.EIMC
   eimc(0xffffffff)
end
Expand Down Expand Up @@ -803,6 +895,7 @@ function Intel:stop ()
self:unset_pool()
end
self:unset_tx_rate()
self:remove_stats_symlinks()
if self.fd:flock("nb, ex") then
-- delete shm state for this NIC
shm.unlink(self.shm_root)
Expand All @@ -811,9 +904,6 @@ function Intel:stop ()
pci.set_bus_master(self.pciaddress, false)
pci.close_pci_resource(self.fd, self.base)
end
if self.run_stats then
shm.delete_frame(self.stats)
end
end

function Intel:discard_unsent_packets ()
Expand Down
3 changes: 1 addition & 2 deletions src/core/shm.lua
Expand Up @@ -63,8 +63,7 @@ end

-- Create a symbolic link at shm path `name` pointing at shm path
-- `target`.  Both names are passed through resolve() and prefixed with
-- root; the directory that will contain the link is created first.
-- Raises (via assert) when S.symlink reports failure.
-- NOTE(review): the two assert statements below look like the "before"
-- and "after" sides of a diff hunk; presumably only one symlink call is
-- meant to remain -- confirm against the real file.
function alias (name, target)
mkdir(lib.dirname(resolve(name)))
assert(S.symlink(root.."/"..resolve(target), root.."/"..resolve(name)),
"shm alias failed")
assert(S.symlink(root.."/"..resolve(target), root.."/"..resolve(name)))
end

function resolve (name)
Expand Down
80 changes: 27 additions & 53 deletions src/program/top/top.lua
Expand Up @@ -130,23 +130,6 @@ local function monitor_snabb_instance(pid, instance, counters, histograms, rrds)
instance.group = nil
end
needs_redisplay()
-- if a link to a pci folder is created, then monitor it too
elseif event.name:match('^'..dir..'/pci/[%d:%.]+$') then
local pciaddr = event.name:match('/pci/([%d:%.]+)$')
local target = S.readlink(event.name)
if target and event.kind == 'creat' then
local pci_rx = inotify.recursive_directory_inventory_events(target)
local pci_op = pci_rx:get_operation()
-- make snabb top think the path is relative to the link, not the target
local function relocate(v)
return { name = v.name:gsub("intel%-mp/[%d:%.]+/stats",
pid.."/pci/"..pciaddr),
kind = v.kind }
end
pci_op = pci_op:wrap(relocate)
rx_op = op.choice(pci_op, rx_op)
end
needs_redisplay()
elseif event.name:match('%.histogram$') then
local name = event.name:sub(#dir + 2):match('^(.*)%.histogram')
if event.kind == 'creat' then
Expand Down Expand Up @@ -372,36 +355,6 @@ local function compute_histograms_tree(histograms)
return ret
end

-- Given the apps of a process and a table of per-PCI-device counter
-- sets, drop (in place) the per-queue counters of every queue index
-- 0..15 that no app in the process has claimed through an "rxcounter"
-- or "txcounter" leaf.
local function filter_queue_counters(apps, pcis)
   -- Bit masks of claimed queue counter indices, keyed by leaf name.
   local masks = { rxcounter = 0, txcounter = 0 }
   for _, app in pairs(apps) do
      for name, leaf in pairs(app) do
         if masks[name] then
            masks[name] = lib.bits({bit=leaf.value()}, masks[name])
         end
      end
   end

   local rx_names = { "rxpackets", "rxdrops", "rxbytes" }
   local tx_names = { "txpackets", "txdrops", "txbytes" }
   -- Remove the listed counters of one queue from a PCI counter set.
   local function clear(pci, prefix, names)
      for _, name in ipairs(names) do
         pci[prefix..name] = nil
      end
   end

   for _, pci in pairs(pcis) do
      for i = 0, 15 do
         local prefix = "q"..i.."_"
         if not lib.bitset(masks.rxcounter, i) then
            clear(pci, prefix, rx_names)
         end
         if not lib.bitset(masks.txcounter, i) then
            clear(pci, prefix, tx_names)
         end
      end
   end
end

local function compute_counters_tree(counters, rrds)
if counters == nil then return {} end
local ret = {}
Expand All @@ -417,9 +370,6 @@ local function compute_counters_tree(counters, rrds)
function() return counter.read(v) end, rrds[k])
end
end
if ret.pci and ret.apps then
filter_queue_counters(ret.apps, ret.pci)
end
-- The rxpackets and rxbytes link counters are redundant.
if ret.links then
local links = {}
Expand Down Expand Up @@ -691,7 +641,32 @@ function compute_display_tree.interface(tree, prev, dt, t)
show_traffic('rx', pci, prev)
show_traffic('tx', pci, prev)
end
-- Merge the non-leaf table src into dst, in place.  Keys missing from
-- dst receive src's value by reference (no copy is made); where both
-- sides hold non-leaf values the merge recurses; otherwise dst's
-- existing value wins.  Does nothing when src is not a table or is a
-- leaf.
local function union(dst, src)
   if type(src) ~= 'table' or is_leaf(src) then return end
   for key, val in pairs(src) do
      local existing = dst[key]
      if existing == nil then
         dst[key] = val
      elseif not (is_leaf(val) or is_leaf(existing)) then
         union(existing, val)
      end
   end
end
-- Walk the tree rooted at node and merge the contents of every subtree
-- stored under a key named 'pci' into ret (created when not supplied).
-- A 'pci' subtree is merged as-is and not searched any further.
-- Returns ret.
local function find_pci_devices(node, ret)
   ret = ret or {}
   if type(node) ~= 'table' or is_leaf(node) then return ret end
   for name, child in pairs(node) do
      if name == 'pci' then
         union(ret, child)
      else
         find_pci_devices(child, ret)
      end
   end
   return ret
end
local function show_instance(label, instance, prev)
local pci, prev_pci = find_pci_devices(instance), find_pci_devices(prev)
local engine, prev_engine = instance.engine, prev and prev.engine
local latency = engine and engine.latency and engine.latency.value
local latency_str = ''
Expand All @@ -713,9 +688,8 @@ function compute_display_tree.interface(tree, prev, dt, t)
end
else
-- Note, PCI tree only shown on instances without workers.
for addr, pci in sortedpairs(instance.pci or {}) do
local prev = prev and prev.pci and prev.pci[addr]
show_pci(addr, pci, prev)
for addr, pci in sortedpairs(pci) do
show_pci(addr, pci, prev_pci[addr])
end
end
end
Expand Down

0 comments on commit 44ef127

Please sign in to comment.