#!/usr/bin/env python
# psdiff (part of ossobv/vcutil) // wdoekes/2016-2018 // Public Domain
# Generic (coarse) monitoring of daemon processes. Use in conjunction
# with a monitoring suite like Zabbix.
import argparse
import os
import re
import subprocess
import sys
import textwrap
import time
def udiff(alines, blines):
    """
    Replacement for difflib.ndiff that doesn't take exponential time.

    The fancy ndiff is nice, but on some machines it takes almost a
    minute for psdiff dumps of 200 lines. That's not acceptable. We'll
    have to settle for something less pretty but faster.

    We'd prefer the vcutils::udiff tool over difflib.unified_diff
    because it syncs on the next line that matches instead of grouping
    removals and additions together.

    However, loading udiff optionally makes the output differ based on
    the (non)existence of said tool, and loading it from python is
    rather ugly. We'll settle for ugly-diff for now.

    Takes two lists of lines (without line terminators) and yields one
    string per diff line: the unified-diff tag character (' ', '-' or
    '+'), a single space, and then the line text. File headers and
    '@@' hunk headers are not yielded.
    """
    # # py2/py3 (importlib.util refused to load files)
    # import imp
    # try:
    #     udifflib = imp.load_source('udiff', './udiff')
    #     udifflib.filediff  # test existence
    # except (AttributeError, IOError, ImportError):
    import difflib
    diff_func = (lambda a, b: difflib.unified_diff(a, b, lineterm=''))
    # else:
    #     diff_func = (lambda a, b: udifflib.filediff(a, b))

    iter_ = iter(diff_func(alines, blines))

    # Consume everything up to and including the first hunk header.
    for line in iter_:
        if line.startswith('@@ '):
            break  # skip +++/---

    # Continue on the same iterator: emit the hunk bodies, but drop any
    # further hunk headers.
    for line in iter_:
        if not line.startswith('@@ '):
            yield '{} {}'.format(line[0], line[1:])
class Process(object):
    """
    A single process from the ps listing, linked into a tree.

    The root process owns ``process_map`` (pid -> Process); every
    Process created with that root registers itself there. After all
    processes have been created, ``fix_links()`` must be called on each
    of them to turn the numeric parent pid into a Process reference and
    to populate ``children``.
    """
    split = re.compile(r'\s+')

    @classmethod
    def from_line(cls, line, root):
        """
        Create a Process from one line of ps output.

        The line is expected to hold: user, pid, ppid and then an
        8-character wide executable name column, a space, and the
        command line. The new process is registered with root.
        """
        args = cls.split.split(line, 3)
        user = args[0]
        pid = int(args[1])
        ppid = int(args[2])
        exe = args[3][0:8].rstrip()
        # The exe column is fixed width; a space must follow it.
        assert args[3][8] == ' '
        cmdline = args[3][9:]
        return cls(ppid, pid, user, exe, cmdline, root=root)

    def __init__(self, parent, pid, user, exe, cmdline, root=None):
        # parent is the parent pid (or None) until fix_links() replaces
        # it with the parent Process.
        self.parent = parent
        self.pid = pid
        self.user = user
        self.exe = exe
        self.cmdline = cmdline
        self.root = root or self
        if not root:
            # We are the root: we hold the pid lookup table.
            self.process_map = {}
        self.root.process_map[pid] = self
        self.children = set()

    def has_parent(self, include_self=False,
                   cmdline__startswith=None, pid=None):
        """
        Return True if an ancestor matches one of the given criteria.

        Walks up the parent chain (starting at self if include_self is
        set) and matches on a cmdline prefix (string or tuple of
        strings) and/or on an exact pid. Requires fix_links() to have
        been called.
        """
        obj = self
        if not include_self:
            obj = obj.parent

        while obj:
            if (cmdline__startswith is not None and
                    obj.cmdline.startswith(cmdline__startswith)):
                return True
            if pid is not None and obj.pid == pid:
                return True
            obj = obj.parent
        return False

    def fix_links(self):
        """Replace the parent pid by the parent Process and link back."""
        if self.parent is not None:
            # Convert ppid to parent.
            self.parent = self.root.process_map[self.parent]
            # Add us as child of the parent.
            self.parent.children.add(self)

    def get_process(self, pid):
        """Return the Process with the supplied pid; None for pid 0."""
        if not pid:
            return None
        return self.root.process_map[pid]

    def to_string(self, indent=0):
        """Return the indented cmdline with the user appended."""
        return u'{0}{1} {{user={2}}}'.format(
            indent * ' ', self.cmdline.rstrip(), self.user)

    def sort(self):
        """Recursively sort the children, turning the sets into lists."""
        # Sort the children and convert the set into a list. The
        # children are handled first because __lt__ compares the
        # (already sorted) child lists as a last resort.
        for child in self.children:
            child.sort()
        self.children = list(sorted(self.children))

    def __hash__(self):
        # Needs to be reimplemented because Python3 drops the
        # auto-generated one when __eq__ is defined.
        return id(self)

    def __eq__(self, other):
        # Only identity comparison yields same.
        return (id(self) == id(other))

    def __lt__(self, other):
        # Quick, check identity:
        if id(self) == id(other):
            return False

        # Lazy comparison.
        if self.cmdline != other.cmdline:
            return (self.cmdline < other.cmdline)
        if self.user != other.user:
            return (self.user < other.user)
        if len(self.children) != len(other.children):
            return (len(self.children) < len(other.children))

        # Both must have been sort()ed by now, or the comparison below
        # would be comparing sets.
        assert isinstance(self.children, list), self.children
        assert isinstance(other.children, list), other.children
        return (self.children < other.children)

    def __str__(self):
        return self.to_string()
class ProcessFormatter(object):
    """
    Turn a process tree into an indented, sorted text listing.

    On construction, adjust() is applied to every process and then
    every process is sorted. Subclasses can override adjust(),
    include() and to_string() to tweak the result.
    """
    def __init__(self, root):
        self.root = root

        # Add self.adjust hook to alter process traits before sort.
        self.visit(self.adjust)

        # Sort processes.
        self.visit((lambda process: process.sort()))

    def visit(self, callable_):
        "Visit all processes with callable."
        for process in self.root.process_map.values():
            callable_(process)

    def to_strings(self, process, indent=0):
        "Return a list of stringified children with indentation."
        ret = []
        if self.include(process):
            ret.append(self.to_string(process, indent))
        # NOTE(review): children are visited even when the process
        # itself is excluded; include() implementations check the
        # ancestors themselves -- confirm against upstream.
        for child in process.children:  # has been sorted already
            ret.extend(self.to_strings(child, indent + 1))
        return ret

    def __str__(self):
        return u'\n'.join(self.to_strings(self.root)) + '\n'

    def adjust(self, process):
        """
        The possibility to adjust cmdline and other process traits.

        This is called before sort, so you'll want to use this to alter
        process traits that should influence the sort order.
        """
        pass

    def include(self, process):
        "The possibility to exclude processes from the listing."
        return True

    def to_string(self, process, indent=0):
        "The old hook to alter cmdline appearance."
        return process.to_string(indent)
class FilteredProcessFormatter(ProcessFormatter):
    """
    ProcessFormatter with builtin knowledge about common daemons.

    It normalizes command lines that fluctuate and leaves out
    processes that are not (stable) daemons.
    """
    def __init__(self, *args, **kwargs):
        super(FilteredProcessFormatter, self).__init__(*args, **kwargs)
        # Keys of processes of which only the first one is listed.
        # NOTE: this is filled during formatting and never cleared, so
        # formatting the same instance twice yields different output.
        self._include_once = set()

    def adjust(self, process):
        "Normalize command lines that have fluctuating arguments."
        super(FilteredProcessFormatter, self).adjust(process)

        if process.cmdline.startswith((
                'astcanary',  # astcanary /var/run/asterisk/... <pid>
                '/usr/sbin/amavisd-new ')):
            # These processes have fluctuating arguments. Drop them.
            process.cmdline = process.cmdline.split(' ', 1)[0]

        elif process.cmdline.startswith((
                '/usr/sbin/zabbix_proxy: ',
                '/usr/sbin/zabbix_server: ')):
            # zabbix_proxy and zabbix_server add " [info]" which changes.
            # Drop it.
            process.cmdline = process.cmdline.split(' [', 1)[0]

        elif process.cmdline.startswith('docker-containerd-shim '):
            # Docker instances have fluctuating arguments:
            # docker-containerd-shim ID /var/...containerd/ID docker-runc
            args = process.cmdline.split()
            if len(args) == 4 and args[3] == 'docker-runc':
                args[1] = '<ID>'
                args[2] = args[2].rsplit('/', 1)[0] + '/<ID>'
                process.cmdline = ' '.join(args)

    def include(self, process):
        "Return False for processes that should not be listed."
        # Ignore kernel threads.
        if process.has_parent(include_self=True, pid=2):
            return False

        # Systemd renames itself after an update. We can't rename it
        # back to /sbin/init because it may have been called differently
        # (/sbin/init splash or whatever) in the first place.
        elif process.pid == 1:
            # /sbin/init [splash]
            # /lib/systemd/systemd --system --deserialize 19
            process.cmdline = 'INIT'

        # Children of these commands are generally not daemons, skip
        # them:
        elif process.has_parent(include_self=True, cmdline__startswith=(
                'sshd:', 'CRON', 'SCREEN',
                '/USR/SBIN/CRON',  # older cron
                '/usr/sbin/CRON',  # newer cron
                # Is a daemon, but spawns children of init for extra work.
                '/usr/bin/python /usr/bin/salt-minion',
                # User systemd comes and goes as it pleases with (sd-pam).
                '/lib/systemd/systemd --user')):
            return False

        # We want to monitor these daemons, but not their
        # (grand)children, as they come and go:
        elif process.has_parent(include_self=False, cmdline__startswith=(
                'gocollect',                     # ubuntu (upstart)
                '/usr/lib/postfix/master',       # debian/ubuntu
                '/usr/lib/postfix/sbin/master',  # ubuntu16.04+
                '/usr/libexec/postfix/master',   # redhat
                '/usr/sbin/gocollect')):         # sysv/systemd
            return False

        # These children may come and go, but we expect at least one:
        # - multiprocess apache creates at least N processes but may
        #   add/remove based on demand
        elif process.cmdline.startswith((
                '/usr/sbin/apache2 ',   # debian/ubuntu
                '/usr/sbin/httpd ',     # redhat
                'php-fpm: ')):
            # Show only the first of a set of identical siblings.
            key = (process.parent.pid, process.user, process.cmdline)
            if key in self._include_once:
                return False
            self._include_once.add(key)

        return super(FilteredProcessFormatter, self).include(process)
def diff(a, b):
    """
    Compare two process dumps and return a list of changes.

    Both arguments are strings holding LF separated lines. The result
    is a list of (which, text) tuples where which is -1 for a line that
    is only in a, 1 for a line that is only in b and 0 for context.
    """
    def as_lines(dump):
        # Trailing whitespace (the final LF) is dropped first; an empty
        # dump must become zero lines instead of one empty line.
        lines = dump.rstrip().split('\n')
        return [] if lines == [''] else lines

    sign_by_tag = {' ': 0, '-': -1, '+': 1}
    return [
        (sign_by_tag[entry[0]], entry[1:])
        for entry in udiff(as_lines(a), as_lines(b))
        if entry[0] != '?']
def ps_faxu():
    """
    Run ps(1) and return the root of the resulting Process tree.

    The returned root is a synthetic process with pid 0; all processes
    listed by ps are reachable through root.process_map.
    """
    cmd = ['ps', 'ax', '-o', 'user,pid,ppid,fname,args']
    try:
        # Only probing for existence of the function here.
        subprocess.check_output
    except AttributeError:
        # Blegh. Python 2.6. (You did already `pip install argparse`, yes?)
        # NOTE: unlike check_output, this path ignores the exit code.
        proc = subprocess.Popen(cmd, bufsize=-1, stdout=subprocess.PIPE)
        output = proc.communicate()[0]
    else:
        output = subprocess.check_output(cmd, bufsize=-1)
    output = output.decode('ascii', 'replace')

    root = Process(None, 0, 'root', 'root', 'root')
    for i, line in enumerate(output.split('\n')):
        if i == 0 or not line:
            # Skip the header line and blank (trailing) lines.
            continue
        Process.from_line(line, root)

    # Update processes with proper links. This must be done last because
    # the process output is unordered and we may not have the parent
    # process info yet earlier.
    for process in root.process_map.values():
        process.fix_links()

    return root
def get_formatter_class():
    """
    Return the formatter class to use.

    If /usr/local/etc/psdiff.conf or /etc/psdiff.conf exists, the first
    one found is executed as python source and the class it defines as
    LocalFilteredProcessFormatter is returned (KeyError if it defines
    no such name). Otherwise FilteredProcessFormatter is returned.
    """
    for path in ('/usr/local/etc/psdiff.conf', '/etc/psdiff.conf'):
        # First check, and then open without exception handling. That way we
        # see if anything is wrong with permissions and such.
        if os.path.exists(path):
            with open(path, 'r') as fh:
                source = fh.read()

            # Ooohh.. eval/exec. Supply FilteredProcessFormatter and
            # ProcessFormatter so they can be used as superclass.
            # SECURITY: this runs the config file with our privileges;
            # the file must be writable by trusted users only.
            io = {
                'FilteredProcessFormatter': FilteredProcessFormatter,
                'ProcessFormatter': ProcessFormatter,
            }
            exec(source, io)
            return io['LocalFilteredProcessFormatter']

    # Nothing found? Return the plain version.
    return FilteredProcessFormatter
def get_new_output(formatter_class):
    """Return a fresh process listing, formatted by formatter_class."""
    # Call __str__() directly instead of str(): the former returns
    # unicode(!) on py2.
    return formatter_class(ps_faxu()).__str__()
def main():
    """
    Parse the command line, load config and old dump, and run process().

    A missing /var/lib/psdiff.db is treated as an empty old dump; any
    other IOError while reading it is propagated.
    """
    # NOTE(review): the help text layout (dedent, raw formatter, blank
    # lines, indentation of the examples) is reconstructed; confirm
    # against upstream.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""\
            Monitor differences between the list of expected running processes
            and the actual running processes.

            Expected usage

            - set up server with various processes;
            - run `psdiff write' to store a dump in /var/lib/psdiff.db;
            - have zabbix (or your favorite monitoring tool) call
              `psdiff show-missing' and `psdiff show-extra';
            - have the monitoring tool show errors if there is output for any
              of the commands.

            This is just a STARTING POINT, it is NOT a replacement for DETAILED
            process monitoring. You will still want to add daemon-specific
            monitoring through other means.

            On startup, an attempt is made to import /usr/local/etc/psdiff.conf
            or /etc/psdiff.conf (a python file) where it looks for a class
            called `LocalFilteredProcessFormatter', which will be used as
            formatter class instead of the builtin FilteredProcessFormatter.
            For example:

                class LocalFilteredProcessFormatter(
                        FilteredProcessFormatter):
                    def adjust(self, process):
                        super(LocalFilteredProcessFormatter, self).adjust(
                            process)

                        # haproxy(1) sometimes adds " -sf PIDLIST" at the tail
                        if process.cmdline.startswith('/usr/sbin/haproxy'):
                            process.cmdline = (
                                process.cmdline.split(' -sf ', 1)[0])

                        # Java processes get unordered arguments...
                        if process.cmdline.startswith((
                                '/usr/bin/java', 'java')):
                            args = process.cmdline.split(' ')
                            process.cmdline = ' '.join(
                                [args[0]] + sorted(args[1:]))

                    def include(self, process):
                        # atop(1) has fluctuating arguments. I don't care
                        # whether it runs.
                        if process.cmdline.startswith('/usr/bin/atop '):
                            return False

                        return (
                            super(LocalFilteredProcessFormatter, self)
                            .include(process))

                # vim: set syn=python:

            Zabbix example

                UserParameter=psdiff.missing,psdiff show-missing --retry 2>&1
                UserParameter=psdiff.extra,psdiff show-extra --retry 2>&1

            With triggers like this:

                {Template Role Daemons:psdiff.missing.strlen()}<>0 or
                {Template Role Daemons:psdiff.missing.nodata(30m)}=1
            """))
    parser.add_argument(
        'action', nargs='?', default='show',
        help="which action to perform; defaults to `show'",
        choices=('show', 'show-extra', 'show-missing', 'dump', 'write'))
    parser.add_argument(
        '--retry', action='store_true',
        help=("retry `show' and friends up to 2 seconds; avoids false "
              "positives caused by restarts and short lived children"))
    args = parser.parse_args()

    if (args.retry and
            args.action not in ('show', 'show-extra', 'show-missing')):
        parser.error("--retry works with the `show' and related actions only")

    # First load up config.
    formatter_class = get_formatter_class()

    # Then load up old db.
    try:
        with open('/var/lib/psdiff.db', 'r') as fh:
            old_output = fh.read()
            if isinstance('', bytes):  # py2
                old_output = old_output.decode('utf-8', 'replace')
    except IOError as e:
        if e.errno != 2:  # no such file
            raise
        old_output = u''

    process(args, old_output, formatter_class)
def process(args, old_output, formatter_class):
    """
    Perform args.action: 'dump', 'write' or one of the 'show' actions.

    - dump: print the current process listing;
    - write: store the current listing in /var/lib/psdiff.db (keeping
      the previous one as /var/lib/psdiff.old) if it changed;
    - show, show-missing, show-extra: print the differences between
      old_output and the current listing and exit with status 1 if any
      applicable difference was printed, 0 otherwise.
    """
    # NOTE: We never print() with u'' below, because in py2 it would "guess"
    # the encoding of the recipient (tty) instead of choosing utf-8.
    if args.action == 'dump':
        new_output = get_new_output(formatter_class)
        if isinstance('', bytes):  # py2
            new_output = new_output.encode('utf-8', 'replace')
        print(new_output[0:-1])  # without trailing LF

    elif args.action == 'write':
        new_output = get_new_output(formatter_class)
        if old_output != new_output:
            # Write to a temp file first and rename it over the db, so
            # the db is never seen half written.
            with open('/var/lib/psdiff.new', 'w') as fh:
                if isinstance('', bytes):  # py2
                    new_output = new_output.encode('utf-8', 'replace')
                fh.write(new_output)
            if old_output:
                os.rename('/var/lib/psdiff.db', '/var/lib/psdiff.old')
            os.rename('/var/lib/psdiff.new', '/var/lib/psdiff.db')
            print('Wrote to /var/lib/psdiff.db')
        else:
            print('No changes to /var/lib/psdiff.db')

    else:
        assert args.action.startswith('show'), args

        # If args.retry, then try fetching a changeset for 5 times
        # before concluding that something really has changed.
        for sleeptime in (0.1, 0.3, 0.6, 1.0, 0):
            new_output = get_new_output(formatter_class)

            # Quick optimization.
            if old_output == new_output:
                changes = ()
                break

            changes = diff(old_output, new_output)

            # There are changes. If we're not retrying, start showing
            # the results.
            if not args.retry or not sleeptime:
                break

            # Do the changes apply to us?
            if args.action == 'show-missing' and not any(
                    which < 0 for which, line in changes):
                break
            if args.action == 'show-extra' and not any(
                    which > 0 for which, line in changes):
                break

            # Sleep a while.
            time.sleep(sleeptime)

        # Show changes, if any.
        # NOTE(review): the exact output format of the two print()
        # calls below is reconstructed; confirm against upstream.
        status = 0
        for which, line in changes:
            if which and isinstance('', bytes):  # py2
                line = line.encode('utf-8', 'replace')
            if which < 0 and args.action in ('show', 'show-missing'):
                print('-{0}'.format(line))
                status = 1
            elif which > 0 and args.action in ('show', 'show-extra'):
                print('+{0}'.format(line))
                status = 1
        sys.exit(status)  # possibly non-zero exit
# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# vim: set ts=8 sw=4 sts=4 et ai: