Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 551 lines (446 sloc) 18.7 KB
#!/usr/bin/env python
# psdiff (part of ossobv/vcutil) // wdoekes/2016-2018 // Public Domain
#
# Generic (coarse) monitoring of daemon processes. Use in conjunction
# with a monitoring suite like Zabbix.
#
import argparse
import os
import re
import subprocess
import sys
import textwrap
import time
def udiff(alines, blines):
    """
    Yield the differences between two lists of lines.

    Replacement for difflib.ndiff that doesn't take exponential time.

    The fancy ndiff is nice, but on some machines it takes almost a
    minute for psdiff dumps of 200 lines. That's not acceptable. We'll
    have to settle for something less pretty but faster.
    See: https://bugs.python.org/issue6931

    We'd prefer the vcutils::udiff tool over difflib.unified_diff
    because it syncs on the next line that matches instead of grouping
    removals and additions together.

    vcutil::udiff::

        begin
        +4
        -1
        +5
        -2
        +6
        end

    difflib.unified_diff::

        begin
        -1
        -2
        +4
        +5
        +6
        end

    However, loading udiff optionally makes the output differ based on
    the (non)existence of said tool, and loading it from python is
    rather ugly. We'll settle for ugly-diff for now.

    Both arguments are sequences of lines; they are handed to
    difflib.unified_diff with lineterm='', so the lines should not carry
    a trailing newline.

    Every yielded string consists of the unified diff marker (' ', '-'
    or '+'), a single space, and the line itself. The '---'/'+++' file
    header and the '@@' hunk headers are not yielded.
    """
    import difflib

    diff_lines = iter(difflib.unified_diff(alines, blines, lineterm=''))

    # Skip the '---'/'+++' file header, up to and including the first
    # hunk header.
    for line in diff_lines:
        if line.startswith('@@ '):
            break

    for line in diff_lines:
        if not line.startswith('@@ '):
            # Explicit field numbers: the auto-numbered '{}' form is not
            # available on Python 2.6, which this script still supports.
            yield '{0} {1}'.format(line[0], line[1:])
class Process(object):
    """
    One process of a `ps` listing, linked into a parent/children tree.

    The first Process that is created without a `root` becomes the root
    of the tree and owns `process_map`, the pid -> Process lookup table
    in which every process of that tree registers itself.
    """
    # Separator for the leading columns of a ps output line.
    split = re.compile(r'\s+')

    @classmethod
    def from_line(cls, line, root):
        """
        Create a Process from a single "user pid ppid fname args" line.

        The fname column is expected to be exactly 8 characters wide and
        to be followed by a single space and the full command line.
        """
        fields = cls.split.split(line, 3)
        user = fields[0]
        pid, ppid = int(fields[1]), int(fields[2])
        tail = fields[3]
        exe = tail[:8].rstrip()
        assert tail[8] == ' '
        cmdline = tail[9:]
        return cls(ppid, pid, user, exe, cmdline, root=root)

    def __init__(self, parent, pid, user, exe, cmdline, root=None):
        # `parent` holds the parent pid until fix_links() replaces it
        # with the parent Process object.
        self.parent = parent
        self.pid = pid
        self.user = user
        self.exe = exe
        self.cmdline = cmdline
        self.children = set()

        if root:
            self.root = root
        else:
            # We are the root ourselves: we keep the pid lookup table.
            self.root = self
            self.process_map = {}
        self.root.process_map[pid] = self

    def has_parent(self, include_self=False,
                   cmdline__startswith=None, pid=None):
        """
        Return True if an ancestor matches one of the given criteria.

        With include_self=True the process itself is checked as well.
        A candidate matches when its cmdline starts with
        `cmdline__startswith` (string or tuple of strings) or when its
        pid equals `pid`; criteria that are None are not checked.
        """
        candidate = self if include_self else self.parent
        while candidate:
            if (cmdline__startswith is not None and
                    candidate.cmdline.startswith(cmdline__startswith)):
                return True
            if pid is not None and candidate.pid == pid:
                return True
            candidate = candidate.parent
        return False

    def fix_links(self):
        "Replace the parent pid by the parent object and register as child."
        if self.parent is None:
            return
        parent = self.root.process_map[self.parent]
        self.parent = parent
        parent.children.add(self)

    def get_process(self, pid):
        "Look up a process of this tree by pid; a falsy pid yields None."
        return self.root.process_map[pid] if pid else None

    def to_string(self, indent=0):
        "Return 'cmdline {user=USER}', prefixed by `indent` spaces."
        padding = indent * ' '
        command = self.cmdline.rstrip()
        return u'{0}{1} {{user={2}}}'.format(padding, command, self.user)

    def sort(self):
        "Recursively sort the children, turning each set into a list."
        for child in self.children:
            child.sort()
        self.children = sorted(self.children)

    def __hash__(self):
        # Python3 drops the auto-generated __hash__ as soon as __eq__ is
        # defined, so it has to be supplied explicitly.
        return id(self)

    def __eq__(self, other):
        # Two processes are only equal if they are the same object.
        return self is other

    def __lt__(self, other):
        # An object is never smaller than itself.
        if self is other:
            return False

        # Compare the cheap traits first; the first difference decides.
        if self.cmdline != other.cmdline:
            return self.cmdline < other.cmdline
        if self.user != other.user:
            return self.user < other.user
        if len(self.children) != len(other.children):
            return len(self.children) < len(other.children)

        # Comparing the children themselves requires that both sides
        # have been sort()ed (and thereby converted to lists) already.
        assert isinstance(self.children, list), self.children
        assert isinstance(other.children, list), other.children
        return self.children < other.children

    def __str__(self):
        return self.to_string()
class ProcessFormatter(object):
    """
    Turns a process tree into an indented text listing.

    Subclasses customize the result through three hooks: adjust() to
    alter processes before they are sorted, include() to leave
    processes out and to_string() to change how a line is rendered.
    """

    def __init__(self, root):
        self.root = root

        def sort_process(process):
            process.sort()

        # First give adjust() the chance to alter the process traits,
        # because the sort order depends on them.
        self.visit(self.adjust)
        self.visit(sort_process)

    def visit(self, callable_):
        "Call callable_ with every process known to the root."
        for item in self.root.process_map.values():
            callable_(item)

    def to_strings(self, process, indent=0):
        "Return the lines for process and, one level deeper, its children."
        lines = []
        if self.include(process):
            lines.append(self.to_string(process, indent))
        # NOTE(review): the indentation of this loop was lost in the
        # source at hand. It is placed outside the include() check, so
        # children are judged on their own by include() -- confirm.
        for child in process.children:  # sorted during __init__
            lines += self.to_strings(child, indent + 1)
        return lines

    def __str__(self):
        lines = self.to_strings(self.root)
        return u'\n'.join(lines) + '\n'

    def adjust(self, process):
        """
        Hook to alter cmdline and other traits of a process.

        It runs before the processes are sorted, which makes it the
        place to change cmdline. Does nothing by default.
        """
        pass

    def include(self, process):
        "Hook to leave processes out of the listing; includes all by default."
        return True

    def to_string(self, process, indent=0):
        "Older hook to alter how a process is rendered as a line."
        return process.to_string(indent)
class FilteredProcessFormatter(ProcessFormatter):
    """
    ProcessFormatter with built-in rules for common daemons.

    adjust() strips fluctuating parts from certain command lines and
    include() hides kernel threads, interactive/cron sessions and the
    short-lived children of a number of daemons.

    NOTE: include() records processes in self._include_once, which is
    only reset by __init__. Formatting the same instance a second time
    will therefore leave out every "include once" process.
    """
    # Fluctuating arguments: only the first word of cmdline is kept.
    _FIRST_WORD_ONLY = (
        'astcanary',  # astcanary /var/run/asterisk/... <pid>
        '/usr/sbin/amavisd-new ')

    # zabbix_proxy and zabbix_server add " [info]" which changes.
    _WITHOUT_BRACKET_INFO = (
        '/usr/sbin/zabbix_proxy: ',
        '/usr/sbin/zabbix_server: ')

    # Children of these commands are generally not daemons. These are
    # skipped including everything below them.
    _SKIP_SELF_AND_DESCENDANTS = (
        'sshd:', 'CRON', 'SCREEN',
        '/USR/SBIN/CRON',  # older cron
        '/usr/sbin/CRON',  # newer cron
        # Is a daemon, but spawns children of init for extra work.
        '/usr/bin/python /usr/bin/salt-minion',
        # User systemd comes and goes as it pleases with (sd-pam).
        '/lib/systemd/systemd --user')

    # We want to monitor these daemons, but not their (grand)children,
    # as they come and go.
    _SKIP_DESCENDANTS = (
        'gocollect',  # ubuntu (upstart)
        '/usr/lib/postfix/master',  # debian/ubuntu
        '/usr/lib/postfix/sbin/master',  # ubuntu16.04+
        '/usr/libexec/postfix/master',  # redhat
        '/usr/lib/postgresql/',
        '/usr/sbin/dovecot',
        '/usr/sbin/gocollect',  # sysv/systemd
        '/usr/sbin/vsftpd',
        '/usr/sbin/zabbix_agentd')

    # These children may come and go, but we expect at least one:
    # - multiprocess apache creates at least N processes but may
    #   add/remove based on demand
    _INCLUDE_ONCE = (
        '/usr/sbin/apache2 ',  # debian/ubuntu
        '/usr/sbin/httpd ',  # redhat
        'php-fpm: ')

    def __init__(self, *args, **kwargs):
        super(FilteredProcessFormatter, self).__init__(*args, **kwargs)
        # The (parent pid, user, cmdline) keys that were listed already.
        self._include_once = set()

    def adjust(self, process):
        "Strip the parts of cmdline that are known to fluctuate."
        super(FilteredProcessFormatter, self).adjust(process)
        cmdline = process.cmdline

        if cmdline.startswith(self._FIRST_WORD_ONLY):
            process.cmdline = cmdline.split(' ', 1)[0]

        elif cmdline.startswith(self._WITHOUT_BRACKET_INFO):
            process.cmdline = cmdline.split(' [', 1)[0]

        elif cmdline.startswith('docker-containerd-shim '):
            # Docker instances have fluctuating arguments:
            # docker-containerd-shim ID /var/...containerd/ID docker-runc
            words = cmdline.split()
            if len(words) == 4 and words[3] == 'docker-runc':
                words[1] = '<ID>'
                words[2] = words[2].rsplit('/', 1)[0] + '/<ID>'
                process.cmdline = ' '.join(words)

    def include(self, process):
        "Return False for the processes that should not be listed."
        # Ignore kernel threads.
        if process.has_parent(include_self=True, pid=2):
            return False

        # Systemd renames itself after an update. We can't rename it
        # back to /sbin/init because it may have been called differently
        # (/sbin/init splash or whatever) in the first place.
        if process.pid == 1:
            # /sbin/init [splash]
            # /lib/systemd/systemd --system --deserialize 19
            process.cmdline = 'INIT'
            return super(FilteredProcessFormatter, self).include(process)

        if process.has_parent(
                include_self=True,
                cmdline__startswith=self._SKIP_SELF_AND_DESCENDANTS):
            return False

        if process.has_parent(
                include_self=False,
                cmdline__startswith=self._SKIP_DESCENDANTS):
            return False

        if process.cmdline.startswith(self._INCLUDE_ONCE):
            key = (process.parent.pid, process.user, process.cmdline)
            if key in self._include_once:
                return False
            self._include_once.add(key)

        return super(FilteredProcessFormatter, self).include(process)
def diff(a, b):
    """
    Compare two process dumps and return the list of differences.

    Both a and b are complete dumps (newline separated text). The result
    is a list of (which, line) tuples as produced by udiff(), where
    which is -1 for a line that is only in a, 1 for a line that is only
    in b and 0 for a line that is in both.
    """
    def to_lines(dump):
        # Trailing whitespace (the final LF) is dropped before
        # splitting. An empty dump has no lines at all, instead of a
        # single empty line.
        lines = dump.rstrip().split('\n')
        return [] if lines == [''] else lines

    markers = {' ': 0, '-': -1, '+': 1}
    return [
        (markers[change[0]], change[1:])
        for change in udiff(to_lines(a), to_lines(b))
        if change[0] != '?']
def ps_faxu():
    """
    Run ps(1) and return the root of the resulting Process tree.

    The returned root is an artificial process with pid 0; every listed
    process is registered in its process_map.
    """
    cmd = ['ps', 'ax', '-o', 'user,pid,ppid,fname,args']

    if hasattr(subprocess, 'check_output'):
        output = subprocess.check_output(cmd, bufsize=-1)
    else:
        # Blegh. Python 2.6. (You did already `pip install argparse`, yes?)
        proc = subprocess.Popen(cmd, bufsize=-1, stdout=subprocess.PIPE)
        output = proc.communicate()[0]
        proc.wait()
    output = output.decode('ascii', 'replace')

    root = Process(None, 0, 'root', 'root', 'root')
    # The first line holds the column headers; empty lines are skipped.
    for line in output.split('\n')[1:]:
        if line:
            Process.from_line(line, root)

    # Only now can the processes be linked to their parents: the ps
    # output is unordered, so a parent may be listed after its children.
    for known_process in root.process_map.values():
        known_process.fix_links()

    return root
def get_formatter_class():
    """
    Return the formatter class to use.

    The first of /usr/local/etc/psdiff.conf and /etc/psdiff.conf that
    exists is executed as python code and must define a class called
    LocalFilteredProcessFormatter, which is then returned. Without a
    config file, the builtin FilteredProcessFormatter is returned.
    """
    for path in ('/usr/local/etc/psdiff.conf', '/etc/psdiff.conf'):
        # Check for existence first and then open without any exception
        # handling: that way problems with permissions and such are not
        # silently ignored.
        if not os.path.exists(path):
            continue

        with open(path, 'r') as fh:
            source = fh.read()

        # NOTE: exec() runs the config file as arbitrary python code, so
        # the file must only be writable by trusted users. The two
        # formatter classes are supplied so the config can use them as
        # superclass.
        namespace = {
            'FilteredProcessFormatter': FilteredProcessFormatter,
            'ProcessFormatter': ProcessFormatter,
        }
        exec(source, namespace)
        return namespace['LocalFilteredProcessFormatter']

    # No config file found: use the plain version.
    return FilteredProcessFormatter
def get_new_output(formatter_class):
    "Return a fresh dump of the running processes, using formatter_class."
    process_tree = ps_faxu()
    # Calling __str__() directly (instead of str()) is done on purpose:
    # on py2 this returns unicode(!).
    return formatter_class(process_tree).__str__()
def main():
    """
    Command line entry point.

    Parses the arguments, loads the formatter class (optionally from the
    config file) and the previously written dump from /var/lib/psdiff.db
    and hands them to process(). A missing dump file counts as an empty
    dump; any other IOError while reading it is raised.
    """
    # Local import: only needed to recognise the "no such file" error.
    import errno

    # The epilog is shown as-is (RawDescriptionHelpFormatter), so the
    # example config below must keep its python indentation.
    parser = argparse.ArgumentParser(
        prog='psdiff',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            Monitor differences between the list of expected running processes
            and the actual running processes.
            '''),
        epilog=textwrap.dedent('''\
            Expected usage
            --------------
            - set up server with various processes;
            - run `psdiff write' to store a dump in /var/lib/psdiff.db;
            - have zabbix (or your favorite monitoring tool) call
            `psdiff show-missing' and `psdiff show-extra';
            - have the monitoring tool show errors if there is output for any
            of the commands.
            This is just a STARTING POINT, it is NOT a replacement for DETAILED
            process monitoring. You will still want to add daemon-specific
            monitoring through other means.
            Adjustments
            -----------
            On startup, an attempt is made to import /usr/local/etc/psdiff.conf
            or /etc/psdiff.conf (a python file) where it looks for a class
            called `LocalFilteredProcessFormatter', which will be used as
            formatter class instead of the builtin FilteredProcessFormatter.
            For example:
                class LocalFilteredProcessFormatter(
                        FilteredProcessFormatter):
                    def adjust(self, process):
                        super(LocalFilteredProcessFormatter, self).adjust(
                            process)
                        # haproxy(1) sometimes adds " -sf PIDLIST" at the tail
                        if process.cmdline.startswith('/usr/sbin/haproxy'):
                            process.cmdline = (
                                process.cmdline.split(' -sf ', 1)[0])
                        # Java processes get unordered arguments...
                        if process.cmdline.startswith((
                                '/usr/bin/java', 'java')):
                            args = process.cmdline.split(' ')
                            process.cmdline = ' '.join(
                                [args[0]] + sorted(args[1:]))
                    def include(self, process):
                        # atop(1) has fluctuating arguments. I don't care
                        # whether it runs.
                        if process.cmdline.startswith('/usr/bin/atop '):
                            return False
                        return (
                            super(LocalFilteredProcessFormatter, self)
                            .include(process))
                # vim: set syn=python:
            Zabbix example
            --------------
            UserParameter=psdiff.missing,psdiff show-missing --retry 2>&1
            UserParameter=psdiff.extra,psdiff show-extra --retry 2>&1
            With triggers like this:
            {Template Role Daemons:psdiff.missing.strlen()}<>0 or
            {Template Role Daemons:psdiff.missing.nodata(30m)}=1
            '''))
    parser.add_argument(
        'action', nargs='?', default='show',
        help="which action to perform; defaults to `show'",
        choices=('show', 'show-extra', 'show-missing', 'dump', 'write'))
    parser.add_argument(
        '--retry', action='store_true',
        help=("retry `show' and friends up to 2 seconds; avoids false "
              "positives caused by restarts and short lived children"))

    args = parser.parse_args()
    if (args.retry and
            args.action not in ('show', 'show-extra', 'show-missing')):
        parser.error("--retry works with the `show' and related actions only")

    # First load up config.
    formatter_class = get_formatter_class()

    # Then load up old db.
    try:
        with open('/var/lib/psdiff.db', 'r') as fh:
            old_output = fh.read()
        if isinstance('', bytes):  # py2
            old_output = old_output.decode('utf-8', 'replace')
    except IOError as e:
        # Only a missing db is fine (nothing has been written yet).
        if e.errno != errno.ENOENT:
            raise
        old_output = u''

    process(args, old_output, formatter_class)
def process(args, old_output, formatter_class):
    """
    Perform the action that was selected on the command line.

    - dump: print the current process dump;
    - write: store the current dump in /var/lib/psdiff.db, keeping the
      previous one as /var/lib/psdiff.old;
    - show, show-missing, show-extra: print the differences between
      old_output and the current dump and exit with status 1 if
      anything was printed, 0 otherwise.

    args is the parsed command line (action and retry are used),
    old_output the previously written dump as text and formatter_class
    the class used to create new dumps.
    """
    # NOTE: We never print() with u'' below, because in py2 it would "guess"
    # the encoding of the recipient (tty) instead of choosing utf-8.
    is_py2 = isinstance('', bytes)
    action = args.action

    if action == 'dump':
        new_output = get_new_output(formatter_class)
        if is_py2:
            new_output = new_output.encode('utf-8', 'replace')
        print(new_output[0:-1])  # without trailing LF
        return

    if action == 'write':
        new_output = get_new_output(formatter_class)
        if old_output == new_output:
            print('No changes to /var/lib/psdiff.db')
            return

        # Write to a temporary name first and rename it afterwards.
        with open('/var/lib/psdiff.new', 'w') as fh:
            if is_py2:
                new_output = new_output.encode('utf-8', 'replace')
            fh.write(new_output)
        if old_output:
            os.rename('/var/lib/psdiff.db', '/var/lib/psdiff.old')
        os.rename('/var/lib/psdiff.new', '/var/lib/psdiff.db')
        print('Wrote to /var/lib/psdiff.db')
        return

    assert action.startswith('show'), args
    only_missing = (action == 'show-missing')
    only_extra = (action == 'show-extra')

    # With args.retry, a changeset is fetched up to 5 times before we
    # conclude that something really has changed. The last sleep time
    # is 0, which marks the final attempt.
    for sleeptime in (0.1, 0.3, 0.6, 1.0, 0):
        new_output = get_new_output(formatter_class)

        # Quick optimization: no need to diff identical dumps.
        if old_output == new_output:
            changes = ()
            break

        changes = diff(old_output, new_output)

        # Not retrying (anymore)? Then these changes are the result.
        if not args.retry or not sleeptime:
            break

        # Changes of a kind that we will not show are not worth
        # waiting for either.
        if only_missing and not any(which < 0 for which, _ in changes):
            break
        if only_extra and not any(which > 0 for which, _ in changes):
            break

        time.sleep(sleeptime)

    # Show changes, if any.
    want_missing = action in ('show', 'show-missing')
    want_extra = action in ('show', 'show-extra')
    status = 0
    for which, line in changes:
        if which and is_py2:
            line = line.encode('utf-8', 'replace')

        if which < 0 and want_missing:
            template = '-{0}'
        elif which > 0 and want_extra:
            template = '+{0}'
        else:
            continue

        print(template.format(line))
        status = 1

    sys.exit(status)  # possibly non-zero exit
# Only run the command line interface when executed as a script, not
# when this file is imported as a module.
if __name__ == '__main__':
    main()
# vim: set ts=8 sw=4 sts=4 et ai: