diff --git a/docs/integrations/zabbix/sample-1.json b/docs/integrations/zabbix/sample-1.json new file mode 100644 index 0000000..38a5839 --- /dev/null +++ b/docs/integrations/zabbix/sample-1.json @@ -0,0 +1,28 @@ + { + "status": "0", + "recovery_mode": "0", + "description": "/etc/resolv.conf: Disk space is low (used > {$VFS.FS.PUSED.MAX.WARN:\"/etc/resolv.conf\"}%)", + "state": "0", + "url": "", + "type": "0", + "templateid": "0", + "correlation_tag": "", + "lastchange": "1590642631", + "value": "1", + "priority": "2", + "triggerid": "16919", + "opdata": "Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})", + "flags": "4", + "comments": "Two conditions should match: First, space utilization should be above {$VFS.FS.PUSED.MAX.WARN:\"/etc/resolv.conf\"}.\r\n Second condition should be one of the following:\r\n - The disk free space is less than 10G.\r\n - The disk will be full in less than 24 hours.", + "error": "", + "hosts": [ + { + "host": "node3", + "hostid": "10318" + } + ], + "correlation_mode": "0", + "expression": "{19856}>{$VFS.FS.PUSED.MAX.WARN:\"/etc/resolv.conf\"} and\r\n(({19857}-{19858})<10G or {19859}<1d)", + "recovery_expression": "", + "manual_close": "1" + } diff --git a/docs/integrations/zabbix/sample-2.json b/docs/integrations/zabbix/sample-2.json new file mode 100644 index 0000000..c00eefd --- /dev/null +++ b/docs/integrations/zabbix/sample-2.json @@ -0,0 +1,30 @@ +[ + { + "status": "0", + "recovery_mode": "0", + "description": "Zabbix agent is not available (for {$AGENT.TIMEOUT})", + "state": "0", + "url": "", + "type": "0", + "templateid": "16198", + "correlation_tag": "", + "lastchange": "1590405426", + "value": "1", + "priority": "3", + "triggerid": "16873", + "opdata": "", + "flags": "0", + "comments": "For passive only agents, host availability is used with {$AGENT.TIMEOUT} as time threshold.", + "error": "", + "hosts": [ + { + "host": "node2", + "hostid": "10319" + } + ], + "correlation_mode": "0", + "expression": 
"{19743}=0", + "recovery_expression": "", + "manual_close": "1" + } +] diff --git a/docs/integrations/zabbix/sample-3.json b/docs/integrations/zabbix/sample-3.json new file mode 100644 index 0000000..ca50c72 --- /dev/null +++ b/docs/integrations/zabbix/sample-3.json @@ -0,0 +1,28 @@ + { + "status": "0", + "recovery_mode": "0", + "description":"Load average is too high (per CPU load over {$LOAD_AVG_PER_CPU.MAX.WARN} for 5m)", + "state": "0", + "url": "", + "type": "0", + "templateid": "0", + "correlation_tag": "", + "lastchange": "1511612631", + "value": "1", + "priority": "3", + "triggerid": "16565", + "opdata": "Load averages(1m 5m 15m): ({ITEM.LASTVALUE1} {ITEM.LASTVALUE3} {ITEM.LASTVALUE4}), # of CPUs: {ITEM.LASTVALUE2}", + "flags": "4", + "comments": "Per CPU load average is too high. Your system may be slow to respond.", + "error": "", + "hosts": [ + { + "host": "node3", + "hostid": "10318" + } + ], + "correlation_mode": "0", + "expression": "{19277}/{19278}>{$LOAD_AVG_PER_CPU.MAX.WARN}\r\nand {19279}>0\r\nand {19280}>0", + "recovery_expression": "", + "manual_close": "1" + } diff --git a/helm/orca/config/alerts-mapping.yaml b/helm/orca/config/alerts-mapping.yaml index fe63303..c6e6251 100644 --- a/helm/orca/config/alerts-mapping.yaml +++ b/helm/orca/config/alerts-mapping.yaml @@ -696,3 +696,258 @@ elastalert: properties: name: pod_name namespace: namespace_name + +zabbix: + mappings: + - name: "Load average is too high (per CPU load over {$LOAD_AVG_PER_CPU.MAX.WARN} for 5m)" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Interface eth0: Link down' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Interface tunl0: Link down' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: '/etc/hostname: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.CRIT:/etc/hostname}%)' + source_mapping: + origin: kubernetes + kind: node + properties: + 
name: node + - name: '/etc/hosts: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.CRIT:/etc/hosts}%)' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix agent is not available (for {$AGENT.TIMEOUT})' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix alerter processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix alert manager processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix alert syncer processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix configuration syncer processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix discoverer processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix escalator processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix history syncer processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix housekeeper processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix http poller processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix icmp pinger processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix ipmi manager processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix ipmi poller processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + 
properties: + name: node + - name: 'Zabbix java poller processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix LLD manager processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix LLD worker processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix poller processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix preprocessing manager processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix preprocessing worker processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix proxy poller processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix self-monitoring processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix snmp trapper processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix task manager processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix timer processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix trapper processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix unreachable poller processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 'Zabbix value cache working in low memory mode' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: 
'Zabbix vmware collector processes more than 75% busy' + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "Unavailable by ICMP ping" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "High ICMP ping loss" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "High ICMP ping response time" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "System status is in critical state" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "System is in unrecoverable state!" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "System status is in warning state" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "Temperature is above warning threshold: >{$TEMP_WARN:}" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "Temperature is above critical threshold: >{$TEMP_CRIT:}" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node + - name: "Temperature is too low: <{$TEMP_CRIT_LOW:}" + source_mapping: + origin: kubernetes + kind: node + properties: + name: node diff --git a/helm/orca/templates/configmap.yaml b/helm/orca/templates/configmap.yaml index 2a5a256..3da1b82 100644 --- a/helm/orca/templates/configmap.yaml +++ b/helm/orca/templates/configmap.yaml @@ -41,6 +41,13 @@ data: url: {{ .Values.probes.prometheus.url }} resync_period: {{ .Values.probes.prometheus.resync_period }} + zabbix: + enabled: {{ .Values.probes.zabbix.enabled }} + url: {{ .Values.probes.zabbix.url }} + username: {{ .Values.probes.zabbix.username }} + password: {{ .Values.probes.zabbix.password }} + resync_period: {{ .Values.probes.zabbix.resync_period }} + ingestors: prometheus: enabled: {{ .Values.ingestors.prometheus.enabled }} diff --git a/helm/orca/values.yaml b/helm/orca/values.yaml 
index 0d6fe13..e3a6e04 100644 --- a/helm/orca/values.yaml +++ b/helm/orca/values.yaml @@ -75,6 +75,12 @@ probes: enabled: false resync_period: 60 url: + zabbix: + enabled: false + resync_period: 60 + url: + username: + password: ingestors: prometheus: diff --git a/orca/common/config.py b/orca/common/config.py index 4f014af..4439393 100644 --- a/orca/common/config.py +++ b/orca/common/config.py @@ -151,6 +151,20 @@ def parse(self, config_path): 'url': {'type': 'string'}, 'resync_period': {'type': 'integer', 'coerce': int, 'default': 300} } + }, + 'zabbix': { + 'type': 'dict', + 'schema': { + 'enabled': { + 'type': 'boolean', + 'coerce': (str, str_utils.to_bool), + 'default': False + }, + 'url': {'type': 'string'}, + 'username': {'type': 'string'}, + 'password': {'type': 'string'}, + 'resync_period': {'type': 'integer', 'coerce': int, 'default': 300} + } } } }, diff --git a/orca/topology/alerts/__init__.py b/orca/topology/alerts/__init__.py index f294d7e..f0877a6 100644 --- a/orca/topology/alerts/__init__.py +++ b/orca/topology/alerts/__init__.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from orca.topology.alerts import elastalert, falco, prometheus +from orca.topology.alerts import elastalert, falco, prometheus, zabbix -__all__ = ['elastalert', 'falco', 'prometheus'] +__all__ = ['elastalert', 'falco', 'prometheus', 'zabbix'] diff --git a/orca/topology/alerts/zabbix/__init__.py b/orca/topology/alerts/zabbix/__init__.py new file mode 100644 index 0000000..547d098 --- /dev/null +++ b/orca/topology/alerts/zabbix/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2020 OpenRCA Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from orca.topology import bundle
+from orca.topology.alerts.zabbix import linker, probe
+
+
+def get_probes():
+    """Return the probe bundles provided by the Zabbix alerts module.
+
+    The returned list holds a single bundle that pairs the alert probe class
+    with the alert linker class.
+    """
+    alert_bundle = bundle.ProbeBundle(
+        probe=probe.AlertProbe,
+        linkers=[linker.AlertLinker])
+    return [alert_bundle]
diff --git a/orca/topology/alerts/zabbix/extractor.py b/orca/topology/alerts/zabbix/extractor.py
new file mode 100644
index 0000000..0ef6f0f
--- /dev/null
+++ b/orca/topology/alerts/zabbix/extractor.py
@@ -0,0 +1,52 @@
+# Copyright 2020 OpenRCA Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from orca.common import str_utils
+from orca.topology.alerts import extractor
+
+
+class Extractor(extractor.Extractor):
+
+    """Common base for extractors of entities coming from Zabbix."""
+
+    @property
+    def origin(self):
+        """Origin string reported by this extractor (``'zabbix'``)."""
+        return 'zabbix'
+
+    @classmethod
+    def get(cls):
+        """Call the parent ``get`` with the ``'zabbix'`` key."""
+        zabbix_extractor = super().get('zabbix')
+        return zabbix_extractor
+
+
+class AlertExtractor(Extractor):
+
+    """Extractor for Alert entities retrieved from Zabbix API.
+
+    An entity is indexed with the keys ``'host'`` and ``'trigger'``; positions
+    0, 1 and 2 of ``entity['trigger']`` are read as the alert name, the
+    severity and the status flag respectively.
+    """
+
+    def _extract_name(self, entity):
+        """Return the first element of the entity's trigger sequence."""
+        trigger = entity['trigger']
+        return trigger[0]
+
+    def _extract_source_labels(self, entity):
+        """Return the source labels: the entity's host under ``'node'``."""
+        labels = {}
+        labels['node'] = entity['host']
+        return labels
+
+    def _extract_properties(self, entity):
+        """Return the alert properties (``status`` and ``severity``)."""
+        return {
+            'status': self._extract_status(entity),
+            'severity': self._extract_severity(entity),
+        }
+
+    def _extract_status(self, entity):
+        """Return ``'active'`` when the status flag is ``'1'``."""
+        if entity['trigger'][2] == '1':
+            return 'active'
+        return 'inactive'
+
+    def _extract_severity(self, entity):
+        """Return the second element of the entity's trigger sequence."""
+        trigger = entity['trigger']
+        return trigger[1]
diff --git a/orca/topology/alerts/zabbix/linker.py b/orca/topology/alerts/zabbix/linker.py
new file mode 100644
index 0000000..5b8c834
--- /dev/null
+++ b/orca/topology/alerts/zabbix/linker.py
@@ -0,0 +1,24 @@
+# Copyright 2020 OpenRCA Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from orca.topology.alerts import linker
+
+
+class AlertLinker(linker.AlertLinker):
+
+    """Links Alert entities coming from Zabbix."""
+
+    @classmethod
+    def get(cls, graph):
+        """Call the parent ``get`` with the graph and the ``'zabbix'`` key."""
+        zabbix_linker = super().get(graph, 'zabbix')
+        return zabbix_linker
diff --git a/orca/topology/alerts/zabbix/probe.py b/orca/topology/alerts/zabbix/probe.py
new file mode 100644
index 0000000..9eb2c39
--- /dev/null
+++ b/orca/topology/alerts/zabbix/probe.py
@@ -0,0 +1,35 @@
+# Copyright 2020 OpenRCA Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyzabbix import ZabbixAPI
+from orca.common import config
+from orca.topology import probe, utils
+from orca.topology.alerts.zabbix import extractor, upstream
+
+CONFIG = config.CONFIG
+
+
+class AlertProbe(probe.PullProbe):
+
+    """Pull probe for Zabbix alerts."""
+
+    @classmethod
+    def get(cls, graph):
+        """Build the probe for ``graph`` from the ``probes.zabbix`` settings.
+
+        The collaborators are created in a fixed order: API client, upstream
+        proxy (its constructor logs in), extractor, then synchronizer.
+        """
+        zabbix_settings = CONFIG.probes.zabbix
+        api_client = ZabbixAPI(zabbix_settings.url)
+        proxy = upstream.UpstreamProxy(api_client)
+        alert_extractor = extractor.AlertExtractor.get()
+        node_synchronizer = utils.NodeSynchronizer(graph)
+        return cls(
+            graph=graph,
+            upstream_proxy=proxy,
+            extractor=alert_extractor,
+            synchronizer=node_synchronizer,
+            resync_period=zabbix_settings.resync_period)
diff --git a/orca/topology/alerts/zabbix/upstream.py b/orca/topology/alerts/zabbix/upstream.py
new file mode 100644
index 0000000..3d71309
--- /dev/null
+++ b/orca/topology/alerts/zabbix/upstream.py
@@ -0,0 +1,46 @@
+# Copyright 2020 OpenRCA Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyzabbix import ZabbixAPI
+from orca.topology import upstream
+from orca.common import config
+
+CONFIG = config.CONFIG
+
+
+class UpstreamProxy(upstream.UpstreamProxy):
+
+    """Upstream proxy for Zabbix.
+
+    Queries triggers through the given API client and flattens them into one
+    payload per (trigger, host) pair.
+    """
+
+    # Trigger fields copied into each payload, in this order. The payload
+    # stores them as a list, so the order is part of the payload format.
+    _TRIGGER_FIELDS = ('description', 'priority', 'value')
+
+    def __init__(self, client):
+        """Keep the client and log in with the configured credentials.
+
+        :param client: API client providing ``login`` and ``trigger.get``.
+        """
+        self._client = client
+        self._client.login(
+            CONFIG.probes.zabbix.username, CONFIG.probes.zabbix.password)
+
+    def get_all(self):
+        """Return a payload for every host of every trigger returned.
+
+        Triggers are requested with ``only_true=1``, ``active=1``,
+        ``output='extend'`` and ``selectHosts=['host']``.
+
+        :returns: list of dicts with the keys ``'host'`` (the host name) and
+            ``'trigger'`` (``[description, priority, value]``).
+        """
+        fetched_triggers = self._client.trigger.get(
+            only_true=1,
+            active=1,
+            output='extend',
+            selectHosts=['host'])
+        payloads = []
+        for trigger in fetched_triggers:
+            for host in trigger['hosts']:
+                # Read the fields instead of popping them: popping removed
+                # them from the trigger on the first host, so a trigger with
+                # several hosts raised KeyError on the second one.
+                payloads.append({
+                    'host': host['host'],
+                    'trigger': [
+                        trigger[field] for field in self._TRIGGER_FIELDS],
+                })
+        return payloads
+
+    def get_events(self):
+        """Not supported by this proxy; always raises NotImplementedError."""
+        raise NotImplementedError()
diff --git a/orca/topology/manager.py b/orca/topology/manager.py
index 2cacdd8..d9e3dff 100644
--- a/orca/topology/manager.py
+++ b/orca/topology/manager.py
@@ -50,6 +50,9 @@ def initialize(self):
         if CONFIG.probes.prometheus.enabled:
             probe_modules.append(alerts.prometheus)
 
+        if CONFIG.probes.zabbix.enabled:
+            probe_modules.append(alerts.zabbix)
+
         for probe_module in probe_modules:
             for probe_bundle in probe_module.get_probes():
                 self.add(probe.ProbeRunner, workers=1, args=(probe_bundle, graph_lock))
diff --git a/requirements.txt b/requirements.txt
index 478127c..34b32a4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ kubernetes # Apache License 2.0
 python-arango  # MIT
 pyyaml  # MIT
 requests  # Apache License 2.0
+pyzabbix  # LGPL 2.1