Skip to content
Permalink
Browse files

7405 elasticsearch_indices: new check for monitoring of elasticsearch…

… indices

This check uses the datasource program "Check state of elasticsearch".

The document count and size growth per minute will be recorded and
averaged for a set amount of time (30 minutes by default). The most
recent document count growth and size growth are compared to this
average value. You can set thresholds on how much higher the most recent
growth may be relative to the average growth.

Change-Id: I840d1651750d89c39a3cf21666d7e504333f0a1d
  • Loading branch information...
makanakoeln authored and Ronny Bruska committed Apr 4, 2019
1 parent 7ca740b commit d0dd5d4c22a1c7a105d5b2a8a0c3901d0ac721c3
@@ -0,0 +1,15 @@
Title: elasticsearch_indices: new check for monitoring of elasticsearch indices
Level: 1
Component: checks
Compatible: compat
Edition: cre
Version: 1.6.0i1
Date: 1554373898
Class: feature

This check uses the datasource program "Check state of elasticsearch". The
document count and size growth per minute will be recorded and averaged for a
set amount of time (30 minutes by default). The most recent document count
growth and size growth are compared to this average value. You can set
thresholds on how much higher the most recent growth may be relative to the
average growth.
@@ -0,0 +1,18 @@
title: Elasticsearch: Indices
agents: elasticsearch
catalog: app/elasticsearch
license: GPL
distribution: check_mk
description:
This check uses the special agent for elasticsearch. The document count and
size growth per minute will be recorded and averaged for a set amount of time
(30 minutes by default). The most recent document count growth and size growth
are compared to this average value. Thresholds can be set on how much higher
the most recent growth may be relative to the average growth.

inventory:
One check per index will be created.

item:
The name of the index

@@ -0,0 +1,106 @@
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2019 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.

# <<<elasticsearch_indices>>>
# filebeat-6.5.4 11492614 801366929

# Aggregated per-index state: document count and size in bytes.
ElasticIndex = collections.namedtuple("ElasticIndex", [
    "count",
    "size",
])


def parse_elasticsearch_indices(info):
    """Parse agent output into a dict mapping index name -> ElasticIndex.

    Expected agent lines (see section header above):
        <index name> <document count> <size in bytes>

    Lines with a wrong number of fields or non-numeric count/size are
    skipped entirely.  Entries for index names that appear on several
    lines are summed up.

    Note: the original implementation converted count and size inside
    the accumulation branch, so a line with a valid count but invalid
    size left the index half-updated (count summed, size not).  Both
    fields are now validated before any state is touched.
    """
    parsed = {}

    for line in info:
        try:
            index, count, size = line
            count, size = int(count), int(size)
        except (IndexError, ValueError):
            continue  # ignore malformed lines

        if index in parsed:
            # Sum up count/size for index names which already appeared
            old = parsed[index]
            parsed[index] = ElasticIndex(old.count + count, old.size + size)
        else:
            parsed[index] = ElasticIndex(count, size)

    return parsed


@get_parsed_item_data
def check_elasticsearch_indices(item, params, item_data):
    """Check document count and size of one elasticsearch index.

    For both metrics (document count, size) this yields:
      1. the current total value (no levels), and
      2. the growth rate per minute, checked against levels derived from
         a running average of that rate.

    params may contain "elasticsearch_count_rate" / "elasticsearch_size_rate"
    as a tuple (warn%, crit%, averaging horizon in minutes): the levels are
    the averaged rate increased by the given percentages.  Without the
    parameter, the rate is averaged over 30 minutes and no levels apply.
    """
    for value, name, infotext, hr_func, unit in [
            (item_data.count, "elasticsearch_count", "count", int, "docs"),
            (item_data.size, "elasticsearch_size", "size", get_bytes_human_readable, ""),
    ]:
        # Look the rate parameters up once per metric.
        rate_params = params.get("%s_rate" % name)
        # Averaging horizon in minutes (third tuple element), default 30.
        avg_horizon = rate_params[2] if rate_params else 30

        yield check_levels(
            value,
            name, (None, None),
            human_readable_func=hr_func,
            unit=unit,
            infoname="Total %s" % infotext)

        this_time = time.time()
        # get_rate yields change per second; scale to change per minute.
        rate = get_rate("elasticsearch_indices.%s.%s" % (name, item), this_time, value) * 60

        avg_rate = get_average("elasticsearch_indices.%s.%s.avg" % (name, item), this_time,
                               rate, avg_horizon)

        if rate_params:
            warn, crit = rate_params[0], rate_params[1]
            # Force float division: with integer percentages (e.g. 10),
            # "warn / 100" truncates to 0 under Python 2 and the levels
            # would silently collapse to the bare average rate.
            avg_rate_warn = avg_rate * (float(warn) / 100.0 + 1)
            avg_rate_crit = avg_rate * (float(crit) / 100.0 + 1)
            params_avg = (avg_rate_warn, avg_rate_crit)
        else:
            params_avg = (None, None)

        yield check_levels(
            rate,
            "%s_rate" % name,
            params_avg,
            human_readable_func=hr_func,
            unit="%s per Minute" % unit,
            infoname="Average %s" % infotext)


# Registration of the check: discovery creates one service per index
# found in the agent section, configurable via the WATO rule group
# "elasticsearch_indices".
check_info["elasticsearch_indices"] = {
    "parse_function": parse_elasticsearch_indices,
    "inventory_function": discover(),
    "check_function": check_elasticsearch_indices,
    "service_description": "Elasticsearch Indice %s",
    "group": "elasticsearch_indices",
    "has_perfdata": True,
}
@@ -5757,6 +5757,156 @@ def register_fireye_metrics():
"color": "22/a",
}

# --- elasticsearch index metrics (elasticsearch_indices check) ---------------
# Size and document count of an index: current totals plus growth per minute
# and the running average of that growth.

metric_info["elasticsearch_size_avg"] = {
    "title": _("Average size growth"),
    "unit": "bytes",
    "color": "33/a",
}

metric_info["elasticsearch_size_rate"] = {
    "title": _("Size growth per minute"),
    "unit": "bytes",
    "color": "31/a",
}

metric_info["elasticsearch_size"] = {
    "title": _("Total size"),
    "unit": "bytes",
    "color": "31/b",
}

metric_info["elasticsearch_count_avg"] = {
    "title": _("Average document count growth"),
    "unit": "count",
    "color": "45/a",
}

metric_info["elasticsearch_count_rate"] = {
    "title": _("Document count growth per minute"),
    "unit": "count",
    "color": "43/a",
}

metric_info["elasticsearch_count"] = {
    "title": _("Total documents"),
    "unit": "count",
    "color": "43/b",
}

# --- elasticsearch cluster health metrics ------------------------------------
# Shard and node statistics as reported by the cluster health API.

metric_info["active_primary_shards"] = {
    "title": _("Active primary shards"),
    "unit": "count",
    "color": "21/b",
}

metric_info["active_shards"] = {
    "title": _("Active shards"),
    "unit": "count",
    "color": "21/a",
}

metric_info["active_shards_percent_as_number"] = {
    "title": _("Active shards in percent"),
    "unit": "%",
    "color": "22/a",
}

metric_info["number_of_data_nodes"] = {
    "title": _("Data nodes"),
    "unit": "count",
    "color": "41/a",
}

metric_info["delayed_unassigned_shards"] = {
    "title": _("Delayed unassigned shards"),
    "unit": "count",
    "color": "42/a",
}

metric_info["initializing_shards"] = {
    "title": _("Initializing shards"),
    "unit": "count",
    "color": "52/a",
}

metric_info["number_of_nodes"] = {
    "title": _("Nodes"),
    "unit": "count",
    "color": "43/a",
}

metric_info["number_of_pending_tasks"] = {
    "title": _("Pending tasks"),
    "unit": "count",
    "color": "53/a",
}

metric_info["number_of_pending_tasks_rate"] = {
    "title": _("Pending tasks delta"),
    "unit": "count",
    "color": "14/b",
}

metric_info["number_of_pending_tasks_avg"] = {
    "title": _("Average pending tasks delta"),
    "unit": "count",
    "color": "26/a",
}

metric_info["relocating_shards"] = {
    "title": _("Relocating shards"),
    "unit": "count",
    "color": "16/b",
}

# NOTE(review): unit "count" for a wait time in milliseconds looks odd —
# presumably kept for compatibility with the check's perfdata; verify.
metric_info["task_max_waiting_in_queue_millis"] = {
    "title": _("Maximum wait time of all tasks in queue"),
    "unit": "count",
    "color": "11/a",
}

metric_info["unassigned_shards"] = {
    "title": _("Unassigned shards"),
    "unit": "count",
    "color": "14/a",
}

metric_info["number_of_in_flight_fetch"] = {
    "title": _("Ongoing shard info requests"),
    "unit": "count",
    "color": "31/a",
}

# --- elasticsearch node metrics ----------------------------------------------
# Per-node resource usage (file descriptors, CPU, memory).

metric_info["open_file_descriptors"] = {
    "title": _("Open file descriptors"),
    "unit": "count",
    "color": "14/a",
}

metric_info["max_file_descriptors"] = {
    "title": _("Max file descriptors"),
    "unit": "count",
    "color": "11/a",
}

metric_info["cpu_percent"] = {
    "title": _("CPU used"),
    "unit": "%",
    "color": "16/a",
}

metric_info["cpu_total_in_millis"] = {
    "title": _("CPU total in ms"),
    "unit": "1/s",
    "color": "26/a",
}

metric_info["mem_total_virtual_in_bytes"] = {
    "title": _("Total virtual memory"),
    "unit": "bytes",
    "color": "53/a",
}

#.
# .--Checks--------------------------------------------------------------.
# | ____ _ _ |
@@ -9235,6 +9385,42 @@ def get_skype_mobile_perfometer_segments():
"color": "16/a",
})

# Stacked perf-o-meter for index growth: size growth per minute on top of
# document count growth per minute, both on a logarithmic scale.
perfometer_info.append(("stacked", [{
    "type": "logarithmic",
    "metric": "elasticsearch_size_rate",
    "half_value": 5000,
    "exponent": 2,
}, {
    "type": "logarithmic",
    "metric": "elasticsearch_count_rate",
    "half_value": 10,
    "exponent": 2,
}]))

# Perf-o-meter for the cluster's pending task delta.
perfometer_info.append({
    "type": "logarithmic",
    "metric": "number_of_pending_tasks_rate",
    "half_value": 10,
    "exponent": 2,
    "unit": "count",
})

# Dual perf-o-meter: primary shards vs. all active shards, both rendered
# relative to the total number of active shards.
perfometer_info.append(("dual", [{
    "type": "linear",
    "segments": ["active_primary_shards"],
    "total": "active_shards",
}, {
    "type": "linear",
    "segments": ["active_shards"],
    "total": "active_shards",
}]))

# Linear perf-o-meter for the active shard percentage (0-100 %).
perfometer_info.append({
    "type": "linear",
    "segments": ["active_shards_percent_as_number"],
    "total": 100.0,
})

#.
# .--Graphs--------------------------------------------------------------.
# | ____ _ |
@@ -11107,3 +11293,11 @@ def get_skype_mobile_metrics():
("number_of_data_nodes", "area"),
],
}

# Combined graph: all active shards with the primary shards drawn on top,
# so the primary/replica split is visible at a glance.
graph_info["active_shards"] = {
    "title": _("Active shards"),
    "metrics": [
        ("active_shards", "area"),
        ("active_primary_shards", "area"),
    ],
}

0 comments on commit d0dd5d4

Please sign in to comment.
You can’t perform that action at this time.