/
nvidia
80 lines (72 loc) · 3.22 KB
/
nvidia
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2013 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Default levels for the temperature check, laid out as two (warn, crit)
# pairs: check_nvidia_temp applies the last two values to the item
# "GPUCoreTemp" and the first two values to every other sensor.
nvidia_temp_default_levels = (60, 65, 90, 95)
def inventory_nvidia_temp(info):
    """Discover one temperature service per sensor line of the agent output.

    A line counts as a temperature sensor when its first column, with
    leading and trailing colons removed, ends in "temp" (case-insensitive).

    Returns a list of (item, params) tuples: item is the colon-stripped
    sensor name, params is always the string "nvidia_temp_default_levels".
    """
    found = []
    for line in info:
        sensor = line[0].strip(":")
        if not sensor.lower().endswith("temp"):
            continue
        found.append((sensor, "nvidia_temp_default_levels"))
    return found
def check_nvidia_temp(item, params, info):
    """Check one NVIDIA temperature sensor against its warn/crit levels.

    item   -- sensor name, compared against the first column of each line
              after stripping leading/trailing colons
    params -- sequence of levels; indices 2 and 3 are the (warn, crit) pair
              for the item "GPUCoreTemp", indices 0 and 1 the pair for every
              other sensor
    info   -- agent output, one list of columns per line; the second column
              of the matching line is parsed as an integer reading

    Returns (state, infotext, perfdata) for the first matching line, with
    state 2 once the reading has reached crit, 1 once it has reached warn
    and 0 otherwise. Returns (3, 'sensor not found in agent output') when
    no line matches item.
    """
    for line in info:
        if line[0].strip(":") != item:
            continue

        reading = int(line[1])

        # The core sensor uses the second pair of levels, all others the first.
        offset = 2 if item == "GPUCoreTemp" else 0
        warn, crit = params[offset:offset + 2]

        infotext = "%dC - levels at %d/%d" % (reading, warn, crit)
        perfdata = [("temp", reading, warn, crit)]

        if reading >= crit:
            state = 2
        elif reading >= warn:
            state = 1
        else:
            state = 0
        return (state, infotext, perfdata)

    return (3, 'sensor not found in agent output')
# Register the temperature check under the name "nvidia.temp".
# NOTE(review): check_info is not defined in this file -- presumably it is
# provided by Check_MK when it loads the check; confirm.
check_info["nvidia.temp"] = {
'check_function': check_nvidia_temp,
'inventory_function': inventory_nvidia_temp,
# NOTE(review): %s is presumably replaced by the item (the sensor name
# returned by inventory_nvidia_temp) -- confirm.
'service_description': 'Temperature NVIDIA %s',
# check_nvidia_temp returns perfdata as third tuple element on a match.
'has_perfdata': True,
}
def inventory_nvidia_errors(info):
    """Discover the GPU error service when the agent reports an error counter.

    info -- agent output, one list of columns per line

    Returns [(None, None)] (one service without item and without
    parameters) as soon as a line has 'GPUErrors:' as its first column,
    and an empty list otherwise, so the caller always receives a list.
    """
    for line in info:
        if line[0] == 'GPUErrors:':
            return [(None, None)]
    # No error counter in the agent output: say "nothing found" explicitly
    # instead of falling off the end of the function and returning None.
    return []
def check_nvidia_errors(_no_item, _no_params, info):
    """Report the GPU error counter found in the agent output.

    _no_item, _no_params -- unused
    info                 -- agent output, one list of columns per line

    The first line whose first column is "GPUErrors:" decides the result;
    its second column is parsed as an integer count. A count of zero gives
    (0, "No GPU errors"), any other count gives (2, "<count> GPU errors").
    Without such a line the result is (3, "incomplete output from agent").
    """
    for line in info:
        if line[0] != "GPUErrors:":
            continue
        count = int(line[1])
        if count:
            return (2, "%d GPU errors" % count)
        return (0, "No GPU errors")
    return (3, "incomplete output from agent")
# Register the GPU error check under the name "nvidia.errors".
# NOTE(review): check_info is not defined in this file -- presumably it is
# provided by Check_MK when it loads the check; confirm.
check_info["nvidia.errors"] = {
'check_function': check_nvidia_errors,
'inventory_function': inventory_nvidia_errors,
# No %s placeholder: inventory_nvidia_errors discovers a single service
# whose item is None.
'service_description': 'NVIDIA GPU Errors',
# NOTE(review): presumably the name of the rule group used to configure
# this check -- confirm against the Check_MK version in use.
'group': 'hw_errors',
}