From af8404cebb4821e0fa682e27aeb9714278af6096 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Fri, 19 Apr 2024 17:38:37 +0300 Subject: [PATCH 01/16] go.d add storcli collector (#17454) --- src/go/collectors/go.d.plugin/README.md | 3 +- .../collectors/go.d.plugin/config/go.d.conf | 1 + .../go.d.plugin/config/go.d/storcli.conf | 5 + src/go/collectors/go.d.plugin/modules/init.go | 1 + .../go.d.plugin/modules/storcli/charts.go | 171 +++++ .../go.d.plugin/modules/storcli/collect.go | 32 + .../modules/storcli/collect_controllers.go | 101 +++ .../modules/storcli/collect_drives.go | 231 ++++++ .../modules/storcli/config_schema.json | 35 + .../go.d.plugin/modules/storcli/exec.go | 50 ++ .../go.d.plugin/modules/storcli/init.go | 23 + .../go.d.plugin/modules/storcli/metadata.yaml | 153 ++++ .../go.d.plugin/modules/storcli/storcli.go | 109 +++ .../modules/storcli/storcli_test.go | 289 ++++++++ .../modules/storcli/testdata/config.json | 4 + .../modules/storcli/testdata/config.yaml | 2 + .../testdata/megaraid-controllers-info.json | 687 ++++++++++++++++++ .../testdata/megaraid-drives-info.json | 495 +++++++++++++ 18 files changed, 2390 insertions(+), 2 deletions(-) create mode 100644 src/go/collectors/go.d.plugin/config/go.d/storcli.conf create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/charts.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/collect.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/config_schema.json create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/exec.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/init.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/storcli.go create mode 100644 
src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/testdata/config.json create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/testdata/config.yaml create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-controllers-info.json create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-drives-info.json diff --git a/src/go/collectors/go.d.plugin/README.md b/src/go/collectors/go.d.plugin/README.md index 6dc519bee1baec..fc688ada017b24 100644 --- a/src/go/collectors/go.d.plugin/README.md +++ b/src/go/collectors/go.d.plugin/README.md @@ -114,9 +114,8 @@ see the appropriate collector readme. | [redis](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/redis) | Redis | | [scaleio](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/scaleio) | Dell EMC ScaleIO | | [SNMP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/snmp) | SNMP | -| [solr](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/solr) | Solr | | [squidlog](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/squidlog) | Squid | -| [springboot2](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/springboot2) | Spring Boot2 | +| [storcli](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/storcli) | Broadcom Hardware RAID | | [supervisord](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/supervisord) | Supervisor | | [systemdunits](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/systemdunits) | Systemd unit state | | [tengine](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/tengine) | Tengine | diff --git 
a/src/go/collectors/go.d.plugin/config/go.d.conf b/src/go/collectors/go.d.plugin/config/go.d.conf index 86fa940650a29a..ab3a5aca742d52 100644 --- a/src/go/collectors/go.d.plugin/config/go.d.conf +++ b/src/go/collectors/go.d.plugin/config/go.d.conf @@ -77,6 +77,7 @@ modules: # scaleio: yes # snmp: yes # squidlog: yes +# storcli: yes # supervisord: yes # systemdunits: yes # tengine: yes diff --git a/src/go/collectors/go.d.plugin/config/go.d/storcli.conf b/src/go/collectors/go.d.plugin/config/go.d/storcli.conf new file mode 100644 index 00000000000000..a4a9e3e0acce9e --- /dev/null +++ b/src/go/collectors/go.d.plugin/config/go.d/storcli.conf @@ -0,0 +1,5 @@ +## All available configuration options, their descriptions and default values: +## https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/storcli#readme + +jobs: + - name: storcli diff --git a/src/go/collectors/go.d.plugin/modules/init.go b/src/go/collectors/go.d.plugin/modules/init.go index 2790d30c2413b8..69e11617f7dc95 100644 --- a/src/go/collectors/go.d.plugin/modules/init.go +++ b/src/go/collectors/go.d.plugin/modules/init.go @@ -69,6 +69,7 @@ import ( _ "github.com/netdata/netdata/go/go.d.plugin/modules/scaleio" _ "github.com/netdata/netdata/go/go.d.plugin/modules/snmp" _ "github.com/netdata/netdata/go/go.d.plugin/modules/squidlog" + _ "github.com/netdata/netdata/go/go.d.plugin/modules/storcli" _ "github.com/netdata/netdata/go/go.d.plugin/modules/supervisord" _ "github.com/netdata/netdata/go/go.d.plugin/modules/systemdunits" _ "github.com/netdata/netdata/go/go.d.plugin/modules/tengine" diff --git a/src/go/collectors/go.d.plugin/modules/storcli/charts.go b/src/go/collectors/go.d.plugin/modules/storcli/charts.go new file mode 100644 index 00000000000000..65cd75a3319347 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/charts.go @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + "fmt" + "strconv" + "strings" + + 
"github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + prioControllerStatus = module.Priority + iota + prioControllerBBUStatus + + prioPhysDriveErrors + prioPhysDrivePredictiveFailures + prioPhysDriveSmartAlertStatus + prioPhysDriveTemperature +) + +var controllerChartsTmpl = module.Charts{ + controllerStatusChartTmpl.Copy(), + controllerBBUStatusChartTmpl.Copy(), +} + +var ( + controllerStatusChartTmpl = module.Chart{ + ID: "controller_%s_status", + Title: "Controller status", + Units: "status", + Fam: "cntrl status", + Ctx: "storcli.controller_status", + Type: module.Line, + Priority: prioControllerStatus, + Dims: module.Dims{ + {ID: "cntrl_%s_status_optimal", Name: "optimal"}, + {ID: "cntrl_%s_status_degraded", Name: "degraded"}, + {ID: "cntrl_%s_status_partially_degraded", Name: "partially_degraded"}, + {ID: "cntrl_%s_status_failed", Name: "failed"}, + }, + } + controllerBBUStatusChartTmpl = module.Chart{ + ID: "controller_%s_bbu_status", + Title: "Controller BBU status", + Units: "status", + Fam: "cntrl status", + Ctx: "storcli.controller_bbu_status", + Type: module.Line, + Priority: prioControllerBBUStatus, + Dims: module.Dims{ + {ID: "cntrl_%s_bbu_status_healthy", Name: "healthy"}, + {ID: "cntrl_%s_bbu_status_unhealthy", Name: "unhealthy"}, + {ID: "cntrl_%s_bbu_status_na", Name: "na"}, + }, + } +) + +var physDriveChartsTmpl = module.Charts{ + physDriveMediaErrorsRateChartTmpl.Copy(), + physDrivePredictiveFailuresRateChartTmpl.Copy(), + physDriveSmartAlertStatusChartTmpl.Copy(), + physDriveTemperatureChartTmpl.Copy(), +} + +var ( + physDriveMediaErrorsRateChartTmpl = module.Chart{ + ID: "phys_drive_%s_cntrl_%s_media_errors_rate", + Title: "Physical Drive media errors rate", + Units: "errors/s", + Fam: "pd errors", + Ctx: "storcli.phys_drive_errors", + Type: module.Line, + Priority: prioPhysDriveErrors, + Dims: module.Dims{ + {ID: "phys_drive_%s_cntrl_%s_media_error_count", Name: "media"}, + {ID: "phys_drive_%s_cntrl_%s_other_error_count", 
Name: "other"}, + }, + } + physDrivePredictiveFailuresRateChartTmpl = module.Chart{ + ID: "phys_drive_%s_cntrl_%s_predictive_failures_rate", + Title: "Physical Drive predictive failures rate", + Units: "failures/s", + Fam: "pd errors", + Ctx: "storcli.phys_drive_predictive_failures", + Type: module.Line, + Priority: prioPhysDrivePredictiveFailures, + Dims: module.Dims{ + {ID: "phys_drive_%s_cntrl_%s_predictive_failure_count", Name: "predictive_failures"}, + }, + } + physDriveSmartAlertStatusChartTmpl = module.Chart{ + ID: "phys_drive_%s_cntrl_%s_smart_alert_status", + Title: "Physical Drive SMART alert status", + Units: "status", + Fam: "pd smart", + Ctx: "storcli.phys_drive_smart_alert_status", + Type: module.Line, + Priority: prioPhysDriveSmartAlertStatus, + Dims: module.Dims{ + {ID: "phys_drive_%s_cntrl_%s_smart_alert_status_active", Name: "active"}, + {ID: "phys_drive_%s_cntrl_%s_smart_alert_status_inactive", Name: "inactive"}, + }, + } + physDriveTemperatureChartTmpl = module.Chart{ + ID: "phys_drive_%s_cntrl_%s_temperature", + Title: "Physical Drive temperature", + Units: "status", + Fam: "pd temperature", + Ctx: "storcli.phys_drive_temperature", + Type: module.Line, + Priority: prioPhysDriveTemperature, + Dims: module.Dims{ + {ID: "phys_drive_%s_cntrl_%s_temperature", Name: "temperature"}, + }, + } +) + +func (s *StorCli) addControllerCharts(cntrl controllerInfo) { + charts := controllerChartsTmpl.Copy() + + num := strconv.Itoa(cntrl.Basics.Controller) + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, num) + chart.Labels = []module.Label{ + {Key: "controller_number", Value: num}, + {Key: "model", Value: cntrl.Basics.Model}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, num) + } + } + + if err := s.Charts().Add(*charts...); err != nil { + s.Warning(err) + } +} + +func (s *StorCli) addPhysDriveCharts(cntrlNum int, di *driveInfo, ds *driveState, da *driveAttrs) { + charts := physDriveChartsTmpl.Copy() + + if _, ok := 
parseInt(getDriveTemperature(ds.DriveTemperature)); !ok { + _ = charts.Remove(physDriveTemperatureChartTmpl.ID) + } + + num := strconv.Itoa(cntrlNum) + + var enc, slot string + if parts := strings.Split(di.EIDSlt, ":"); len(parts) == 2 { // EID:Slt + enc, slot = parts[0], parts[1] + } + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, da.WWN, num) + chart.Labels = []module.Label{ + {Key: "controller_number", Value: num}, + {Key: "enclosure_number", Value: enc}, + {Key: "slot_number", Value: slot}, + {Key: "media_type", Value: di.Med}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, da.WWN, num) + } + } + + if err := s.Charts().Add(*charts...); err != nil { + s.Warning(err) + } +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect.go b/src/go/collectors/go.d.plugin/modules/storcli/collect.go new file mode 100644 index 00000000000000..d9b1c9af2fe5f2 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/collect.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import "fmt" + +func (s *StorCli) collect() (map[string]int64, error) { + cntrlResp, err := s.queryControllersInfo() + if err != nil { + return nil, err + } + + mx := make(map[string]int64) + + if err := s.collectControllersInfo(mx, cntrlResp); err != nil { + return nil, fmt.Errorf("error collecting controller info: %s", err) + } + + drives := cntrlResp.Controllers[0].ResponseData.PDList + driver := cntrlResp.Controllers[0].ResponseData.Version.DriverName + if driver == "megaraid_sas" && len(drives) > 0 { + drivesResp, err := s.queryDrivesInfo() + if err != nil { + return nil, fmt.Errorf("error collecting drives info: %s", err) + } + if err := s.collectMegaRaidDrives(mx, drivesResp); err != nil { + return nil, err + } + } + + return mx, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go b/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go new file 
mode 100644 index 00000000000000..259013e6c16810 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + "encoding/json" + "errors" + "fmt" + "strconv" + "strings" +) + +type ( + controllersInfoResponse struct { + Controllers []struct { + CommandStatus struct { + Controller int `json:"Controller"` + Status string `json:"Status"` + } `json:"Command Status"` + ResponseData controllerInfo `json:"Response Data"` + } `json:"Controllers"` + } + controllerInfo struct { + Basics struct { + Controller int `json:"Controller"` + Model string `json:"Model"` + SerialNumber string `json:"Serial Number"` + } `json:"Basics"` + Version struct { + DriverName string `json:"Driver Name"` + } `json:"Version"` + Status struct { + ControllerStatus string `json:"Controller Status"` + BBUStatus storNumber `json:"BBU Status"` + } `json:"Status"` + BBUInfo []struct { + Model string `json:"Model"` + State string `json:"State"` + Temp string `json:"Temp"` + } `json:"BBU_Info"` + PDList []struct { + } `json:"PD LIST"` + } +) + +func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersInfoResponse) error { + for _, v := range resp.Controllers { + cntrl := v.ResponseData + + idx := strconv.Itoa(cntrl.Basics.Controller) + if !s.controllers[idx] { + s.controllers[idx] = true + s.addControllerCharts(cntrl) + } + + px := fmt.Sprintf("cntrl_%s_", idx) + + for _, st := range []string{"optimal", "degraded", "partially_degraded", "failed"} { + mx[px+"status_"+st] = 0 + } + mx[px+"status_"+strings.ToLower(cntrl.Status.ControllerStatus)] = 1 + + for _, st := range []string{"healthy", "unhealthy", "na"} { + mx[px+"bbu_status_"+st] = 0 + } + // https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/issues/27 + switch cntrl.Status.BBUStatus { + case "0", "8", "4096": // 0 good, 8 charging + mx[px+"bbu_status_healthy"] = 1 + case "NA", 
"N/A": + mx[px+"bbu_status_na"] = 1 + default: + mx[px+"bbu_status_unhealthy"] = 1 + } + } + return nil +} + +func (s *StorCli) queryControllersInfo() (*controllersInfoResponse, error) { + bs, err := s.exec.controllersInfo() + if err != nil { + return nil, err + } + + if len(bs) == 0 { + return nil, errors.New("empty response") + } + + var resp controllersInfoResponse + if err := json.Unmarshal(bs, &resp); err != nil { + return nil, err + } + if len(resp.Controllers) == 0 { + return nil, errors.New("no controllers found") + } + if st := resp.Controllers[0].CommandStatus.Status; st != "Success" { + return nil, fmt.Errorf("command status error: %s", st) + } + + return &resp, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go b/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go new file mode 100644 index 00000000000000..353728d6dcd79f --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + "encoding/json" + "errors" + "fmt" + "strconv" + "strings" +) + +type drivesInfoResponse struct { + Controllers []struct { + CommandStatus struct { + Controller int `json:"Controller"` + Status string `json:"Status"` + } `json:"Command Status"` + ResponseData map[string]json.RawMessage `json:"Response Data"` + } `json:"Controllers"` +} + +type ( + driveInfo struct { + EIDSlt string `json:"EID:Slt"` + DID int `json:"DID"` + State string `json:"State"` + DG int `json:"DG"` + Size string `json:"Size"` + Intf string `json:"Intf"` + Med string `json:"Med"` + SED string `json:"SED"` + PI string `json:"PI"` + SeSz string `json:"SeSz"` + Model string `json:"Model"` + Sp string `json:"Sp"` + Type string `json:"Type"` + } + driveState struct { + MediaErrorCount storNumber `json:"Media Error Count"` + OtherErrorCount storNumber `json:"Other Error Count"` + DriveTemperature string `json:"Drive Temperature"` + 
PredictiveFailureCount storNumber `json:"Predictive Failure Count"` + SmartAlertFlagged string `json:"S.M.A.R.T alert flagged by drive"` + } + driveAttrs struct { + WWN string `json:"WWN"` + DeviceSpeed string `json:"Device Speed"` + LinkSpeed string `json:"Link Speed"` + } +) + +type storNumber string // raw JSON token with surrounding string quotes stripped; some int values can be 'N/A' + +func (n *storNumber) UnmarshalJSON(b []byte) error { *n = storNumber(strings.Trim(string(b), "\"")); return nil } + +func (s *StorCli) collectMegaRaidDrives(mx map[string]int64, resp *drivesInfoResponse) error { + for _, cntrl := range resp.Controllers { + var ids []string + for k := range cntrl.ResponseData { + if !strings.HasSuffix(k, "Detailed Information") { + continue + } + parts := strings.Fields(k) // Drive /c0/e252/s0 - Detailed Information + if len(parts) < 2 { + continue + } + id := parts[1] + if strings.IndexByte(id, '/') == -1 { + continue + } + ids = append(ids, id) + } + + cntrlIdx := cntrl.CommandStatus.Controller + + for _, id := range ids { + info, err := getDriveInfo(cntrl.ResponseData, id) + if err != nil { + return err + } + data, err := getDriveDetailedInfo(cntrl.ResponseData, id) + if err != nil { + return err + } + state, err := getDriveState(data, id) + if err != nil { + return err + } + attrs, err := getDriveAttrs(data, id) + if err != nil { + return err + } + + if attrs.WWN == "" { + continue + } + + if !s.drives[attrs.WWN] { + s.drives[attrs.WWN] = true + s.addPhysDriveCharts(cntrlIdx, info, state, attrs) + } + + px := fmt.Sprintf("phys_drive_%s_cntrl_%d_", attrs.WWN, cntrlIdx) + + if v, ok := parseInt(string(state.MediaErrorCount)); ok { + mx[px+"media_error_count"] = v + } + if v, ok := parseInt(string(state.OtherErrorCount)); ok { + mx[px+"other_error_count"] = v + } + if v, ok := parseInt(string(state.PredictiveFailureCount)); ok { + mx[px+"predictive_failure_count"] = v + } + if v, ok := parseInt(getDriveTemperature(state.DriveTemperature)); ok { + mx[px+"temperature"] = v + } + for _, st := range []string{"active", "inactive"} 
{ + mx[px+"smart_alert_status_"+st] = 0 + } + if state.SmartAlertFlagged == "Yes" { + mx[px+"smart_alert_status_active"] = 1 + } + } + } + + return nil +} + +func (s *StorCli) queryDrivesInfo() (*drivesInfoResponse, error) { + bs, err := s.exec.drivesInfo() + if err != nil { + return nil, err + } + + if len(bs) == 0 { + return nil, errors.New("empty response") + } + + var resp drivesInfoResponse + if err := json.Unmarshal(bs, &resp); err != nil { + return nil, err + } + + if len(resp.Controllers) == 0 { + return nil, errors.New("no controllers found") + } + if st := resp.Controllers[0].CommandStatus.Status; st != "Success" { + return nil, fmt.Errorf("command status error: %s", st) + } + + return &resp, nil +} + +func getDriveInfo(respData map[string]json.RawMessage, id string) (*driveInfo, error) { + k := fmt.Sprintf("Drive %s", id) + raw, ok := respData[k] + if !ok { + return nil, fmt.Errorf("drive info not found for '%s'", id) + } + + var drive []driveInfo + if err := json.Unmarshal(raw, &drive); err != nil { + return nil, err + } + + if len(drive) == 0 { + return nil, fmt.Errorf("drive info not found for '%s'", id) + } + + return &drive[0], nil +} + +func getDriveDetailedInfo(respData map[string]json.RawMessage, id string) (map[string]json.RawMessage, error) { + k := fmt.Sprintf("Drive %s - Detailed Information", id) + raw, ok := respData[k] + if !ok { + return nil, fmt.Errorf("drive detailed info not found for '%s'", id) + } + + var info map[string]json.RawMessage + if err := json.Unmarshal(raw, &info); err != nil { + return nil, err + } + + return info, nil +} + +func getDriveState(driveDetailedInfo map[string]json.RawMessage, id string) (*driveState, error) { + k := fmt.Sprintf("Drive %s State", id) + raw, ok := driveDetailedInfo[k] + if !ok { + return nil, fmt.Errorf("drive detailed info state not found for '%s'", id) + } + + var state driveState + if err := json.Unmarshal(raw, &state); err != nil { + return nil, err + } + + return &state, nil +} + +func 
getDriveAttrs(driveDetailedInfo map[string]json.RawMessage, id string) (*driveAttrs, error) { + k := fmt.Sprintf("Drive %s Device attributes", id) + raw, ok := driveDetailedInfo[k] + if !ok { + return nil, fmt.Errorf("drive detailed info device attributes not found for '%s'", id) + } + + var attrs driveAttrs + if err := json.Unmarshal(raw, &attrs); err != nil { + return nil, err + } + + return &attrs, nil +} + +func getDriveTemperature(s string) string { + // input looks like ' 28C (82.40 F)': keep what precedes the first 'C' + i := strings.IndexByte(s, 'C') + if i == -1 { + return "" + } + return strings.TrimSpace(s[:i]) +} + +func parseInt(s string) (int64, bool) { + i, err := strconv.ParseInt(s, 10, 64) + return i, err == nil +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/config_schema.json b/src/go/collectors/go.d.plugin/modules/storcli/config_schema.json new file mode 100644 index 00000000000000..226a370f437482 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/config_schema.json @@ -0,0 +1,35 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "StorCLI collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 10 + }, + "timeout": { + "title": "Timeout", + "description": "Timeout for executing the binary, specified in seconds.", + "type": "number", + "minimum": 0.5, + "default": 2 + } + }, + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." 
+ } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/exec.go b/src/go/collectors/go.d.plugin/modules/storcli/exec.go new file mode 100644 index 00000000000000..3375ddbe4f0d0c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/exec.go @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + "context" + "fmt" + "os/exec" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/logger" +) + +func newStorCliExec(ndsudoPath string, timeout time.Duration, log *logger.Logger) *storCliExec { + return &storCliExec{ + Logger: log, + ndsudoPath: ndsudoPath, + timeout: timeout, + } +} + +type storCliExec struct { + *logger.Logger + + ndsudoPath string + timeout time.Duration +} + +func (e *storCliExec) controllersInfo() ([]byte, error) { + return e.execute("storcli-controllers-info") +} + +func (e *storCliExec) drivesInfo() ([]byte, error) { + return e.execute("storcli-drives-info") +} + +func (e *storCliExec) execute(args ...string) ([]byte, error) { + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, e.ndsudoPath, args...) 
+ e.Debugf("executing '%s'", cmd) + + bs, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("error on '%s': %v", cmd, err) + } + + return bs, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/init.go b/src/go/collectors/go.d.plugin/modules/storcli/init.go new file mode 100644 index 00000000000000..297f7c8c3e3a04 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/init.go @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/netdata/netdata/go/go.d.plugin/agent/executable" +) + +func (s *StorCli) initStorCliExec() (storCli, error) { + ndsudoPath := filepath.Join(executable.Directory, "ndsudo") + + if _, err := os.Stat(ndsudoPath); err != nil { + return nil, fmt.Errorf("ndsudo executable not found: %v", err) + } + + storExec := newStorCliExec(ndsudoPath, s.Timeout.Duration(), s.Logger) + + return storExec, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml b/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml new file mode 100644 index 00000000000000..ecf97fb4420eb2 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml @@ -0,0 +1,153 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-storcli + plugin_name: go.d.plugin + module_name: storcli + monitored_instance: + name: StorCLI RAID + link: "https://docs.broadcom.com/doc/12352476" + icon_filename: "hard-drive.svg" + categories: + - data-collection.storage-mount-points-and-filesystems + keywords: + - storage + - raid-controller + - manage-disks + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + most_popular: false + overview: + data_collection: + metrics_description: | + Monitors the health of StorCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system. 
+ It relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary. + Instead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment. + This approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management. + + Executed commands: + - `storcli /cALL show all J nolog` + - `storcli /cALL/eALL/sALL show all J nolog` + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/storcli.conf + options: + description: | + The following options can be defined globally: update_every. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 10 + required: false + - name: timeout + description: storcli binary execution timeout. + default_value: 2 + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Custom update_every + description: Allows you to override the default data collection interval. + config: | + jobs: + - name: storcli + update_every: 5 # Collect StorCLI RAID statistics every 5 seconds + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: controller + description: These metrics refer to the Controller. 
+ labels: + - name: controller_number + description: Controller number (index) + - name: model + description: Controller model + metrics: + - name: storcli.controller_status + description: Controller status + unit: status + chart_type: line + dimensions: + - name: optimal + - name: degraded + - name: partially_degraded + - name: failed + - name: storcli.controller_bbu_status + description: Controller BBU status + unit: status + chart_type: line + dimensions: + - name: healthy + - name: unhealthy + - name: na + - name: physical drive + description: These metrics refer to the Physical Drive. + labels: + - name: controller_number + description: Controller number (index) + - name: enclosure_number + description: Enclosure number (index) + - name: slot_number + description: Slot number (index) + - name: media_type + description: Media type (e.g. HDD) + metrics: + - name: storcli.phys_drive_errors + description: Physical Drive media errors rate + unit: errors/s + chart_type: line + dimensions: + - name: media + - name: other + - name: storcli.phys_drive_predictive_failures + description: Physical Drive predictive failures rate + unit: failures/s + chart_type: line + dimensions: + - name: predictive_failures + - name: storcli.phys_drive_smart_alert_status + description: Physical Drive SMART alert status + unit: status + chart_type: line + dimensions: + - name: active + - name: inactive + - name: storcli.phys_drive_temperature + description: Physical Drive temperature + unit: status + chart_type: line + dimensions: + - name: temperature diff --git a/src/go/collectors/go.d.plugin/modules/storcli/storcli.go b/src/go/collectors/go.d.plugin/modules/storcli/storcli.go new file mode 100644 index 00000000000000..3122803b69ae11 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/storcli.go @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + _ "embed" + "errors" + "time" + + 
"github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("storcli", module.Creator{ + JobConfigSchema: configSchema, + Defaults: module.Defaults{ + UpdateEvery: 10, + }, + Create: func() module.Module { return New() }, + }) +} + +func New() *StorCli { + return &StorCli{ + Config: Config{ + Timeout: web.Duration(time.Second * 2), + }, + charts: &module.Charts{}, + controllers: make(map[string]bool), + drives: make(map[string]bool), + bbu: make(map[string]bool), + } +} + +type Config struct { + UpdateEvery int `yaml:"update_every" json:"update_every"` + Timeout web.Duration `yaml:"timeout" json:"timeout"` +} + +type ( + StorCli struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + + exec storCli + + controllers map[string]bool + drives map[string]bool + bbu map[string]bool + } + storCli interface { + controllersInfo() ([]byte, error) + drivesInfo() ([]byte, error) + } +) + +func (s *StorCli) Configuration() any { + return s.Config +} + +func (s *StorCli) Init() error { + storExec, err := s.initStorCliExec() + if err != nil { + s.Errorf("storcli exec initialization: %v", err) + return err + } + s.exec = storExec + + return nil +} + +func (s *StorCli) Check() error { + mx, err := s.collect() + if err != nil { + s.Error(err) + return err + } + + if len(mx) == 0 { + return errors.New("no metrics collected") + } + + return nil +} + +func (s *StorCli) Charts() *module.Charts { + return s.charts +} + +func (s *StorCli) Collect() map[string]int64 { + mx, err := s.collect() + if err != nil { + s.Error(err) + } + + if len(mx) == 0 { + return nil + } + + return mx +} + +func (s *StorCli) Cleanup() {} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go b/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go new file mode 100644 index 00000000000000..84ea3bb4c3725d --- 
/dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package storcli + +import ( + "errors" + "os" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") + + dataMegaControllerInfo, _ = os.ReadFile("testdata/megaraid-controllers-info.json") + dataMegaDrivesInfo, _ = os.ReadFile("testdata/megaraid-drives-info.json") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + + "dataMegaControllerInfo": dataMegaControllerInfo, + "dataMegaDrivesInfo": dataMegaDrivesInfo, + } { + require.NotNil(t, data, name) + } +} + +func TestStorCli_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &StorCli{}, dataConfigJSON, dataConfigYAML) +} + +func TestStorCli_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "fails if 'ndsudo' not found": { + wantFail: true, + config: New().Config, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + stor := New() + + if test.wantFail { + assert.Error(t, stor.Init()) + } else { + assert.NoError(t, stor.Init()) + } + }) + } +} + +func TestStorCli_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func() *StorCli + }{ + "not initialized exec": { + prepare: func() *StorCli { + return New() + }, + }, + "after check": { + prepare: func() *StorCli { + stor := New() + stor.exec = prepareMockMegaRaidOK() + _ = stor.Check() + return stor + }, + }, + "after collect": { + prepare: func() *StorCli { + stor := New() + stor.exec = prepareMockMegaRaidOK() + _ = stor.Collect() + return stor + }, + }, + } + + for name, test := range 
tests { + t.Run(name, func(t *testing.T) { + stor := test.prepare() + + assert.NotPanics(t, stor.Cleanup) + }) + } +} + +func TestStorCli_Charts(t *testing.T) { + assert.NotNil(t, New().Charts()) +} + +func TestStorCli_Check(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockStorCliExec + wantFail bool + }{ + "success MegaRAID controller": { + wantFail: false, + prepareMock: prepareMockMegaRaidOK, + }, + "err on exec": { + wantFail: true, + prepareMock: prepareMockErr, + }, + "unexpected response": { + wantFail: true, + prepareMock: prepareMockUnexpectedResponse, + }, + "empty response": { + wantFail: true, + prepareMock: prepareMockEmptyResponse, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + stor := New() + mock := test.prepareMock() + stor.exec = mock + + if test.wantFail { + assert.Error(t, stor.Check()) + } else { + assert.NoError(t, stor.Check()) + } + }) + } +} + +func TestStorCli_Collect(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockStorCliExec + wantMetrics map[string]int64 + wantCharts int + }{ + "success MegaRAID controller": { + prepareMock: prepareMockMegaRaidOK, + wantCharts: len(controllerChartsTmpl)*1 + len(physDriveChartsTmpl)*6, + wantMetrics: map[string]int64{ + "cntrl_0_bbu_status_healthy": 1, + "cntrl_0_bbu_status_na": 0, + "cntrl_0_bbu_status_unhealthy": 0, + "cntrl_0_status_degraded": 0, + "cntrl_0_status_failed": 0, + "cntrl_0_status_optimal": 1, + "cntrl_0_status_partially_degraded": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_media_error_count": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_other_error_count": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_predictive_failure_count": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_smart_alert_status_active": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_temperature": 28, + "phys_drive_5000C500D59840FE_cntrl_0_media_error_count": 0, + 
"phys_drive_5000C500D59840FE_cntrl_0_other_error_count": 0, + "phys_drive_5000C500D59840FE_cntrl_0_predictive_failure_count": 0, + "phys_drive_5000C500D59840FE_cntrl_0_smart_alert_status_active": 0, + "phys_drive_5000C500D59840FE_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500D59840FE_cntrl_0_temperature": 28, + "phys_drive_5000C500D6061539_cntrl_0_media_error_count": 0, + "phys_drive_5000C500D6061539_cntrl_0_other_error_count": 0, + "phys_drive_5000C500D6061539_cntrl_0_predictive_failure_count": 0, + "phys_drive_5000C500D6061539_cntrl_0_smart_alert_status_active": 0, + "phys_drive_5000C500D6061539_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500D6061539_cntrl_0_temperature": 28, + "phys_drive_5000C500DC79B194_cntrl_0_media_error_count": 0, + "phys_drive_5000C500DC79B194_cntrl_0_other_error_count": 0, + "phys_drive_5000C500DC79B194_cntrl_0_predictive_failure_count": 0, + "phys_drive_5000C500DC79B194_cntrl_0_smart_alert_status_active": 0, + "phys_drive_5000C500DC79B194_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500DC79B194_cntrl_0_temperature": 28, + "phys_drive_5000C500E54F4EBB_cntrl_0_media_error_count": 0, + "phys_drive_5000C500E54F4EBB_cntrl_0_other_error_count": 0, + "phys_drive_5000C500E54F4EBB_cntrl_0_predictive_failure_count": 0, + "phys_drive_5000C500E54F4EBB_cntrl_0_smart_alert_status_active": 0, + "phys_drive_5000C500E54F4EBB_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500E54F4EBB_cntrl_0_temperature": 28, + "phys_drive_5000C500E5659BA7_cntrl_0_media_error_count": 0, + "phys_drive_5000C500E5659BA7_cntrl_0_other_error_count": 0, + "phys_drive_5000C500E5659BA7_cntrl_0_predictive_failure_count": 0, + "phys_drive_5000C500E5659BA7_cntrl_0_smart_alert_status_active": 0, + "phys_drive_5000C500E5659BA7_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500E5659BA7_cntrl_0_temperature": 27, + }, + }, + "err on exec": { + prepareMock: prepareMockErr, + wantMetrics: nil, + }, + "unexpected 
response": { + prepareMock: prepareMockUnexpectedResponse, + wantMetrics: nil, + }, + "empty response": { + prepareMock: prepareMockEmptyResponse, + wantMetrics: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + stor := New() + mock := test.prepareMock() + stor.exec = mock + + mx := stor.Collect() + + assert.Equal(t, test.wantMetrics, mx) + assert.Len(t, *stor.Charts(), test.wantCharts) + testMetricsHasAllChartsDims(t, stor, mx) + }) + } +} + +func prepareMockMegaRaidOK() *mockStorCliExec { + return &mockStorCliExec{ + controllersInfoData: dataMegaControllerInfo, + drivesInfoData: dataMegaDrivesInfo, + } +} + +func prepareMockErr() *mockStorCliExec { + return &mockStorCliExec{ + errOnInfo: true, + } +} + +func prepareMockUnexpectedResponse() *mockStorCliExec { + resp := []byte(` +Lorem ipsum dolor sit amet, consectetur adipiscing elit. +Nulla malesuada erat id magna mattis, eu viverra tellus rhoncus. +Fusce et felis pulvinar, posuere sem non, porttitor eros. 
+`) + return &mockStorCliExec{ + controllersInfoData: resp, + drivesInfoData: resp, + } +} + +func prepareMockEmptyResponse() *mockStorCliExec { + return &mockStorCliExec{} +} + +type mockStorCliExec struct { + errOnInfo bool + controllersInfoData []byte + drivesInfoData []byte +} + +func (m *mockStorCliExec) controllersInfo() ([]byte, error) { + if m.errOnInfo { + return nil, errors.New("mock.controllerInfo() error") + } + return m.controllersInfoData, nil +} + +func (m *mockStorCliExec) drivesInfo() ([]byte, error) { + if m.errOnInfo { + return nil, errors.New("mock.drivesInfo() error") + } + return m.drivesInfoData, nil +} + +func testMetricsHasAllChartsDims(t *testing.T, stor *StorCli, mx map[string]int64) { + for _, chart := range *stor.Charts() { + if chart.Obsolete { + continue + } + for _, dim := range chart.Dims { + _, ok := mx[dim.ID] + assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) + } + for _, v := range chart.Vars { + _, ok := mx[v.ID] + assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/testdata/config.json b/src/go/collectors/go.d.plugin/modules/storcli/testdata/config.json new file mode 100644 index 00000000000000..291ecee3d63d06 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/testdata/config.json @@ -0,0 +1,4 @@ +{ + "update_every": 123, + "timeout": 123.123 +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/storcli/testdata/config.yaml new file mode 100644 index 00000000000000..25b0b4c780de56 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/testdata/config.yaml @@ -0,0 +1,2 @@ +update_every: 123 +timeout: 123.123 diff --git a/src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-controllers-info.json 
b/src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-controllers-info.json new file mode 100644 index 00000000000000..e4e988d101766f --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-controllers-info.json @@ -0,0 +1,687 @@ +{ + "Controllers": [ + { + "Command Status": { + "CLI Version": "007.2807.0000.0000 Dec 22, 2023", + "Operating system": "Linux 6.5.13-1-pve", + "Controller": 0, + "Status": "Success", + "Description": "None" + }, + "Response Data": { + "Basics": { + "Controller": 0, + "Model": "ServeRAID M5015 SAS/SATA Controller", + "Serial Number": "SV04616189", + "Current Controller Date/Time": "04/17/2024, 18:28:30", + "Current System Date/time": "04/17/2024, 18:30:05", + "SAS Address": "500605b002e04f10", + "PCI Address": "00:0a:00:00", + "Mfg Date": "11/11/10", + "Rework Date": "00/00/00", + "Revision No": "" + }, + "Version": { + "Firmware Package Build": "12.15.0-0239", + "Firmware Version": "2.130.403-4660", + "Bios Version": "3.30.02.2_4.16.08.00_0x06060A05", + "Preboot CLI Version": "04.04-020:#%00009", + "WebBIOS Version": "6.0-54-e_50-Rel", + "NVDATA Version": "2.09.03-0058", + "Boot Block Version": "2.02.00.00-0000", + "Bootloader Version": "09.250.01.219", + "Driver Name": "megaraid_sas", + "Driver Version": "07.725.01.00-rc1" + }, + "Bus": { + "Vendor Id": 4096, + "Device Id": 121, + "SubVendor Id": 4116, + "SubDevice Id": 946, + "Host Interface": "PCI-E", + "Device Interface": "SAS-6G", + "Bus Number": 10, + "Device Number": 0, + "Function Number": 0, + "Domain ID": 0 + }, + "Pending Images in Flash": { + "Image name": "No pending images" + }, + "Status": { + "Controller Status": "Optimal", + "Memory Correctable Errors": 0, + "Memory Uncorrectable Errors": 0, + "ECC Bucket Count": 0, + "Any Offline VD Cache Preserved": "No", + "BBU Status": 0, + "PD Firmware Download in progress": "No", + "Support PD Firmware Download": "Yes", + "Lock Key Assigned": "No", + "Failed to get lock key on bootup": "No", 
+ "Lock key has not been backed up": "No", + "Bios was not detected during boot": "No", + "Controller must be rebooted to complete security operation": "No", + "A rollback operation is in progress": "No", + "At least one PFK exists in NVRAM": "No", + "SSC Policy is WB": "No", + "Controller has booted into safe mode": "No", + "Controller shutdown required": "No", + "Controller has booted into certificate provision mode": "No" + }, + "Supported Adapter Operations": { + "Rebuild Rate": "Yes", + "CC Rate": "Yes", + "BGI Rate ": "Yes", + "Reconstruct Rate": "Yes", + "Patrol Read Rate": "Yes", + "Alarm Control": "Yes", + "Cluster Support": "No", + "BBU": "Yes", + "Spanning": "Yes", + "Dedicated Hot Spare": "Yes", + "Revertible Hot Spares": "Yes", + "Foreign Config Import": "Yes", + "Self Diagnostic": "Yes", + "Allow Mixed Redundancy on Array": "No", + "Global Hot Spares": "Yes", + "Deny SCSI Passthrough": "No", + "Deny SMP Passthrough": "No", + "Deny STP Passthrough": "No", + "Support more than 8 Phys": "Yes", + "FW and Event Time in GMT": "No", + "Support Enhanced Foreign Import": "Yes", + "Support Enclosure Enumeration": "Yes", + "Support Allowed Operations": "Yes", + "Abort CC on Error": "Yes", + "Support Multipath": "Yes", + "Support Odd & Even Drive count in RAID1E": "No", + "Support Security": "Yes", + "Support Config Page Model": "Yes", + "Support the OCE without adding drives": "Yes", + "Support EKM": "Yes", + "Snapshot Enabled": "Yes", + "Support PFK": "No", + "Support PI": "No", + "Support Ld BBM Info": "No", + "Support Shield State": "No", + "Block SSD Write Disk Cache Change": "No", + "Support Suspend Resume BG ops": "No", + "Support Emergency Spares": "Yes", + "Support Set Link Speed": "No", + "Support Boot Time PFK Change": "No", + "Support JBOD": "No", + "Disable Online PFK Change": "No", + "Support Perf Tuning": "No", + "Support SSD PatrolRead": "Yes", + "Real Time Scheduler": "Yes", + "Support Reset Now": "Yes", + "Support Emulated Drives": "No", + 
"Headless Mode": "Yes", + "Dedicated HotSpares Limited": "No", + "Point In Time Progress": "No", + "Extended LD": "No", + "Support Uneven span ": "No", + "Support Config Auto Balance": "No", + "Support Maintenance Mode": "No", + "Support Diagnostic results": "No", + "Support Ext Enclosure": "No", + "Support Sesmonitoring": "No", + "Support SecurityonJBOD": "No", + "Support ForceFlash": "No", + "Support DisableImmediateIO": "Yes", + "Support LargeIOSupport": "No", + "Support DrvActivityLEDSetting": "Yes", + "Support FlushWriteVerify": "No", + "Support CPLDUpdate": "No", + "Support ForceTo512e": "No", + "Support discardCacheDuringLDDelete": "No", + "Support JBOD Write cache": "No", + "Support Large QD Support": "No", + "Support Ctrl Info Extended": "No", + "Support IButton less": "No", + "Support AES Encryption Algorithm": "No", + "Support Encrypted MFC": "No", + "Support Snapdump": "No", + "Support Force Personality Change": "No", + "Support Dual Fw Image": "No", + "Support PSOC Update": "No", + "Support Secure Boot": "No", + "Support Debug Queue": "No", + "Support Least Latency Mode": "Yes", + "Support OnDemand Snapdump": "No", + "Support Clear Snapdump": "No", + "Support PHY current speed": "No", + "Support Lane current speed": "No", + "Support NVMe Width": "No", + "Support Lane DeviceType": "No", + "Support Extended Drive performance Monitoring": "No", + "Support NVMe Repair": "No", + "Support Platform Security": "No", + "Support None Mode Params": "No", + "Support Extended Controller Property": "No", + "Support Smart Poll Interval for DirectAttached": "No", + "Support Write Journal Pinning": "No", + "Support SMP Passthru with Port Number": "No", + "Support SnapDump Preboot Trace Buffer Toggle": "No", + "Support Parity Read Cache Bypass": "No", + "Support NVMe Init Error Device ConnectorIndex": "No", + "Support VolatileKey": "No", + "Support PSOC Part Information": "No", + "Support Slow array threshold calculation": "No", + "Support PCIe Reference Clock 
override": "No", + "Support PCIe PERST override": "No", + "Support Drive FW Download Mask": "No", + "Support Start of day PL log capture": "No", + "Support Drive Unrecovered Medium Error Count": "No" + }, + "Enterprise Key management": { + "Capability": "Supported", + "Boot Agent": "Not Available", + "Configured": "No" + }, + "Supported PD Operations": { + "Force Online": "Yes", + "Force Offline": "Yes", + "Force Rebuild": "Yes", + "Deny Force Failed": "No", + "Deny Force Good/Bad": "No", + "Deny Missing Replace": "No", + "Deny Clear": "No", + "Deny Locate": "No", + "Support Power State": "No", + "Set Power State For Cfg": "No", + "Support T10 Power State": "No", + "Support Temperature": "Yes", + "NCQ": "No", + "Support Max Rate SATA": "No", + "Support Degraded Media": "No", + "Support Parallel FW Update": "No", + "Support Drive Crypto Erase": "No", + "Support SSD Wear Gauge": "No", + "Support Sanitize": "No", + "Support Extended Sanitize": "No" + }, + "Supported VD Operations": { + "Read Policy": "Yes", + "Write Policy": "Yes", + "IO Policy": "Yes", + "Access Policy": "Yes", + "Disk Cache Policy": "Yes", + "Reconstruction": "Yes", + "Deny Locate": "No", + "Deny CC": "No", + "Allow Ctrl Encryption": "No", + "Enable LDBBM": "No", + "Support FastPath": "Yes", + "Performance Metrics": "Yes", + "Power Savings": "No", + "Support Powersave Max With Cache": "No", + "Support Breakmirror": "No", + "Support SSC WriteBack": "Yes", + "Support SSC Association": "Yes", + "Support VD Hide": "No", + "Support VD Cachebypass": "No", + "Support VD discardCacheDuringLDDelete": "No", + "Support VD Scsi Unmap": "No" + }, + "HwCfg": { + "ChipRevision": " B2", + "BatteryFRU": "N/A", + "Front End Port Count": 0, + "Backend Port Count": 8, + "BBU": "Present", + "Alarm": "Disable", + "Serial Debugger": "Present", + "NVRAM Size": "32KB", + "Flash Size": "8MB", + "On Board Memory Size": "512MB", + "CacheVault Flash Size": "NA", + "TPM": "Absent", + "Upgrade Key": "Present", + "On Board 
Expander": "Absent", + "Temperature Sensor for ROC": "Absent", + "Temperature Sensor for Controller": "Absent", + "Upgradable CPLD": "Absent", + "Upgradable PSOC": "Absent", + "Current Size of CacheCade (GB)": 0, + "Current Size of FW Cache (MB)": 349 + }, + "Policies": { + "Policies Table": [ + { + "Policy": "Predictive Fail Poll Interval", + "Current": "300 sec", + "Default": "" + }, + { + "Policy": "Interrupt Throttle Active Count", + "Current": "16", + "Default": "" + }, + { + "Policy": "Interrupt Throttle Completion", + "Current": "50 us", + "Default": "" + }, + { + "Policy": "Rebuild Rate", + "Current": "30 %", + "Default": "30%" + }, + { + "Policy": "PR Rate", + "Current": "30 %", + "Default": "30%" + }, + { + "Policy": "BGI Rate", + "Current": "30 %", + "Default": "30%" + }, + { + "Policy": "Check Consistency Rate", + "Current": "30 %", + "Default": "30%" + }, + { + "Policy": "Reconstruction Rate", + "Current": "30 %", + "Default": "30%" + }, + { + "Policy": "Cache Flush Interval", + "Current": "4s", + "Default": "" + } + ], + "Flush Time(Default)": "4s", + "Drive Coercion Mode": "1GB", + "Auto Rebuild": "On", + "Battery Warning": "On", + "ECC Bucket Size": 15, + "ECC Bucket Leak Rate (hrs)": 24, + "Restore Hot Spare on Insertion": "Off", + "Expose Enclosure Devices": "On", + "Maintain PD Fail History": "On", + "Reorder Host Requests": "On", + "Auto detect BackPlane": "SGPIO/i2c SEP", + "Load Balance Mode": "Auto", + "Security Key Assigned": "Off", + "Disable Online Controller Reset": "Off", + "Use drive activity for locate": "Off" + }, + "Boot": { + "BIOS Enumerate VDs": 1, + "Stop BIOS on Error": "On", + "Delay during POST": 4, + "Spin Down Mode": "None", + "Enable Ctrl-R": "No", + "Enable Web BIOS": "Yes", + "Enable PreBoot CLI": "Yes", + "Enable BIOS": "Yes", + "Max Drives to Spinup at One Time": 4, + "Maximum number of direct attached drives to spin up in 1 min": 20, + "Delay Among Spinup Groups (sec)": 12, + "Allow Boot with Preserved Cache": "Off" + 
}, + "High Availability": { + "Topology Type": "None", + "Cluster Permitted": "No", + "Cluster Active": "No" + }, + "Defaults": { + "Phy Polarity": 0, + "Phy PolaritySplit": 0, + "Strip Size": "128 KB", + "Write Policy": "WB", + "Read Policy": "No Read Ahead", + "Cache When BBU Bad": "Off", + "Cached IO": "Off", + "VD PowerSave Policy": "Controller Defined", + "Default spin down time (mins)": 30, + "Coercion Mode": "1 GB", + "ZCR Config": "Unknown", + "Max Chained Enclosures": 16, + "Direct PD Mapping": "No", + "Restore Hot Spare on Insertion": "No", + "Expose Enclosure Devices": "Yes", + "Maintain PD Fail History": "Yes", + "Zero Based Enclosure Enumeration": "No", + "Disable Puncturing": "Yes", + "EnableLDBBM": "No", + "DisableHII": "No", + "Un-Certified Hard Disk Drives": "Allow", + "SMART Mode": "Mode 6", + "Enable LED Header": "No", + "LED Show Drive Activity": "No", + "Dirty LED Shows Drive Activity": "No", + "EnableCrashDump": "No", + "Disable Online Controller Reset": "No", + "Treat Single span R1E as R10": "No", + "Power Saving option": "Enabled", + "TTY Log In Flash": "No", + "Auto Enhanced Import": "No", + "BreakMirror RAID Support": "No", + "Disable Join Mirror": "No", + "Enable Shield State": "No", + "Time taken to detect CME": "60 sec" + }, + "Capabilities": { + "Supported Drives": "SAS, SATA", + "RAID Level Supported": "RAID0, RAID1(2 or more drives), RAID5, RAID6, RAID00, RAID10(2 or more drives per span), RAID50, RAID60", + "Enable JBOD": "No", + "Mix in Enclosure": "Allowed", + "Mix of SAS/SATA of HDD type in VD": "Not Allowed", + "Mix of SAS/SATA of SSD type in VD": "Not Allowed", + "Mix of SSD/HDD in VD": "Not Allowed", + "SAS Disable": "No", + "Max Arms Per VD": 32, + "Max Spans Per VD": 8, + "Max Arrays": 128, + "Max VD per array": 16, + "Max Number of VDs": 64, + "Max Parallel Commands": 1008, + "Max SGE Count": 60, + "Max Data Transfer Size": "8192 sectors", + "Max Strips PerIO": 42, + "Max Configurable CacheCade Size(GB)": 512, + "Max 
Transportable DGs": 0, + "Enable Snapdump": "No", + "Enable SCSI Unmap": "Yes", + "Read cache bypass enabled for Parity RAID LDs": "No", + "FDE Drive Mix Support": "No", + "Min Strip Size": "8 KB", + "Max Strip Size": "1.000 MB" + }, + "Scheduled Tasks": { + "Consistency Check Reoccurrence": "168 hrs", + "Next Consistency check launch": "04/20/2024, 03:00:00", + "Patrol Read Reoccurrence": "168 hrs", + "Next Patrol Read launch": "04/20/2024, 03:00:00", + "Battery learn Reoccurrence": "672 hrs", + "Next Battery Learn": "04/18/2024, 18:32:56", + "OEMID": "Lenovo" + }, + "Security Protocol properties": { + "Security Protocol": "None" + }, + "Drive Groups": 1, + "TOPOLOGY": [ + { + "DG": 0, + "Arr": "-", + "Row": "-", + "EID:Slot": "-", + "DID": "-", + "Type": "RAID6", + "State": "Optl", + "BT": "N", + "Size": "58.207 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "N", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": "-", + "EID:Slot": "-", + "DID": "-", + "Type": "RAID6", + "State": "Optl", + "BT": "N", + "Size": "58.207 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "N", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": 0, + "EID:Slot": "252:3", + "DID": 35, + "Type": "DRIVE", + "State": "Onln", + "BT": "N", + "Size": "14.551 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "-", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": 1, + "EID:Slot": "252:5", + "DID": 31, + "Type": "DRIVE", + "State": "Onln", + "BT": "N", + "Size": "14.551 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "-", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": 2, + "EID:Slot": "252:4", + "DID": 30, + "Type": "DRIVE", + "State": "Onln", + "BT": "N", + "Size": "14.551 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "-", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": 3, + "EID:Slot": "252:7", + "DID": 32, + "Type": "DRIVE", + 
"State": "Onln", + "BT": "N", + "Size": "14.551 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "-", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": 4, + "EID:Slot": "252:0", + "DID": 34, + "Type": "DRIVE", + "State": "Onln", + "BT": "N", + "Size": "14.551 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "-", + "TR": "N" + }, + { + "DG": 0, + "Arr": 0, + "Row": 5, + "EID:Slot": "252:1", + "DID": 33, + "Type": "DRIVE", + "State": "Onln", + "BT": "N", + "Size": "14.551 TB", + "PDC": "dsbl", + "PI": "N", + "SED": "N", + "DS3": "none", + "FSpace": "-", + "TR": "N" + } + ], + "Virtual Drives": 1, + "VD LIST": [ + { + "DG/VD": "0/0", + "TYPE": "RAID6", + "State": "Optl", + "Access": "RW", + "Consist": "Yes", + "Cache": "RWBD", + "Cac": "-", + "sCC": "ON", + "Size": "58.207 TB", + "Name": "Sluthub" + } + ], + "Physical Drives": 6, + "PD LIST": [ + { + "EID:Slt": "252:0", + "DID": 34, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + }, + { + "EID:Slt": "252:1", + "DID": 33, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + }, + { + "EID:Slt": "252:3", + "DID": 35, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + }, + { + "EID:Slt": "252:4", + "DID": 30, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + }, + { + "EID:Slt": "252:5", + "DID": 31, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + 
"SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + }, + { + "EID:Slt": "252:7", + "DID": 32, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Enclosures": 1, + "Enclosure LIST": [ + { + "EID": 252, + "State": "OK", + "Slots": 8, + "PD": 6, + "PS": 0, + "Fans": 0, + "TSs": 0, + "Alms": 0, + "SIM": 1, + "Port#": "-", + "ProdID": "SGPIO", + "VendorSpecific": " " + } + ], + "BBU_Info": [ + { + "Model": "iBBU08", + "State": "Optimal", + "RetentionTime": "48 hours +", + "Temp": "34C", + "Mode": "4", + "MfgDate": "2011/03/18", + "Next Learn": "2024/04/18 18:32:56" + } + ] + } + } + ] +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-drives-info.json b/src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-drives-info.json new file mode 100644 index 00000000000000..b8735d6a3230d9 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-drives-info.json @@ -0,0 +1,495 @@ +{ + "Controllers": [ + { + "Command Status": { + "CLI Version": "007.2807.0000.0000 Dec 22, 2023", + "Operating system": "Linux 6.5.13-1-pve", + "Controller": 0, + "Status": "Success", + "Description": "Show Drive Information Succeeded." 
+ }, + "Response Data": { + "Drive /c0/e252/s0": [ + { + "EID:Slt": "252:0", + "DID": 34, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Drive /c0/e252/s0 - Detailed Information": { + "Drive /c0/e252/s0 State": { + "Shield Counter": 0, + "Media Error Count": 0, + "Other Error Count": 0, + "BBM Error Count": 0, + "Drive Temperature": " 28C (82.40 F)", + "Predictive Failure Count": 0, + "S.M.A.R.T alert flagged by drive": "No" + }, + "Drive /c0/e252/s0 Device attributes": { + "SN": " ZL2PVFA8", + "Manufacturer Id": "ATA ", + "Model Number": "ST16000NM001G-2KK103", + "NAND Vendor": "NA", + "WWN": "5000C500E54F4EBB", + "Firmware Revision": "SN03 ", + "Raw size": "14.552 TB [0x746c00000 Sectors]", + "Coerced size": "14.551 TB [0x746a52800 Sectors]", + "Non Coerced size": "14.551 TB [0x746b00000 Sectors]", + "Device Speed": "6.0Gb/s", + "Link Speed": "6.0Gb/s", + "NCQ setting": "N/A", + "Write Cache": "N/A", + "Logical Sector Size": "512B", + "Physical Sector Size": "512B", + "Connector Name": "" + }, + "Drive /c0/e252/s0 Policies/Settings": { + "Drive position": "DriveGroup:0, Span:0, Row:4", + "Enclosure position": "1", + "Connected Port Number": "1(path0) ", + "Sequence Number": 2, + "Commissioned Spare": "No", + "Emergency Spare": "No", + "Last Predictive Failure Event Sequence Number": 0, + "Successful diagnostics completion on": "N/A", + "FDE Type": "None", + "SED Capable": "No", + "SED Enabled": "No", + "Secured": "No", + "Cryptographic Erase Capable": "No", + "Sanitize Support": "Not supported", + "Locked": "No", + "Needs EKM Attention": "No", + "PI Eligible": "No", + "Drive is formatted for PI": "No", + "PI type": "No PI", + "Number of bytes of user data in LBA": "512B", + "Certified": "No", + "Wide Port Capable": "No", + "Multipath": "No", + "Port Information": [ + { + "Port": 0, + "Status": "Active", + 
"Linkspeed": "6.0Gb/s", + "SAS address": "0x4433221103000000" + } + ] + }, + "Inquiry Data": "5a 0c ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 20 20 20 20 4c 5a 50 32 46 56 38 41 00 00 00 00 00 00 4e 53 33 30 20 20 20 20 54 53 36 31 30 30 4e 30 30 4d 31 30 2d 47 4b 32 31 4b 33 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 3f 00 10 fc fb 00 10 5d ff ff ff 0f 00 00 07 00 " + }, + "Drive /c0/e252/s1": [ + { + "EID:Slt": "252:1", + "DID": 33, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Drive /c0/e252/s1 - Detailed Information": { + "Drive /c0/e252/s1 State": { + "Shield Counter": 0, + "Media Error Count": 0, + "Other Error Count": 0, + "BBM Error Count": 0, + "Drive Temperature": " 27C (80.60 F)", + "Predictive Failure Count": 0, + "S.M.A.R.T alert flagged by drive": "No" + }, + "Drive /c0/e252/s1 Device attributes": { + "SN": " ZL2PY6LF", + "Manufacturer Id": "ATA ", + "Model Number": "ST16000NM001G-2KK103", + "NAND Vendor": "NA", + "WWN": "5000C500E5659BA7", + "Firmware Revision": "SN03 ", + "Raw size": "14.552 TB [0x746c00000 Sectors]", + "Coerced size": "14.551 TB [0x746a52800 Sectors]", + "Non Coerced size": "14.551 TB [0x746b00000 Sectors]", + "Device Speed": "6.0Gb/s", + "Link Speed": "6.0Gb/s", + "NCQ setting": "N/A", + "Write Cache": "N/A", + "Logical Sector Size": "512B", + "Physical Sector Size": "512B", + "Connector Name": "" + }, + "Drive /c0/e252/s1 Policies/Settings": { + "Drive position": "DriveGroup:0, Span:0, Row:5", + "Enclosure position": "1", + "Connected Port Number": "2(path0) ", + "Sequence Number": 2, + "Commissioned Spare": "No", + "Emergency Spare": "No", + "Last Predictive Failure Event Sequence Number": 0, + "Successful diagnostics completion on": "N/A", + "FDE Type": "None", + 
"SED Capable": "No", + "SED Enabled": "No", + "Secured": "No", + "Cryptographic Erase Capable": "No", + "Sanitize Support": "Not supported", + "Locked": "No", + "Needs EKM Attention": "No", + "PI Eligible": "No", + "Drive is formatted for PI": "No", + "PI type": "No PI", + "Number of bytes of user data in LBA": "512B", + "Certified": "No", + "Wide Port Capable": "No", + "Multipath": "No", + "Port Information": [ + { + "Port": 0, + "Status": "Active", + "Linkspeed": "6.0Gb/s", + "SAS address": "0x4433221102000000" + } + ] + }, + "Inquiry Data": "5a 0c ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 20 20 20 20 4c 5a 50 32 36 59 46 4c 00 00 00 00 00 00 4e 53 33 30 20 20 20 20 54 53 36 31 30 30 4e 30 30 4d 31 30 2d 47 4b 32 31 4b 33 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 3f 00 10 fc fb 00 10 5d ff ff ff 0f 00 00 07 00 " + }, + "Drive /c0/e252/s3": [ + { + "EID:Slt": "252:3", + "DID": 35, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Drive /c0/e252/s3 - Detailed Information": { + "Drive /c0/e252/s3 State": { + "Shield Counter": 0, + "Media Error Count": 0, + "Other Error Count": 0, + "BBM Error Count": 0, + "Drive Temperature": " 28C (82.40 F)", + "Predictive Failure Count": 0, + "S.M.A.R.T alert flagged by drive": "No" + }, + "Drive /c0/e252/s3 Device attributes": { + "SN": " ZL2M2WQ3", + "Manufacturer Id": "ATA ", + "Model Number": "ST16000NM001G-2KK103", + "NAND Vendor": "NA", + "WWN": "5000C500DC79B194", + "Firmware Revision": "SN03 ", + "Raw size": "14.552 TB [0x746c00000 Sectors]", + "Coerced size": "14.551 TB [0x746a52800 Sectors]", + "Non Coerced size": "14.551 TB [0x746b00000 Sectors]", + "Device Speed": "6.0Gb/s", + "Link Speed": "6.0Gb/s", + "NCQ setting": "N/A", + "Write Cache": "N/A", + 
"Logical Sector Size": "512B", + "Physical Sector Size": "512B", + "Connector Name": "" + }, + "Drive /c0/e252/s3 Policies/Settings": { + "Drive position": "DriveGroup:0, Span:0, Row:0", + "Enclosure position": "1", + "Connected Port Number": "0(path0) ", + "Sequence Number": 2, + "Commissioned Spare": "No", + "Emergency Spare": "No", + "Last Predictive Failure Event Sequence Number": 0, + "Successful diagnostics completion on": "N/A", + "FDE Type": "None", + "SED Capable": "No", + "SED Enabled": "No", + "Secured": "No", + "Cryptographic Erase Capable": "No", + "Sanitize Support": "Not supported", + "Locked": "No", + "Needs EKM Attention": "No", + "PI Eligible": "No", + "Drive is formatted for PI": "No", + "PI type": "No PI", + "Number of bytes of user data in LBA": "512B", + "Certified": "No", + "Wide Port Capable": "No", + "Multipath": "No", + "Port Information": [ + { + "Port": 0, + "Status": "Active", + "Linkspeed": "6.0Gb/s", + "SAS address": "0x4433221100000000" + } + ] + }, + "Inquiry Data": "5a 0c ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 20 20 20 20 4c 5a 4d 32 57 32 33 51 00 00 00 00 00 00 4e 53 33 30 20 20 20 20 54 53 36 31 30 30 4e 30 30 4d 31 30 2d 47 4b 32 31 4b 33 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 3f 00 10 fc fb 00 10 5d ff ff ff 0f 00 00 07 00 " + }, + "Drive /c0/e252/s4": [ + { + "EID:Slt": "252:4", + "DID": 30, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Drive /c0/e252/s4 - Detailed Information": { + "Drive /c0/e252/s4 State": { + "Shield Counter": 0, + "Media Error Count": 0, + "Other Error Count": 0, + "BBM Error Count": 0, + "Drive Temperature": " 28C (82.40 F)", + "Predictive Failure Count": 0, + "S.M.A.R.T alert flagged by drive": "No" + }, + "Drive /c0/e252/s4 
Device attributes": { + "SN": " WL201HYL", + "Manufacturer Id": "ATA ", + "Model Number": "ST16000NM001G-2KK103", + "NAND Vendor": "NA", + "WWN": "5000C500D59840FE", + "Firmware Revision": "SN04 ", + "Raw size": "14.552 TB [0x746c00000 Sectors]", + "Coerced size": "14.551 TB [0x746a52800 Sectors]", + "Non Coerced size": "14.551 TB [0x746b00000 Sectors]", + "Device Speed": "6.0Gb/s", + "Link Speed": "6.0Gb/s", + "NCQ setting": "N/A", + "Write Cache": "N/A", + "Logical Sector Size": "512B", + "Physical Sector Size": "512B", + "Connector Name": "" + }, + "Drive /c0/e252/s4 Policies/Settings": { + "Drive position": "DriveGroup:0, Span:0, Row:2", + "Enclosure position": "1", + "Connected Port Number": "3(path0) ", + "Sequence Number": 2, + "Commissioned Spare": "No", + "Emergency Spare": "No", + "Last Predictive Failure Event Sequence Number": 0, + "Successful diagnostics completion on": "N/A", + "FDE Type": "None", + "SED Capable": "No", + "SED Enabled": "No", + "Secured": "No", + "Cryptographic Erase Capable": "No", + "Sanitize Support": "Not supported", + "Locked": "No", + "Needs EKM Attention": "No", + "PI Eligible": "No", + "Drive is formatted for PI": "No", + "PI type": "No PI", + "Number of bytes of user data in LBA": "512B", + "Certified": "No", + "Wide Port Capable": "No", + "Multipath": "No", + "Port Information": [ + { + "Port": 0, + "Status": "Active", + "Linkspeed": "6.0Gb/s", + "SAS address": "0x4433221104000000" + } + ] + }, + "Inquiry Data": "5a 0c ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 20 20 20 20 4c 57 30 32 48 31 4c 59 00 00 00 00 00 00 4e 53 34 30 20 20 20 20 54 53 36 31 30 30 4e 30 30 4d 31 30 2d 47 4b 32 31 4b 33 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 3f 00 10 fc fb 00 10 5d ff ff ff 0f 00 00 07 00 " + }, + "Drive /c0/e252/s5": [ + { + "EID:Slt": "252:5", + "DID": 31, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": 
"SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Drive /c0/e252/s5 - Detailed Information": { + "Drive /c0/e252/s5 State": { + "Shield Counter": 0, + "Media Error Count": 0, + "Other Error Count": 0, + "BBM Error Count": 0, + "Drive Temperature": " 28C (82.40 F)", + "Predictive Failure Count": 0, + "S.M.A.R.T alert flagged by drive": "No" + }, + "Drive /c0/e252/s5 Device attributes": { + "SN": " ZL21DC50", + "Manufacturer Id": "ATA ", + "Model Number": "ST16000NM001G-2KK103", + "NAND Vendor": "NA", + "WWN": "5000C500C36C8BCD", + "Firmware Revision": "SN04 ", + "Raw size": "14.552 TB [0x746c00000 Sectors]", + "Coerced size": "14.551 TB [0x746a52800 Sectors]", + "Non Coerced size": "14.551 TB [0x746b00000 Sectors]", + "Device Speed": "6.0Gb/s", + "Link Speed": "6.0Gb/s", + "NCQ setting": "N/A", + "Write Cache": "N/A", + "Logical Sector Size": "512B", + "Physical Sector Size": "512B", + "Connector Name": "" + }, + "Drive /c0/e252/s5 Policies/Settings": { + "Drive position": "DriveGroup:0, Span:0, Row:1", + "Enclosure position": "1", + "Connected Port Number": "4(path0) ", + "Sequence Number": 2, + "Commissioned Spare": "No", + "Emergency Spare": "No", + "Last Predictive Failure Event Sequence Number": 0, + "Successful diagnostics completion on": "N/A", + "FDE Type": "None", + "SED Capable": "No", + "SED Enabled": "No", + "Secured": "No", + "Cryptographic Erase Capable": "No", + "Sanitize Support": "Not supported", + "Locked": "No", + "Needs EKM Attention": "No", + "PI Eligible": "No", + "Drive is formatted for PI": "No", + "PI type": "No PI", + "Number of bytes of user data in LBA": "512B", + "Certified": "No", + "Wide Port Capable": "No", + "Multipath": "No", + "Port Information": [ + { + "Port": 0, + "Status": "Active", + "Linkspeed": "6.0Gb/s", + "SAS address": "0x4433221105000000" + } + ] + }, + "Inquiry Data": "5a 0c ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 
20 20 20 20 20 20 20 20 20 20 20 20 4c 5a 31 32 43 44 30 35 00 00 00 00 00 00 4e 53 34 30 20 20 20 20 54 53 36 31 30 30 4e 30 30 4d 31 30 2d 47 4b 32 31 4b 33 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 3f 00 10 fc fb 00 10 5d ff ff ff 0f 00 00 07 00 " + }, + "Drive /c0/e252/s7": [ + { + "EID:Slt": "252:7", + "DID": 32, + "State": "Onln", + "DG": 0, + "Size": "14.551 TB", + "Intf": "SATA", + "Med": "HDD", + "SED": "N", + "PI": "N", + "SeSz": "512B", + "Model": "ST16000NM001G-2KK103", + "Sp": "U", + "Type": "-" + } + ], + "Drive /c0/e252/s7 - Detailed Information": { + "Drive /c0/e252/s7 State": { + "Shield Counter": 0, + "Media Error Count": 0, + "Other Error Count": 0, + "BBM Error Count": 0, + "Drive Temperature": " 28C (82.40 F)", + "Predictive Failure Count": 0, + "S.M.A.R.T alert flagged by drive": "No" + }, + "Drive /c0/e252/s7 Device attributes": { + "SN": " WL204LF2", + "Manufacturer Id": "ATA ", + "Model Number": "ST16000NM001G-2KK103", + "NAND Vendor": "NA", + "WWN": "5000C500D6061539", + "Firmware Revision": "SB30 ", + "Raw size": "14.552 TB [0x746c00000 Sectors]", + "Coerced size": "14.551 TB [0x746a52800 Sectors]", + "Non Coerced size": "14.551 TB [0x746b00000 Sectors]", + "Device Speed": "6.0Gb/s", + "Link Speed": "6.0Gb/s", + "NCQ setting": "N/A", + "Write Cache": "N/A", + "Logical Sector Size": "512B", + "Physical Sector Size": "512B", + "Connector Name": "" + }, + "Drive /c0/e252/s7 Policies/Settings": { + "Drive position": "DriveGroup:0, Span:0, Row:3", + "Enclosure position": "1", + "Connected Port Number": "5(path0) ", + "Sequence Number": 4, + "Commissioned Spare": "No", + "Emergency Spare": "No", + "Last Predictive Failure Event Sequence Number": 0, + "Successful diagnostics completion on": "N/A", + "FDE Type": "None", + "SED Capable": "No", + "SED Enabled": "No", + "Secured": "No", + "Cryptographic Erase Capable": "No", + "Sanitize Support": "Not supported", + "Locked": 
"No", + "Needs EKM Attention": "No", + "PI Eligible": "No", + "Drive is formatted for PI": "No", + "PI type": "No PI", + "Number of bytes of user data in LBA": "512B", + "Certified": "No", + "Wide Port Capable": "No", + "Multipath": "No", + "Port Information": [ + { + "Port": 0, + "Status": "Active", + "Linkspeed": "6.0Gb/s", + "SAS address": "0x4433221107000000" + } + ] + }, + "Inquiry Data": "5a 0c ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 20 20 20 20 4c 57 30 32 4c 34 32 46 00 00 00 00 00 00 42 53 30 33 20 20 20 20 54 53 36 31 30 30 4e 30 30 4d 31 30 2d 47 4b 32 31 4b 33 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 3f 00 10 fc fb 00 10 5d ff ff ff 0f 00 00 07 00 " + } + } + } + ] +} From 376b4d5f08d4238960e686747b798fe9d64111ba Mon Sep 17 00:00:00 2001 From: Netdata bot <43409846+netdatabot@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:59:29 +0300 Subject: [PATCH 02/16] Regenerate integrations.js (#17458) Co-authored-by: ilyam8 <22274335+ilyam8@users.noreply.github.com> --- integrations/integrations.js | 38 ++++ integrations/integrations.json | 38 ++++ src/collectors/COLLECTORS.md | 2 + .../go.d.plugin/modules/storcli/README.md | 1 + .../storcli/integrations/storecli_raid.md | 189 ++++++++++++++++++ 5 files changed, 268 insertions(+) create mode 120000 src/go/collectors/go.d.plugin/modules/storcli/README.md create mode 100644 src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md diff --git a/integrations/integrations.js b/integrations/integrations.js index cddcabd59cf33e..0d9862d36fb321 100644 --- a/integrations/integrations.js +++ b/integrations/integrations.js @@ -15786,6 +15786,44 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/squidlog/metadata.yaml", "related_resources": "" }, + { + "meta": { + "id": "collector-go.d.plugin-storcli", + 
"plugin_name": "go.d.plugin", + "module_name": "storcli", + "monitored_instance": { + "name": "StoreCLI RAID", + "link": "https://docs.broadcom.com/doc/12352476", + "icon_filename": "hard-drive.svg", + "categories": [ + "data-collection.storage-mount-points-and-filesystems" + ] + }, + "keywords": [ + "storage", + "raid-controller", + "manage-disks" + ], + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "most_popular": false + }, + "overview": "# StoreCLI RAID\n\nPlugin: go.d.plugin\nModule: storcli\n\n## Overview\n\nMonitors the health of StoreCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system.\nIt relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary.\nInstead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment.\nThis approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management.\n\nExecuted commands:\n- `storcli /cALL show all J nolog`\n- `storcli /cALL/eALL/sALL show all J nolog`\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis integration doesn't support auto-detection.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\nNo action required.\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/storcli.conf`.\n\n\nYou can edit the configuration file using the 
`edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/storcli.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n{% details summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| timeout | storcli binary execution timeout. | 2 | no |\n\n{% /details %}\n#### Examples\n\n##### Custom update_every\n\nAllows you to override the default data collection interval.\n\n{% details summary=\"Config\" %}\n```yaml\njobs:\n - name: storcli\n update_every: 5 # Collect StorCLI RAID statistics every 5 seconds\n\n```\n{% /details %}\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `storcli` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m storcli\n ```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per controller\n\nThese metrics refer to the Controller.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| model | Controller model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.controller_status | optimal, degraded, partially_degraded, failed | status |\n| storcli.controller_bbu_status | healthy, unhealthy, na | status |\n\n### Per physical drive\n\nThese metrics refer to the Physical Drive.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| enclosure_number | Enclosure number (index) |\n| slot_number | Slot number (index) |\n| media type | Media type (e.g. HDD) |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.phys_drive_errors | media, other | errors/s |\n| storcli.phys_drive_predictive_failures | predictive_failures | failures/s |\n| storcli.phys_drive_smart_alert_status | active, inactive | status |\n| storcli.phys_drive_temperature | temperature | status |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-storcli-StoreCLI_RAID", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-supervisord", diff --git a/integrations/integrations.json b/integrations/integrations.json index 36987d3f6212c7..a88476e2a8414e 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -15784,6 +15784,44 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/squidlog/metadata.yaml", "related_resources": "" }, + { + "meta": { + "id": "collector-go.d.plugin-storcli", + "plugin_name": "go.d.plugin", + "module_name": "storcli", + "monitored_instance": { + "name": 
"StoreCLI RAID", + "link": "https://docs.broadcom.com/doc/12352476", + "icon_filename": "hard-drive.svg", + "categories": [ + "data-collection.storage-mount-points-and-filesystems" + ] + }, + "keywords": [ + "storage", + "raid-controller", + "manage-disks" + ], + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "most_popular": false + }, + "overview": "# StoreCLI RAID\n\nPlugin: go.d.plugin\nModule: storcli\n\n## Overview\n\nMonitors the health of StoreCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system.\nIt relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary.\nInstead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment.\nThis approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management.\n\nExecuted commands:\n- `storcli /cALL show all J nolog`\n- `storcli /cALL/eALL/sALL show all J nolog`\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis integration doesn't support auto-detection.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\nNo action required.\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/storcli.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config 
directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/storcli.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| timeout | storcli binary execution timeout. | 2 | no |\n\n#### Examples\n\n##### Custom update_every\n\nAllows you to override the default data collection interval.\n\n```yaml\njobs:\n - name: storcli\n update_every: 5 # Collect StorCLI RAID statistics every 5 seconds\n\n```\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `storcli` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m storcli\n ```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per controller\n\nThese metrics refer to the Controller.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| model | Controller model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.controller_status | optimal, degraded, partially_degraded, failed | status |\n| storcli.controller_bbu_status | healthy, unhealthy, na | status |\n\n### Per physical drive\n\nThese metrics refer to the Physical Drive.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| enclosure_number | Enclosure number (index) |\n| slot_number | Slot number (index) |\n| media type | Media type (e.g. HDD) |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.phys_drive_errors | media, other | errors/s |\n| storcli.phys_drive_predictive_failures | predictive_failures | failures/s |\n| storcli.phys_drive_smart_alert_status | active, inactive | status |\n| storcli.phys_drive_temperature | temperature | status |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-storcli-StoreCLI_RAID", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-supervisord", diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index e0c22df25c1ba8..26b81e77ec545d 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -1039,6 +1039,8 @@ If you don't see the app/service you'd like to monitor in this list: - [Starwind VSAN VSphere Edition](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/starwind_vsan_vsphere_edition.md) +- [StoreCLI 
RAID](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md) + - [Storidge](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/storidge.md) - [Synology ActiveBackup](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/synology_activebackup.md) diff --git a/src/go/collectors/go.d.plugin/modules/storcli/README.md b/src/go/collectors/go.d.plugin/modules/storcli/README.md new file mode 120000 index 00000000000000..482049b19a2a50 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/README.md @@ -0,0 +1 @@ +integrations/storecli_raid.md \ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md b/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md new file mode 100644 index 00000000000000..bc4f81de14134c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md @@ -0,0 +1,189 @@ + + +# StoreCLI RAID + + + + + +Plugin: go.d.plugin +Module: storcli + + + +## Overview + +Monitors the health of StoreCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system. +It relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary. +Instead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment. +This approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management. + +Executed commands: +- `storcli /cALL show all J nolog` +- `storcli /cALL/eALL/sALL show all J nolog` + + + + +This collector is supported on all platforms. + +This collector only supports collecting metrics from a single instance of this integration. 
+ + +### Default Behavior + +#### Auto-Detection + +This integration doesn't support auto-detection. + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per controller + +These metrics refer to the Controller. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| controller_number | Controller number (index) | +| model | Controller model | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| storcli.controller_status | optimal, degraded, partially_degraded, failed | status | +| storcli.controller_bbu_status | healthy, unhealthy, na | status | + +### Per physical drive + +These metrics refer to the Physical Drive. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| controller_number | Controller number (index) | +| enclosure_number | Enclosure number (index) | +| slot_number | Slot number (index) | +| media type | Media type (e.g. HDD) | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| storcli.phys_drive_errors | media, other | errors/s | +| storcli.phys_drive_predictive_failures | predictive_failures | failures/s | +| storcli.phys_drive_smart_alert_status | active, inactive | status | +| storcli.phys_drive_temperature | temperature | status | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +No action required. + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/storcli.conf`. 
+ + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/storcli.conf +``` +#### Options + +The following options can be defined globally: update_every. + + +
Config options + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 10 | no | +| timeout | storcli binary execution timeout. | 2 | no | + +
+ +#### Examples + +##### Custom update_every + +Allows you to override the default data collection interval. + +
Config + +```yaml +jobs: + - name: storcli + update_every: 5 # Collect StorCLI RAID statistics every 5 seconds + +``` +
+ + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `storcli` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m storcli + ``` + + From 3d65360e6990bbf67b85fed0ab1ebc1620a6cdf3 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Sat, 20 Apr 2024 00:15:58 +0000 Subject: [PATCH 03/16] [ci skip] Update changelog and version for nightly build: v1.45.0-233-nightly. --- CHANGELOG.md | 22 +++++++++++----------- packaging/version | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f1b021ce43c65..903245e44c1888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,17 @@ **Merged pull requests:** +- Regenerate integrations.js [\#17458](https://github.com/netdata/netdata/pull/17458) ([netdatabot](https://github.com/netdatabot)) +- ndsudo add storcli [\#17455](https://github.com/netdata/netdata/pull/17455) ([ilyam8](https://github.com/ilyam8)) +- go.d add storcli collector [\#17454](https://github.com/netdata/netdata/pull/17454) ([ilyam8](https://github.com/ilyam8)) +- Bump github.com/vmware/govmomi from 0.37.0 to 0.37.1 in /src/go/collectors/go.d.plugin [\#17451](https://github.com/netdata/netdata/pull/17451) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/miekg/dns from 1.1.58 to 1.1.59 in /src/go/collectors/go.d.plugin [\#17449](https://github.com/netdata/netdata/pull/17449) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/prometheus/common from 
0.52.3 to 0.53.0 in /src/go/collectors/go.d.plugin [\#17448](https://github.com/netdata/netdata/pull/17448) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump github.com/docker/docker from 26.0.1+incompatible to 26.0.2+incompatible in /src/go/collectors/go.d.plugin [\#17447](https://github.com/netdata/netdata/pull/17447) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Regenerate integrations.js [\#17446](https://github.com/netdata/netdata/pull/17446) ([netdatabot](https://github.com/netdatabot)) - Add documentation for VictorOps cloud notifications [\#17445](https://github.com/netdata/netdata/pull/17445) ([juacker](https://github.com/juacker)) +- Reconnect to the cloud when resuming from suspension [\#17444](https://github.com/netdata/netdata/pull/17444) ([stelfrag](https://github.com/stelfrag)) +- timex is not supported on windows. [\#17443](https://github.com/netdata/netdata/pull/17443) ([vkalintiris](https://github.com/vkalintiris)) - Regenerate integrations.js [\#17439](https://github.com/netdata/netdata/pull/17439) ([netdatabot](https://github.com/netdatabot)) - go.d mega/adaptec meta add alerts [\#17438](https://github.com/netdata/netdata/pull/17438) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#17437](https://github.com/netdata/netdata/pull/17437) ([netdatabot](https://github.com/netdatabot)) @@ -28,6 +38,7 @@ - Regenerate integrations.js [\#17416](https://github.com/netdata/netdata/pull/17416) ([netdatabot](https://github.com/netdatabot)) - go.d sd ll set max\_time\_series for prometheus/clickhouse [\#17415](https://github.com/netdata/netdata/pull/17415) ([ilyam8](https://github.com/ilyam8)) - rewrite megacli in go [\#17410](https://github.com/netdata/netdata/pull/17410) ([ilyam8](https://github.com/ilyam8)) +- dashboards doc edits [\#17409](https://github.com/netdata/netdata/pull/17409) ([Ancairon](https://github.com/Ancairon)) - Logs tab docs in dashboard section 
[\#17408](https://github.com/netdata/netdata/pull/17408) ([Ancairon](https://github.com/Ancairon)) - gh labeler: go.d.plugin rm suffix [\#17407](https://github.com/netdata/netdata/pull/17407) ([ilyam8](https://github.com/ilyam8)) - Bump go.mongodb.org/mongo-driver from 1.14.0 to 1.15.0 in /src/go/collectors/go.d.plugin [\#17406](https://github.com/netdata/netdata/pull/17406) ([dependabot[bot]](https://github.com/apps/dependabot)) @@ -407,17 +418,6 @@ - Rm refs to map and save modes [\#16874](https://github.com/netdata/netdata/pull/16874) ([vkalintiris](https://github.com/vkalintiris)) - Fix coverity issues [\#16873](https://github.com/netdata/netdata/pull/16873) ([stelfrag](https://github.com/stelfrag)) - Network Viewer \(local-sockets version\) [\#16872](https://github.com/netdata/netdata/pull/16872) ([ktsaou](https://github.com/ktsaou)) -- Limit what we upload to GCS for nightlies. [\#16870](https://github.com/netdata/netdata/pull/16870) ([Ferroin](https://github.com/Ferroin)) -- Use dagger to build and test the agent. 
[\#16868](https://github.com/netdata/netdata/pull/16868) ([vkalintiris](https://github.com/vkalintiris)) -- Local sockets for network namespaces [\#16867](https://github.com/netdata/netdata/pull/16867) ([ktsaou](https://github.com/ktsaou)) -- Fix coverity issue [\#16866](https://github.com/netdata/netdata/pull/16866) ([stelfrag](https://github.com/stelfrag)) -- update alpine 3.16 fts-dev [\#16865](https://github.com/netdata/netdata/pull/16865) ([ilyam8](https://github.com/ilyam8)) -- Remove old mention of save db mode [\#16864](https://github.com/netdata/netdata/pull/16864) ([Ancairon](https://github.com/Ancairon)) -- port useful code from incomplete PRs [\#16863](https://github.com/netdata/netdata/pull/16863) ([ktsaou](https://github.com/ktsaou)) -- apply the right prototype to instances [\#16862](https://github.com/netdata/netdata/pull/16862) ([ktsaou](https://github.com/ktsaou)) -- detect sockets direction [\#16861](https://github.com/netdata/netdata/pull/16861) ([ktsaou](https://github.com/ktsaou)) -- add freebsd jail detection to system-info.sh [\#16858](https://github.com/netdata/netdata/pull/16858) ([ilyam8](https://github.com/ilyam8)) -- Add ARMv6 static builds. 
[\#16853](https://github.com/netdata/netdata/pull/16853) ([Ferroin](https://github.com/Ferroin)) ## [v1.44.3](https://github.com/netdata/netdata/tree/v1.44.3) (2024-02-12) diff --git a/packaging/version b/packaging/version index 151be1cf1a6272..61d3afcc6ac4f1 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.45.0-221-nightly +v1.45.0-233-nightly From 623244f8a9dc134c37599d1e07e7792c9f193583 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Sat, 20 Apr 2024 19:20:12 +0300 Subject: [PATCH 04/16] go.d storcli update (#17460) --- .../go.d.plugin/modules/storcli/charts.go | 45 +++++++++++++- .../modules/storcli/collect_controllers.go | 24 ++++++-- .../modules/storcli/collect_drives.go | 12 ++-- .../go.d.plugin/modules/storcli/metadata.yaml | 36 ++++++++++- .../modules/storcli/storcli_test.go | 15 ++--- src/health/health.d/megacli.conf | 2 +- src/health/health.d/storcli.conf | 61 +++++++++++++++++++ 7 files changed, 174 insertions(+), 21 deletions(-) create mode 100644 src/health/health.d/storcli.conf diff --git a/src/go/collectors/go.d.plugin/modules/storcli/charts.go b/src/go/collectors/go.d.plugin/modules/storcli/charts.go index 65cd75a3319347..f12b2d1a78d124 100644 --- a/src/go/collectors/go.d.plugin/modules/storcli/charts.go +++ b/src/go/collectors/go.d.plugin/modules/storcli/charts.go @@ -18,6 +18,8 @@ const ( prioPhysDrivePredictiveFailures prioPhysDriveSmartAlertStatus prioPhysDriveTemperature + + prioBBUTemperature ) var controllerChartsTmpl = module.Charts{ @@ -106,7 +108,7 @@ var ( physDriveTemperatureChartTmpl = module.Chart{ ID: "phys_drive_%s_cntrl_%s_temperature", Title: "Physical Drive temperature", - Units: "status", + Units: "Celsius", Fam: "pd temperature", Ctx: "storcli.phys_drive_temperature", Type: module.Line, @@ -117,6 +119,25 @@ var ( } ) +var bbuChartsTmpl = module.Charts{ + bbuTemperatureChartTmpl.Copy(), +} + +var ( + bbuTemperatureChartTmpl = module.Chart{ + ID: "bbu_%s_cntrl_%s_temperature", + Title: "BBU 
temperature", + Units: "Celsius", + Fam: "bbu temperature", + Ctx: "storcli.bbu_temperature", + Type: module.Line, + Priority: prioBBUTemperature, + Dims: module.Dims{ + {ID: "bbu_%s_cntrl_%s_temperature", Name: "temperature"}, + }, + } +) + func (s *StorCli) addControllerCharts(cntrl controllerInfo) { charts := controllerChartsTmpl.Copy() @@ -141,7 +162,7 @@ func (s *StorCli) addControllerCharts(cntrl controllerInfo) { func (s *StorCli) addPhysDriveCharts(cntrlNum int, di *driveInfo, ds *driveState, da *driveAttrs) { charts := physDriveChartsTmpl.Copy() - if _, ok := parseInt(getDriveTemperature(ds.DriveTemperature)); !ok { + if _, ok := parseInt(getTemperature(ds.DriveTemperature)); !ok { _ = charts.Remove(physDriveTemperatureChartTmpl.ID) } @@ -169,3 +190,23 @@ func (s *StorCli) addPhysDriveCharts(cntrlNum int, di *driveInfo, ds *driveState s.Warning(err) } } + +func (s *StorCli) addBBUCharts(cntrlNum, bbuNum, model string) { + charts := bbuChartsTmpl.Copy() + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, bbuNum, cntrlNum) + chart.Labels = []module.Label{ + {Key: "controller_number", Value: cntrlNum}, + {Key: "bbu_number", Value: bbuNum}, + {Key: "model", Value: model}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, bbuNum, cntrlNum) + } + } + + if err := s.Charts().Add(*charts...); err != nil { + s.Warning(err) + } +} diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go b/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go index 259013e6c16810..d1302aea014322 100644 --- a/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go +++ b/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go @@ -47,13 +47,14 @@ func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersI for _, v := range resp.Controllers { cntrl := v.ResponseData - idx := strconv.Itoa(cntrl.Basics.Controller) - if !s.controllers[idx] { - s.controllers[idx] = 
true + cntrlNum := strconv.Itoa(cntrl.Basics.Controller) + + if !s.controllers[cntrlNum] { + s.controllers[cntrlNum] = true s.addControllerCharts(cntrl) } - px := fmt.Sprintf("cntrl_%s_", idx) + px := fmt.Sprintf("cntrl_%s_", cntrlNum) for _, st := range []string{"optimal", "degraded", "partially_degraded", "failed"} { mx[px+"status_"+st] = 0 @@ -72,7 +73,22 @@ func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersI default: mx[px+"bbu_status_unhealthy"] = 1 } + + for i, bbu := range cntrl.BBUInfo { + bbuNum := strconv.Itoa(i) + if k := cntrlNum + bbuNum; !s.bbu[k] { + s.bbu[k] = true + s.addBBUCharts(cntrlNum, bbuNum, bbu.Model) + } + + px := fmt.Sprintf("bbu_%s_cntrl_%s_", bbuNum, cntrlNum) + + if v, ok := parseInt(getTemperature(bbu.Temp)); ok { + mx[px+"temperature"] = v + } + } } + return nil } diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go b/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go index 353728d6dcd79f..c84ca4b1e2dc67 100644 --- a/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go +++ b/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go @@ -112,7 +112,7 @@ func (s *StorCli) collectMegaRaidDrives(mx map[string]int64, resp *drivesInfoRes if v, ok := parseInt(string(state.PredictiveFailureCount)); ok { mx[px+"predictive_failure_count"] = v } - if v, ok := parseInt(getDriveTemperature(state.DriveTemperature)); ok { + if v, ok := parseInt(getTemperature(state.DriveTemperature)); ok { mx[px+"temperature"] = v } for _, st := range []string{"active", "inactive"} { @@ -120,6 +120,8 @@ func (s *StorCli) collectMegaRaidDrives(mx map[string]int64, resp *drivesInfoRes } if state.SmartAlertFlagged == "Yes" { mx[px+"smart_alert_status_active"] = 1 + } else { + mx[px+"smart_alert_status_inactive"] = 1 } } } @@ -216,13 +218,13 @@ func getDriveAttrs(driveDetailedInfo map[string]json.RawMessage, id string) (*dr return &state, nil } -func getDriveTemperature(s string) string { 
- // ' 28C (82.40 F)' - i := strings.IndexByte(s, 'C') +func getTemperature(temp string) string { + // ' 28C (82.40 F)' (drive) or '33C' (bbu) + i := strings.IndexByte(temp, 'C') if i == -1 { return "" } - return strings.TrimSpace(s[:i]) + return strings.TrimSpace(temp[:i]) } func parseInt(s string) (int64, bool) { diff --git a/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml b/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml index ecf97fb4420eb2..ab7866bcf90a83 100644 --- a/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml +++ b/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml @@ -80,7 +80,23 @@ modules: troubleshooting: problems: list: [] - alerts: [] + alerts: + - name: storcli_controller_status + metric: storcli.controller_status + info: RAID controller ${label:controller_number} health status is not optimal + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf + - name: storcli_controller_bbu_status + metric: storcli.controller_bbu_status + info: RAID controller ${label:controller_number} BBU is unhealthy + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf + - name: storcli_phys_drive_errors + metric: storcli.phys_drive_errors + info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf + - name: storcli_phys_drive_predictive_failures + metric: storcli.phys_drive_predictive_failures + info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf metrics: folding: title: Metrics @@ -147,7 +163,23 @@ modules: - name: inactive - name: storcli.phys_drive_temperature description: Physical Drive temperature - unit: status + unit: Celsius + chart_type: line + dimensions: + 
- name: temperature + - name: bbu + description: These metrics refer to the Backup Battery Unit. + labels: + - name: controller_number + description: Controller number (index) + - name: bbu_number + description: BBU number (index) + - name: model + description: BBU model + metrics: + - name: storcli.bbu_temperature + description: BBU temperature + unit: Celsius chart_type: line dimensions: - name: temperature diff --git a/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go b/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go index 84ea3bb4c3725d..74c92b87048e82 100644 --- a/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go +++ b/src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go @@ -147,8 +147,9 @@ func TestStorCli_Collect(t *testing.T) { }{ "success MegaRAID controller": { prepareMock: prepareMockMegaRaidOK, - wantCharts: len(controllerChartsTmpl)*1 + len(physDriveChartsTmpl)*6, + wantCharts: len(controllerChartsTmpl)*1 + len(physDriveChartsTmpl)*6 + len(bbuChartsTmpl)*1, wantMetrics: map[string]int64{ + "bbu_0_cntrl_0_temperature": 34, "cntrl_0_bbu_status_healthy": 1, "cntrl_0_bbu_status_na": 0, "cntrl_0_bbu_status_unhealthy": 0, @@ -160,37 +161,37 @@ func TestStorCli_Collect(t *testing.T) { "phys_drive_5000C500C36C8BCD_cntrl_0_other_error_count": 0, "phys_drive_5000C500C36C8BCD_cntrl_0_predictive_failure_count": 0, "phys_drive_5000C500C36C8BCD_cntrl_0_smart_alert_status_active": 0, - "phys_drive_5000C500C36C8BCD_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500C36C8BCD_cntrl_0_smart_alert_status_inactive": 1, "phys_drive_5000C500C36C8BCD_cntrl_0_temperature": 28, "phys_drive_5000C500D59840FE_cntrl_0_media_error_count": 0, "phys_drive_5000C500D59840FE_cntrl_0_other_error_count": 0, "phys_drive_5000C500D59840FE_cntrl_0_predictive_failure_count": 0, "phys_drive_5000C500D59840FE_cntrl_0_smart_alert_status_active": 0, - "phys_drive_5000C500D59840FE_cntrl_0_smart_alert_status_inactive": 0, + 
"phys_drive_5000C500D59840FE_cntrl_0_smart_alert_status_inactive": 1, "phys_drive_5000C500D59840FE_cntrl_0_temperature": 28, "phys_drive_5000C500D6061539_cntrl_0_media_error_count": 0, "phys_drive_5000C500D6061539_cntrl_0_other_error_count": 0, "phys_drive_5000C500D6061539_cntrl_0_predictive_failure_count": 0, "phys_drive_5000C500D6061539_cntrl_0_smart_alert_status_active": 0, - "phys_drive_5000C500D6061539_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500D6061539_cntrl_0_smart_alert_status_inactive": 1, "phys_drive_5000C500D6061539_cntrl_0_temperature": 28, "phys_drive_5000C500DC79B194_cntrl_0_media_error_count": 0, "phys_drive_5000C500DC79B194_cntrl_0_other_error_count": 0, "phys_drive_5000C500DC79B194_cntrl_0_predictive_failure_count": 0, "phys_drive_5000C500DC79B194_cntrl_0_smart_alert_status_active": 0, - "phys_drive_5000C500DC79B194_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500DC79B194_cntrl_0_smart_alert_status_inactive": 1, "phys_drive_5000C500DC79B194_cntrl_0_temperature": 28, "phys_drive_5000C500E54F4EBB_cntrl_0_media_error_count": 0, "phys_drive_5000C500E54F4EBB_cntrl_0_other_error_count": 0, "phys_drive_5000C500E54F4EBB_cntrl_0_predictive_failure_count": 0, "phys_drive_5000C500E54F4EBB_cntrl_0_smart_alert_status_active": 0, - "phys_drive_5000C500E54F4EBB_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500E54F4EBB_cntrl_0_smart_alert_status_inactive": 1, "phys_drive_5000C500E54F4EBB_cntrl_0_temperature": 28, "phys_drive_5000C500E5659BA7_cntrl_0_media_error_count": 0, "phys_drive_5000C500E5659BA7_cntrl_0_other_error_count": 0, "phys_drive_5000C500E5659BA7_cntrl_0_predictive_failure_count": 0, "phys_drive_5000C500E5659BA7_cntrl_0_smart_alert_status_active": 0, - "phys_drive_5000C500E5659BA7_cntrl_0_smart_alert_status_inactive": 0, + "phys_drive_5000C500E5659BA7_cntrl_0_smart_alert_status_inactive": 1, "phys_drive_5000C500E5659BA7_cntrl_0_temperature": 27, }, }, diff --git a/src/health/health.d/megacli.conf 
b/src/health/health.d/megacli.conf index 8d71d585bf2386..27721fa9ad2600 100644 --- a/src/health/health.d/megacli.conf +++ b/src/health/health.d/megacli.conf @@ -38,7 +38,7 @@ component: RAID type: System component: RAID lookup: sum -10s - units: media errors + units: failures every: 10s warn: $this > 0 delay: up 1m down 5m multiplier 2 max 10m diff --git a/src/health/health.d/storcli.conf b/src/health/health.d/storcli.conf new file mode 100644 index 00000000000000..0beda768622089 --- /dev/null +++ b/src/health/health.d/storcli.conf @@ -0,0 +1,61 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + +# Controllers + + template: storcli_controller_status + on: storcli.controller_status + class: Errors + type: System +component: RAID + lookup: average -1m unaligned percentage of optimal + units: % + every: 10s + crit: $this < 100 + delay: down 5m multiplier 2 max 10m + summary: RAID controller ${label:controller_number} health + info: RAID controller ${label:controller_number} health status is not optimal + to: sysadmin + + template: storcli_controller_bbu_status + on: storcli.controller_bbu_status + class: Errors + type: System +component: RAID + lookup: average -1m unaligned percentage of healthy,na + units: % + every: 10s + crit: $this < 100 + delay: down 5m multiplier 2 max 10m + summary: RAID controller ${label:controller_number} BBU health + info: RAID controller ${label:controller_number} BBU is unhealthy + to: sysadmin + +# Physical Drives + + template: storcli_phys_drive_errors + on: storcli.phys_drive_errors + class: Errors + type: System +component: RAID + lookup: sum -10s + units: errors + every: 10s + warn: $this > 0 + delay: up 1m down 5m multiplier 2 max 10m + summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors + info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors + to: sysadmin + + template: 
storcli_phys_drive_predictive_failures + on: storcli.phys_drive_predictive_failures + class: Errors + type: System +component: RAID + lookup: sum -10s + units: failures + every: 10s + warn: $this > 0 + delay: up 1m down 5m multiplier 2 max 10m + summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures + info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures + to: sysadmin From aeb5dd7e8cfb4eb334f5eaff269b8296ee6456b0 Mon Sep 17 00:00:00 2001 From: Netdata bot <43409846+netdatabot@users.noreply.github.com> Date: Sat, 20 Apr 2024 19:27:44 +0300 Subject: [PATCH 05/16] Regenerate integrations.js (#17461) Co-authored-by: ilyam8 <22274335+ilyam8@users.noreply.github.com> --- integrations/integrations.js | 4 +-- integrations/integrations.json | 4 +-- .../storcli/integrations/storecli_raid.md | 30 +++++++++++++++++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/integrations/integrations.js b/integrations/integrations.js index 0d9862d36fb321..95728bf2c8f16f 100644 --- a/integrations/integrations.js +++ b/integrations/integrations.js @@ -15817,8 +15817,8 @@ export const integrations = [ "overview": "# StoreCLI RAID\n\nPlugin: go.d.plugin\nModule: storcli\n\n## Overview\n\nMonitors the health of StoreCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system.\nIt relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary.\nInstead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment.\nThis approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management.\n\nExecuted commands:\n- `storcli /cALL show all J nolog`\n- `storcli /cALL/eALL/sALL show all J nolog`\n\n\n\n\nThis collector is supported on 
all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis integration doesn't support auto-detection.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", "setup": "## Setup\n\n### Prerequisites\n\nNo action required.\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/storcli.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/storcli.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n{% details summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| timeout | storcli binary execution timeout. | 2 | no |\n\n{% /details %}\n#### Examples\n\n##### Custom update_every\n\nAllows you to override the default data collection interval.\n\n{% details summary=\"Config\" %}\n```yaml\njobs:\n - name: storcli\n update_every: 5 # Collect StorCLI RAID statistics every 5 seconds\n\n```\n{% /details %}\n", "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `storcli` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m storcli\n ```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per controller\n\nThese metrics refer to the Controller.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| model | Controller model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.controller_status | optimal, degraded, partially_degraded, failed | status |\n| storcli.controller_bbu_status | healthy, unhealthy, na | status |\n\n### Per physical drive\n\nThese metrics refer to the Physical Drive.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| enclosure_number | Enclosure number (index) |\n| slot_number | Slot number (index) |\n| media type | Media type (e.g. 
HDD) |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.phys_drive_errors | media, other | errors/s |\n| storcli.phys_drive_predictive_failures | predictive_failures | failures/s |\n| storcli.phys_drive_smart_alert_status | active, inactive | status |\n| storcli.phys_drive_temperature | temperature | status |\n\n", + "alerts": "## Alerts\n\n\nThe following alerts are available:\n\n| Alert name | On metric | Description |\n|:------------|:----------|:------------|\n| [ storcli_controller_status ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.controller_status | RAID controller ${label:controller_number} health status is not optimal |\n| [ storcli_controller_bbu_status ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.controller_bbu_status | RAID controller ${label:controller_number} BBU is unhealthy |\n| [ storcli_phys_drive_errors ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.phys_drive_errors | RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors |\n| [ storcli_phys_drive_predictive_failures ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.phys_drive_predictive_failures | RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures |\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per controller\n\nThese metrics refer to the Controller.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| model | Controller model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.controller_status | optimal, degraded, partially_degraded, failed | status |\n| storcli.controller_bbu_status | healthy, unhealthy, na | status |\n\n### Per physical drive\n\nThese metrics refer to the Physical Drive.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| enclosure_number | Enclosure number (index) |\n| slot_number | Slot number (index) |\n| media type | Media type (e.g. HDD) |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.phys_drive_errors | media, other | errors/s |\n| storcli.phys_drive_predictive_failures | predictive_failures | failures/s |\n| storcli.phys_drive_smart_alert_status | active, inactive | status |\n| storcli.phys_drive_temperature | temperature | Celsius |\n\n### Per bbu\n\nThese metrics refer to the Backup Battery Unit.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| bbu_number | BBU number (index) |\n| model | BBU model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.bbu_temperature | temperature | Celsius |\n\n", "integration_type": "collector", "id": "go.d.plugin-storcli-StoreCLI_RAID", "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml", diff --git a/integrations/integrations.json b/integrations/integrations.json index a88476e2a8414e..21da9132cc0b16 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -15815,8 +15815,8 @@ "overview": "# StoreCLI 
RAID\n\nPlugin: go.d.plugin\nModule: storcli\n\n## Overview\n\nMonitors the health of StoreCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system.\nIt relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary.\nInstead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment.\nThis approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management.\n\nExecuted commands:\n- `storcli /cALL show all J nolog`\n- `storcli /cALL/eALL/sALL show all J nolog`\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis integration doesn't support auto-detection.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", "setup": "## Setup\n\n### Prerequisites\n\nNo action required.\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/storcli.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/storcli.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| timeout | storcli binary execution timeout. 
| 2 | no |\n\n#### Examples\n\n##### Custom update_every\n\nAllows you to override the default data collection interval.\n\n```yaml\njobs:\n - name: storcli\n update_every: 5 # Collect StorCLI RAID statistics every 5 seconds\n\n```\n", "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `storcli` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m storcli\n ```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per controller\n\nThese metrics refer to the Controller.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| model | Controller model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.controller_status | optimal, degraded, partially_degraded, failed | status |\n| storcli.controller_bbu_status | healthy, unhealthy, na | status |\n\n### Per physical drive\n\nThese metrics refer to the Physical Drive.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| enclosure_number | Enclosure number (index) |\n| slot_number | Slot number (index) |\n| media type | Media type (e.g. 
HDD) |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.phys_drive_errors | media, other | errors/s |\n| storcli.phys_drive_predictive_failures | predictive_failures | failures/s |\n| storcli.phys_drive_smart_alert_status | active, inactive | status |\n| storcli.phys_drive_temperature | temperature | status |\n\n", + "alerts": "## Alerts\n\n\nThe following alerts are available:\n\n| Alert name | On metric | Description |\n|:------------|:----------|:------------|\n| [ storcli_controller_status ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.controller_status | RAID controller ${label:controller_number} health status is not optimal |\n| [ storcli_controller_bbu_status ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.controller_bbu_status | RAID controller ${label:controller_number} BBU is unhealthy |\n| [ storcli_phys_drive_errors ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.phys_drive_errors | RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors |\n| [ storcli_phys_drive_predictive_failures ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.phys_drive_predictive_failures | RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures |\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per controller\n\nThese metrics refer to the Controller.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| model | Controller model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.controller_status | optimal, degraded, partially_degraded, failed | status |\n| storcli.controller_bbu_status | healthy, unhealthy, na | status |\n\n### Per physical drive\n\nThese metrics refer to the Physical Drive.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| enclosure_number | Enclosure number (index) |\n| slot_number | Slot number (index) |\n| media type | Media type (e.g. HDD) |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.phys_drive_errors | media, other | errors/s |\n| storcli.phys_drive_predictive_failures | predictive_failures | failures/s |\n| storcli.phys_drive_smart_alert_status | active, inactive | status |\n| storcli.phys_drive_temperature | temperature | Celsius |\n\n### Per bbu\n\nThese metrics refer to the Backup Battery Unit.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| controller_number | Controller number (index) |\n| bbu_number | BBU number (index) |\n| model | BBU model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| storcli.bbu_temperature | temperature | Celsius |\n\n", "integration_type": "collector", "id": "go.d.plugin-storcli-StoreCLI_RAID", "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml", diff --git a/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md b/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md index bc4f81de14134c..7cae126f66c3be 100644 --- 
a/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md +++ b/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md @@ -99,13 +99,39 @@ Metrics: | storcli.phys_drive_errors | media, other | errors/s | | storcli.phys_drive_predictive_failures | predictive_failures | failures/s | | storcli.phys_drive_smart_alert_status | active, inactive | status | -| storcli.phys_drive_temperature | temperature | status | +| storcli.phys_drive_temperature | temperature | Celsius | + +### Per bbu + +These metrics refer to the Backup Battery Unit. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| controller_number | Controller number (index) | +| bbu_number | BBU number (index) | +| model | BBU model | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| storcli.bbu_temperature | temperature | Celsius | ## Alerts -There are no alerts configured by default for this integration. + +The following alerts are available: + +| Alert name | On metric | Description | +|:------------|:----------|:------------| +| [ storcli_controller_status ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.controller_status | RAID controller ${label:controller_number} health status is not optimal | +| [ storcli_controller_bbu_status ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.controller_bbu_status | RAID controller ${label:controller_number} BBU is unhealthy | +| [ storcli_phys_drive_errors ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.phys_drive_errors | RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors | +| [ storcli_phys_drive_predictive_failures ](https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf) | storcli.phys_drive_predictive_failures | RAID physical drive 
c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures | ## Setup From 7fdef9d8bb1bd4a1c5bdd4c218867da3af5b51d7 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Sun, 21 Apr 2024 00:17:08 +0000 Subject: [PATCH 06/16] [ci skip] Update changelog and version for nightly build: v1.45.0-236-nightly. --- CHANGELOG.md | 4 ++-- packaging/version | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 903245e44c1888..bb716ceba51d97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ **Merged pull requests:** +- Regenerate integrations.js [\#17461](https://github.com/netdata/netdata/pull/17461) ([netdatabot](https://github.com/netdatabot)) +- go.d storcli update [\#17460](https://github.com/netdata/netdata/pull/17460) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#17458](https://github.com/netdata/netdata/pull/17458) ([netdatabot](https://github.com/netdatabot)) - ndsudo add storcli [\#17455](https://github.com/netdata/netdata/pull/17455) ([ilyam8](https://github.com/ilyam8)) - go.d add storcli collector [\#17454](https://github.com/netdata/netdata/pull/17454) ([ilyam8](https://github.com/ilyam8)) @@ -416,8 +418,6 @@ - Network viewer fixes [\#16877](https://github.com/netdata/netdata/pull/16877) ([ktsaou](https://github.com/ktsaou)) - Add requirements.txt for dag [\#16875](https://github.com/netdata/netdata/pull/16875) ([vkalintiris](https://github.com/vkalintiris)) - Rm refs to map and save modes [\#16874](https://github.com/netdata/netdata/pull/16874) ([vkalintiris](https://github.com/vkalintiris)) -- Fix coverity issues [\#16873](https://github.com/netdata/netdata/pull/16873) ([stelfrag](https://github.com/stelfrag)) -- Network Viewer \(local-sockets version\) [\#16872](https://github.com/netdata/netdata/pull/16872) ([ktsaou](https://github.com/ktsaou)) ## [v1.44.3](https://github.com/netdata/netdata/tree/v1.44.3) (2024-02-12) diff --git 
a/packaging/version b/packaging/version index 61d3afcc6ac4f1..5ac470c124b2f5 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.45.0-233-nightly +v1.45.0-236-nightly From 7826adcf6ce4a9678f6cfefcc2c5edc80ab9f57e Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Sun, 21 Apr 2024 16:13:34 +0300 Subject: [PATCH 07/16] go.d add hddtemp (#17462) --- .../collectors/go.d.plugin/config/go.d.conf | 1 + .../go.d.plugin/config/go.d/hddtemp.conf | 6 + .../config/go.d/sd/net_listeners.conf | 7 + .../go.d.plugin/modules/hddtemp/charts.go | 70 ++++ .../go.d.plugin/modules/hddtemp/client.go | 44 +++ .../go.d.plugin/modules/hddtemp/collect.go | 140 ++++++++ .../modules/hddtemp/config_schema.json | 44 +++ .../go.d.plugin/modules/hddtemp/hddtemp.go | 104 ++++++ .../modules/hddtemp/hddtemp_test.go | 321 ++++++++++++++++++ .../go.d.plugin/modules/hddtemp/metadata.yaml | 134 ++++++++ .../modules/hddtemp/testdata/config.json | 5 + .../modules/hddtemp/testdata/config.yaml | 3 + .../hddtemp/testdata/hddtemp-all-ok.txt | 1 + .../hddtemp/testdata/hddtemp-all-sleep.txt | 1 + src/go/collectors/go.d.plugin/modules/init.go | 1 + 15 files changed, 882 insertions(+) create mode 100644 src/go/collectors/go.d.plugin/config/go.d/hddtemp.conf create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/charts.go create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/client.go create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/collect.go create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/config_schema.json create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp.go create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp_test.go create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/metadata.yaml create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.json create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.yaml create mode 100644 
src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-ok.txt create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-sleep.txt diff --git a/src/go/collectors/go.d.plugin/config/go.d.conf b/src/go/collectors/go.d.plugin/config/go.d.conf index ab3a5aca742d52..9fe91db5d95a0b 100644 --- a/src/go/collectors/go.d.plugin/config/go.d.conf +++ b/src/go/collectors/go.d.plugin/config/go.d.conf @@ -39,6 +39,7 @@ modules: # fluentd: yes # freeradius: yes # haproxy: yes +# hddtemp: yes # hdfs: yes # httpcheck: yes # intelgpu: yes diff --git a/src/go/collectors/go.d.plugin/config/go.d/hddtemp.conf b/src/go/collectors/go.d.plugin/config/go.d/hddtemp.conf new file mode 100644 index 00000000000000..a2ea8452d3f608 --- /dev/null +++ b/src/go/collectors/go.d.plugin/config/go.d/hddtemp.conf @@ -0,0 +1,6 @@ +## All available configuration options, their descriptions and default values: +## https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/hddtemp#readme + +#jobs: +# - name: local +# address: 127.0.0.1:7634 diff --git a/src/go/collectors/go.d.plugin/config/go.d/sd/net_listeners.conf b/src/go/collectors/go.d.plugin/config/go.d/sd/net_listeners.conf index 1c240ce5b672d3..6c2e22c71c9e05 100644 --- a/src/go/collectors/go.d.plugin/config/go.d/sd/net_listeners.conf +++ b/src/go/collectors/go.d.plugin/config/go.d/sd/net_listeners.conf @@ -50,6 +50,8 @@ classify: expr: '{{ and (eq .Port "6060") (eq .Comm "geth") }}' - tags: "haproxy" expr: '{{ and (eq .Port "8404") (eq .Comm "haproxy") }}' + - tags: "hddtemp" + expr: '{{ and (eq .Port "7634") (eq .Comm "hddtemp") }}' - tags: "hdfs_namenode" expr: '{{ and (eq .Port "9870") (eq .Comm "hadoop") }}' - tags: "hdfs_datanode" @@ -226,6 +228,11 @@ compose: module: haproxy name: local url: http://{{.Address}}/metrics + - selector: "hddtemp" + template: | + module: hddtemp + name: local + address: {{.Address}} - selector: "hdfs_namenode" template: | module: hdfs diff --git 
a/src/go/collectors/go.d.plugin/modules/hddtemp/charts.go b/src/go/collectors/go.d.plugin/modules/hddtemp/charts.go new file mode 100644 index 00000000000000..7a5e9ed9f47e8b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/charts.go @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package hddtemp + +import ( + "fmt" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + prioDiskTemperature = module.Priority + iota + prioDiskTemperatureSensorStatus +) + +var ( + diskTemperatureChartsTmpl = module.Chart{ + ID: "disk_%s_temperature", + Title: "Disk temperature", + Units: "Celsius", + Fam: "temperature", + Ctx: "hddtemp.disk_temperature", + Type: module.Line, + Priority: prioDiskTemperature, + Dims: module.Dims{ + {ID: "disk_%s_temperature", Name: "temperature"}, + }, + } + diskTemperatureSensorChartsTmpl = module.Chart{ + ID: "disk_%s_temperature_sensor_status", + Title: "Disk temperature sensor status", + Units: "status", + Fam: "sensor", + Ctx: "hddtemp.disk_temperature_sensor_status", + Type: module.Line, + Priority: prioDiskTemperatureSensorStatus, + Dims: module.Dims{ + {ID: "disk_%s_temp_sensor_status_ok", Name: "ok"}, + {ID: "disk_%s_temp_sensor_status_err", Name: "err"}, + {ID: "disk_%s_temp_sensor_status_na", Name: "na"}, + {ID: "disk_%s_temp_sensor_status_unk", Name: "unk"}, + {ID: "disk_%s_temp_sensor_status_nos", Name: "nos"}, + {ID: "disk_%s_temp_sensor_status_slp", Name: "slp"}, + }, + } +) + +func (h *HddTemp) addDiskTempSensorStatusChart(id string, disk diskStats) { + h.addDiskChart(id, disk, diskTemperatureSensorChartsTmpl.Copy()) +} + +func (h *HddTemp) addDiskTempChart(id string, disk diskStats) { + h.addDiskChart(id, disk, diskTemperatureChartsTmpl.Copy()) +} + +func (h *HddTemp) addDiskChart(id string, disk diskStats, chart *module.Chart) { + chart.ID = fmt.Sprintf(chart.ID, strings.ToLower(id)) + chart.Labels = []module.Label{ + {Key: "disk_id", Value: id}, + {Key: "model", 
Value: disk.model}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, id) + } + + if err := h.Charts().Add(chart); err != nil { + h.Warning(err) + } +} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/client.go b/src/go/collectors/go.d.plugin/modules/hddtemp/client.go new file mode 100644 index 00000000000000..626381ee86bc6a --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/client.go @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package hddtemp + +import ( + "github.com/netdata/netdata/go/go.d.plugin/pkg/socket" +) + +func newHddTempConn(conf Config) hddtempConn { + return &hddtempClient{conn: socket.New(socket.Config{ + Address: conf.Address, + ConnectTimeout: conf.Timeout.Duration(), + ReadTimeout: conf.Timeout.Duration(), + WriteTimeout: conf.Timeout.Duration(), + })} +} + +type hddtempClient struct { + conn socket.Client +} + +func (c *hddtempClient) connect() error { + return c.conn.Connect() +} + +func (c *hddtempClient) disconnect() { + _ = c.conn.Disconnect() +} + +func (c *hddtempClient) queryHddTemp() (string, error) { + var i int + var s string + err := c.conn.Command("", func(bytes []byte) bool { + if i++; i > 1 { + return false + } + s = string(bytes) + return true + }) + if err != nil { + return "", err + } + return s, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/collect.go b/src/go/collectors/go.d.plugin/modules/hddtemp/collect.go new file mode 100644 index 00000000000000..f5c75db041296d --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/collect.go @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package hddtemp + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +type diskStats struct { + devPath string + model string + temperature string + unit string +} + +func (h *HddTemp) collect() (map[string]int64, error) { + conn := h.newHddTempConn(h.Config) + + if err := conn.connect(); err != nil { + return nil, err + } + + defer 
conn.disconnect() + + msg, err := conn.queryHddTemp() + if err != nil { + return nil, err + } + + h.Debugf("hddtemp daemon response: %s", msg) + + disks, err := parseHddTempMessage(msg) + if err != nil { + return nil, err + } + + mx := make(map[string]int64) + + for _, disk := range disks { + id := getDiskID(disk) + if id == "" { + h.Debugf("can not extract disk id from '%s'", disk.devPath) + continue + } + + if !h.disks[id] { + h.disks[id] = true + h.addDiskTempSensorStatusChart(id, disk) + } + + px := fmt.Sprintf("disk_%s_", id) + + for _, st := range []string{"ok", "na", "unk", "nos", "slp", "err"} { + mx[px+"temp_sensor_status_"+st] = 0 + } + switch disk.temperature { + case "NA": + mx[px+"temp_sensor_status_na"] = 1 + case "UNK": + mx[px+"temp_sensor_status_unk"] = 1 + case "NOS": + mx[px+"temp_sensor_status_nos"] = 1 + case "SLP": + mx[px+"temp_sensor_status_slp"] = 1 + case "ERR": + mx[px+"temp_sensor_status_err"] = 1 + default: + if v, ok := getTemperature(disk); ok { + if !h.disksTemp[id] { + h.disksTemp[id] = true + h.addDiskTempChart(id, disk) + } + mx[px+"temp_sensor_status_ok"] = 1 + mx[px+"temperature"] = v + } else { + mx[px+"temp_sensor_status_unk"] = 1 + } + } + } + + return mx, nil +} + +func getDiskID(d diskStats) string { + i := strings.LastIndexByte(d.devPath, '/') + if i == -1 { + return "" + } + return d.devPath[i+1:] +} + +func getTemperature(d diskStats) (int64, bool) { + v, err := strconv.ParseInt(d.temperature, 10, 64) + if err != nil { + return 0, false + } + if d.unit == "F" { + v = (v - 32) * 5 / 9 + } + return v, true +} + +func parseHddTempMessage(msg string) ([]diskStats, error) { + if msg == "" { + return nil, errors.New("empty hddtemp message") + } + + // https://github.com/guzu/hddtemp/blob/e16aed6d0145d7ad8b3308dd0b9199fc701c0417/src/daemon.c#L165 + parts := strings.Split(msg, "|") + + var i int + // remove empty values + for _, v := range parts { + if v = strings.TrimSpace(v); v != "" { + parts[i] = v + i++ + } + } + parts = 
parts[:i] + + if len(parts) == 0 || len(parts)%4 != 0 { + return nil, errors.New("invalid hddtemp output format") + } + + var disks []diskStats + + for i := 0; i < len(parts); i += 4 { + disks = append(disks, diskStats{ + devPath: parts[i], + model: parts[i+1], + temperature: parts[i+2], + unit: parts[i+3], + }) + } + + return disks, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/config_schema.json b/src/go/collectors/go.d.plugin/modules/hddtemp/config_schema.json new file mode 100644 index 00000000000000..2858fbe0267052 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/config_schema.json @@ -0,0 +1,44 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "HddTemp collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 1 + }, + "address": { + "title": "Address", + "description": "The IP address and port where the hddtemp daemon listens for connections.", + "type": "string", + "default": "127.0.0.1:7634" + }, + "timeout": { + "title": "Timeout", + "description": "Timeout for establishing a connection and communication (reading and writing) in seconds.", + "type": "number", + "minimum": 0.5, + "default": 1 + } + }, + "required": [ + "address" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." 
+ } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp.go b/src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp.go new file mode 100644 index 00000000000000..3976506053ddde --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp.go @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package hddtemp + +import ( + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("hddtemp", module.Creator{ + JobConfigSchema: configSchema, + Create: func() module.Module { return New() }, + }) +} + +func New() *HddTemp { + return &HddTemp{ + Config: Config{ + Address: "127.0.0.1:7634", + Timeout: web.Duration(time.Second * 1), + }, + newHddTempConn: newHddTempConn, + charts: &module.Charts{}, + disks: make(map[string]bool), + disksTemp: make(map[string]bool), + } +} + +type Config struct { + UpdateEvery int `yaml:"update_every" json:"update_every"` + Address string `yaml:"address" json:"address"` + Timeout web.Duration `yaml:"timeout" json:"timeout"` +} + +type ( + HddTemp struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + + newHddTempConn func(Config) hddtempConn + + disks map[string]bool + disksTemp map[string]bool + } + + hddtempConn interface { + connect() error + disconnect() + queryHddTemp() (string, error) + } +) + +func (h *HddTemp) Configuration() any { + return h.Config +} + +func (h *HddTemp) Init() error { + if h.Address == "" { + h.Error("config: 'address' not set") + return errors.New("address not set") + } + + return nil +} + +func (h *HddTemp) Check() error { + mx, err := h.collect() + if err != nil { + h.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") + } + return nil +} + +func (h *HddTemp) Charts() *module.Charts { + return h.charts +} + +func 
(h *HddTemp) Collect() map[string]int64 { + mx, err := h.collect() + if err != nil { + h.Error(err) + } + + if len(mx) == 0 { + return nil + } + return mx +} + +func (h *HddTemp) Cleanup() {} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp_test.go b/src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp_test.go new file mode 100644 index 00000000000000..cab4ceb970e2f1 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/hddtemp_test.go @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package hddtemp + +import ( + "errors" + "os" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") + + dataAllOK, _ = os.ReadFile("testdata/hddtemp-all-ok.txt") + dataAllSleep, _ = os.ReadFile("testdata/hddtemp-all-sleep.txt") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + + "dataAllOK": dataAllOK, + "dataAllSleep": dataAllSleep, + } { + require.NotNil(t, data, name) + } +} + +func TestHddTemp_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &HddTemp{}, dataConfigJSON, dataConfigYAML) +} + +func TestHddTemp_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "success with default config": { + wantFail: false, + config: New().Config, + }, + "fails if address not set": { + wantFail: true, + config: func() Config { + conf := New().Config + conf.Address = "" + return conf + }(), + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + hdd := New() + hdd.Config = test.config + + if test.wantFail { + assert.Error(t, hdd.Init()) + } else { + assert.NoError(t, hdd.Init()) + } + }) + } +} + +func 
TestHddTemp_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func() *HddTemp + }{ + "not initialized": { + prepare: func() *HddTemp { + return New() + }, + }, + "after check": { + prepare: func() *HddTemp { + hdd := New() + hdd.newHddTempConn = func(config Config) hddtempConn { return prepareMockAllDisksOk() } + _ = hdd.Check() + return hdd + }, + }, + "after collect": { + prepare: func() *HddTemp { + hdd := New() + hdd.newHddTempConn = func(config Config) hddtempConn { return prepareMockAllDisksOk() } + _ = hdd.Collect() + return hdd + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + hdd := test.prepare() + + assert.NotPanics(t, hdd.Cleanup) + }) + } +} + +func TestHddTemp_Charts(t *testing.T) { + assert.NotNil(t, New().Charts()) +} + +func TestHddTemp_Check(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockHddTempConn + wantFail bool + }{ + "all disks ok": { + wantFail: false, + prepareMock: prepareMockAllDisksOk, + }, + "all disks sleep": { + wantFail: false, + prepareMock: prepareMockAllDisksSleep, + }, + "err on connect": { + wantFail: true, + prepareMock: prepareMockErrOnConnect, + }, + "unexpected response": { + wantFail: true, + prepareMock: prepareMockUnexpectedResponse, + }, + "empty response": { + wantFail: true, + prepareMock: prepareMockEmptyResponse, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + hdd := New() + mock := test.prepareMock() + hdd.newHddTempConn = func(config Config) hddtempConn { return mock } + + if test.wantFail { + assert.Error(t, hdd.Check()) + } else { + assert.NoError(t, hdd.Check()) + } + }) + } +} + +func TestHddTemp_Collect(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockHddTempConn + wantMetrics map[string]int64 + wantDisconnect bool + wantCharts int + }{ + "all disks ok": { + prepareMock: prepareMockAllDisksOk, + wantDisconnect: true, + wantCharts: 2 * 4, + wantMetrics: map[string]int64{ + 
"disk_sda_temp_sensor_status_err": 0, + "disk_sda_temp_sensor_status_na": 0, + "disk_sda_temp_sensor_status_nos": 0, + "disk_sda_temp_sensor_status_ok": 1, + "disk_sda_temp_sensor_status_slp": 0, + "disk_sda_temp_sensor_status_unk": 0, + "disk_sda_temperature": 50, + "disk_sdb_temp_sensor_status_err": 0, + "disk_sdb_temp_sensor_status_na": 0, + "disk_sdb_temp_sensor_status_nos": 0, + "disk_sdb_temp_sensor_status_ok": 1, + "disk_sdb_temp_sensor_status_slp": 0, + "disk_sdb_temp_sensor_status_unk": 0, + "disk_sdb_temperature": 49, + "disk_sdc_temp_sensor_status_err": 0, + "disk_sdc_temp_sensor_status_na": 0, + "disk_sdc_temp_sensor_status_nos": 0, + "disk_sdc_temp_sensor_status_ok": 1, + "disk_sdc_temp_sensor_status_slp": 0, + "disk_sdc_temp_sensor_status_unk": 0, + "disk_sdc_temperature": 27, + "disk_sdd_temp_sensor_status_err": 0, + "disk_sdd_temp_sensor_status_na": 0, + "disk_sdd_temp_sensor_status_nos": 0, + "disk_sdd_temp_sensor_status_ok": 1, + "disk_sdd_temp_sensor_status_slp": 0, + "disk_sdd_temp_sensor_status_unk": 0, + "disk_sdd_temperature": 29, + }, + }, + "all disks sleep": { + prepareMock: prepareMockAllDisksSleep, + wantDisconnect: true, + wantCharts: 3, + wantMetrics: map[string]int64{ + "disk_ata-HUP722020APA330_BFGWU7WF_temp_sensor_status_err": 0, + "disk_ata-HUP722020APA330_BFGWU7WF_temp_sensor_status_na": 0, + "disk_ata-HUP722020APA330_BFGWU7WF_temp_sensor_status_nos": 0, + "disk_ata-HUP722020APA330_BFGWU7WF_temp_sensor_status_ok": 0, + "disk_ata-HUP722020APA330_BFGWU7WF_temp_sensor_status_slp": 1, + "disk_ata-HUP722020APA330_BFGWU7WF_temp_sensor_status_unk": 0, + "disk_ata-HUP722020APA330_BFJ0WS3F_temp_sensor_status_err": 0, + "disk_ata-HUP722020APA330_BFJ0WS3F_temp_sensor_status_na": 0, + "disk_ata-HUP722020APA330_BFJ0WS3F_temp_sensor_status_nos": 0, + "disk_ata-HUP722020APA330_BFJ0WS3F_temp_sensor_status_ok": 0, + "disk_ata-HUP722020APA330_BFJ0WS3F_temp_sensor_status_slp": 1, + "disk_ata-HUP722020APA330_BFJ0WS3F_temp_sensor_status_unk": 0, + 
"disk_ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922_temp_sensor_status_err": 0, + "disk_ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922_temp_sensor_status_na": 0, + "disk_ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922_temp_sensor_status_nos": 0, + "disk_ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922_temp_sensor_status_ok": 0, + "disk_ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922_temp_sensor_status_slp": 1, + "disk_ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922_temp_sensor_status_unk": 0, + }, + }, + "err on connect": { + prepareMock: prepareMockErrOnConnect, + wantDisconnect: false, + }, + "unexpected response": { + prepareMock: prepareMockUnexpectedResponse, + wantDisconnect: true, + }, + "empty response": { + prepareMock: prepareMockEmptyResponse, + wantDisconnect: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + hdd := New() + mock := test.prepareMock() + hdd.newHddTempConn = func(config Config) hddtempConn { return mock } + + mx := hdd.Collect() + + assert.Equal(t, test.wantMetrics, mx) + assert.Len(t, *hdd.Charts(), test.wantCharts) + assert.Equal(t, test.wantDisconnect, mock.disconnectCalled) + testMetricsHasAllChartsDims(t, hdd, mx) + }) + } +} + +func testMetricsHasAllChartsDims(t *testing.T, hdd *HddTemp, mx map[string]int64) { + for _, chart := range *hdd.Charts() { + if chart.Obsolete { + continue + } + for _, dim := range chart.Dims { + _, ok := mx[dim.ID] + assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) + } + for _, v := range chart.Vars { + _, ok := mx[v.ID] + assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) + } + } +} + +func prepareMockAllDisksOk() *mockHddTempConn { + return &mockHddTempConn{ + hddTempLine: string(dataAllOK), + } +} + +func prepareMockAllDisksSleep() *mockHddTempConn { + return &mockHddTempConn{ + hddTempLine: string(dataAllSleep), + } +} + +func prepareMockErrOnConnect() *mockHddTempConn { + return &mockHddTempConn{ + errOnConnect: true, + } 
+} + +func prepareMockUnexpectedResponse() *mockHddTempConn { + return &mockHddTempConn{ + hddTempLine: "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + } +} + +func prepareMockEmptyResponse() *mockHddTempConn { + return &mockHddTempConn{ + hddTempLine: "", + } +} + +type mockHddTempConn struct { + errOnConnect bool + errOnQueryHddTemp bool + hddTempLine string + disconnectCalled bool +} + +func (m *mockHddTempConn) connect() error { + if m.errOnConnect { + return errors.New("mock.connect() error") + } + return nil +} + +func (m *mockHddTempConn) disconnect() { + m.disconnectCalled = true +} + +func (m *mockHddTempConn) queryHddTemp() (string, error) { + if m.errOnQueryHddTemp { + return "", errors.New("mock.queryHddTemp() error") + } + return m.hddTempLine, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/metadata.yaml b/src/go/collectors/go.d.plugin/modules/hddtemp/metadata.yaml new file mode 100644 index 00000000000000..74206ebc95fed1 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/metadata.yaml @@ -0,0 +1,134 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-hddtemp + plugin_name: go.d.plugin + module_name: hddtemp + monitored_instance: + name: HDD temperature + link: https://linux.die.net/man/8/hddtemp + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "hard-drive.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - hardware + - hdd temperature + - disk temperature + - temperature + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors disk temperatures. + method_description: | + It retrieves temperature data for attached disks by querying the hddtemp daemon at regular intervals. 
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: By default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634` + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Install hddtemp + description: | + Install `hddtemp` using your distribution's package manager. + configuration: + file: + name: go.d/hddtemp.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: address + description: The IP address and port where the hddtemp daemon listens for connections. + default_value: 127.0.0.1:7634 + required: true + - name: timeout + description: Connection, read, and write timeout duration in seconds. The timeout includes name resolution. + default_value: 1 + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Basic + description: A basic example configuration. + config: | + jobs: + - name: local + address: 127.0.0.1:7634 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + jobs: + - name: local + address: 127.0.0.1:7634 + + - name: remote + address: 203.0.113.0:7634 + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: These metrics refer to the Disk. 
+ labels: + - name: disk_id + description: Disk identifier. It is derived from the device path (e.g. sda or ata-HUP722020APA330_BFJ0WS3F) + - name: model + description: Disk model + metrics: + - name: hddtemp.disk_temperature + description: Disk temperature + unit: Celsius + chart_type: line + dimensions: + - name: temperature + - name: hddtemp.disk_temperature_sensor_status + description: Disk temperature sensor status + unit: status + chart_type: line + dimensions: + - name: ok + - name: err + - name: na + - name: unk + - name: nos + - name: slp diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.json b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.json new file mode 100644 index 00000000000000..e868347203bee3 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.json @@ -0,0 +1,5 @@ +{ + "update_every": 123, + "address": "ok", + "timeout": 123.123 +} diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.yaml new file mode 100644 index 00000000000000..1b81d09eb8288b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/config.yaml @@ -0,0 +1,3 @@ +update_every: 123 +address: "ok" +timeout: 123.123 diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-ok.txt b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-ok.txt new file mode 100644 index 00000000000000..5f6606e812d200 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-ok.txt @@ -0,0 +1 @@ +|/dev/sda|WDC WD181KRYZ-01AGBB0|122|F||/dev/sdb|WDC WD181KRYZ-01AGBB0|49|C||/dev/sdc|WDC WDS400T1R0A-68A4W0|27|C||/dev/sdd|WDC WDS400T1R0A-68A4W0|29|C| \ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-sleep.txt b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-sleep.txt new file mode 100644 
index 00000000000000..732b62c7627d3e --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/testdata/hddtemp-all-sleep.txt @@ -0,0 +1 @@ +|/dev/disk/by-id/ata-HUP722020APA330_BFJ0WS3F|HUP722020APA330|SLP|*||/dev/disk/by-id/ata-HUP722020APA330_BFGWU7WF|HUP722020APA330|SLP|*||/dev/disk/by-id/ata-WDC_WD10EARS-00Y5B1_WD-WCAV5R693922|WDC WD10EARS-00Y5B1|SLP|*| \ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/init.go b/src/go/collectors/go.d.plugin/modules/init.go index 69e11617f7dc95..90f11029145c0c 100644 --- a/src/go/collectors/go.d.plugin/modules/init.go +++ b/src/go/collectors/go.d.plugin/modules/init.go @@ -29,6 +29,7 @@ import ( _ "github.com/netdata/netdata/go/go.d.plugin/modules/freeradius" _ "github.com/netdata/netdata/go/go.d.plugin/modules/geth" _ "github.com/netdata/netdata/go/go.d.plugin/modules/haproxy" + _ "github.com/netdata/netdata/go/go.d.plugin/modules/hddtemp" _ "github.com/netdata/netdata/go/go.d.plugin/modules/hdfs" _ "github.com/netdata/netdata/go/go.d.plugin/modules/httpcheck" _ "github.com/netdata/netdata/go/go.d.plugin/modules/intelgpu" From efef917b453250fcf3e9098dda244ca7e94f2f40 Mon Sep 17 00:00:00 2001 From: Netdata bot <43409846+netdatabot@users.noreply.github.com> Date: Sun, 21 Apr 2024 16:21:50 +0300 Subject: [PATCH 08/16] Regenerate integrations.js (#17464) Co-authored-by: ilyam8 <22274335+ilyam8@users.noreply.github.com> --- integrations/integrations.js | 39 ++++ integrations/integrations.json | 39 ++++ src/collectors/COLLECTORS.md | 2 + .../go.d.plugin/modules/hddtemp/README.md | 1 + .../hddtemp/integrations/hdd_temperature.md | 189 ++++++++++++++++++ 5 files changed, 270 insertions(+) create mode 120000 src/go/collectors/go.d.plugin/modules/hddtemp/README.md create mode 100644 src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md diff --git a/integrations/integrations.js b/integrations/integrations.js index 95728bf2c8f16f..b97bbd5d83c3ad 100644 --- 
a/integrations/integrations.js +++ b/integrations/integrations.js @@ -4182,6 +4182,45 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/haproxy/metadata.yaml", "related_resources": "" }, + { + "meta": { + "id": "collector-go.d.plugin-hddtemp", + "plugin_name": "go.d.plugin", + "module_name": "hddtemp", + "monitored_instance": { + "name": "HDD temperature", + "link": "https://linux.die.net/man/8/hddtemp", + "categories": [ + "data-collection.hardware-devices-and-sensors" + ], + "icon_filename": "hard-drive.svg" + }, + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "keywords": [ + "hardware", + "hdd temperature", + "disk temperature", + "temperature" + ], + "most_popular": false + }, + "overview": "# HDD temperature\n\nPlugin: go.d.plugin\nModule: hddtemp\n\n## Overview\n\nThis collector monitors disk temperatures.\n\n\nIt retrieves temperature data for attached disks by querying the hddtemp daemon at regular intervals.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nBy default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634`\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Install hddtemp\n\nInstall `hddtemp` using your distribution's package manager.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/hddtemp.conf`.\n\n\nYou can edit the configuration 
file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/hddtemp.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every, autodetection_retry.\n\n\n{% details summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 1 | no |\n| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no |\n| address | The IP address and port where the hddtemp daemon listens for connections. | 127.0.0.1:7634 | yes |\n| timeout | Connection, read, and write timeout duration in seconds. The timeout includes name resolution. | 1 | no |\n\n{% /details %}\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n{% details summary=\"Config\" %}\n```yaml\njobs:\n - name: local\n address: 127.0.0.1:7634\n\n```\n{% /details %}\n##### Multi-instance\n\n> **Note**: When you define multiple jobs, their names must be unique.\n\nCollecting metrics from local and remote instances.\n\n\n{% details summary=\"Config\" %}\n```yaml\njobs:\n - name: local\n address: 127.0.0.1:7634\n\n - name: remote\n address: 203.0.113.0:7634\n\n```\n{% /details %}\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `hddtemp` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m hddtemp\n ```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per disk\n\nThese metrics refer to the Disk.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| disk_id | Disk identifier. It is derived from the device path (e.g. sda or ata-HUP722020APA330_BFJ0WS3F) |\n| model | Disk model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| hddtemp.disk_temperature | temperature | Celsius |\n| hddtemp.disk_temperature_sensor_status | ok, err, na, unk, nos, slp | status |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-hddtemp-HDD_temperature", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hddtemp/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-hfs", diff --git a/integrations/integrations.json b/integrations/integrations.json index 21da9132cc0b16..48a6c900f7ee16 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -4180,6 +4180,45 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/haproxy/metadata.yaml", "related_resources": "" }, + { + "meta": { + "id": "collector-go.d.plugin-hddtemp", + "plugin_name": "go.d.plugin", + "module_name": "hddtemp", + "monitored_instance": { + "name": "HDD temperature", + "link": "https://linux.die.net/man/8/hddtemp", + "categories": [ + 
"data-collection.hardware-devices-and-sensors" + ], + "icon_filename": "hard-drive.svg" + }, + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "keywords": [ + "hardware", + "hdd temperature", + "disk temperature", + "temperature" + ], + "most_popular": false + }, + "overview": "# HDD temperature\n\nPlugin: go.d.plugin\nModule: hddtemp\n\n## Overview\n\nThis collector monitors disk temperatures.\n\n\nIt retrieves temperature data for attached disks by querying the hddtemp daemon at regular intervals.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nBy default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634`\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Install hddtemp\n\nInstall `hddtemp` using your distribution's package manager.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/hddtemp.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/hddtemp.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every, autodetection_retry.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data 
collection frequency. | 1 | no |\n| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no |\n| address | The IP address and port where the hddtemp daemon listens for connections. | 127.0.0.1:7634 | yes |\n| timeout | Connection, read, and write timeout duration in seconds. The timeout includes name resolution. | 1 | no |\n\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\njobs:\n - name: local\n address: 127.0.0.1:7634\n\n```\n##### Multi-instance\n\n> **Note**: When you define multiple jobs, their names must be unique.\n\nCollecting metrics from local and remote instances.\n\n\n```yaml\njobs:\n - name: local\n address: 127.0.0.1:7634\n\n - name: remote\n address: 203.0.113.0:7634\n\n```\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `hddtemp` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m hddtemp\n ```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per disk\n\nThese metrics refer to the Disk.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| disk_id | Disk identifier. It is derived from the device path (e.g. 
sda or ata-HUP722020APA330_BFJ0WS3F) |\n| model | Disk model |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| hddtemp.disk_temperature | temperature | Celsius |\n| hddtemp.disk_temperature_sensor_status | ok, err, na, unk, nos, slp | status |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-hddtemp-HDD_temperature", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hddtemp/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-hfs", diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index 26b81e77ec545d..48831e456deb95 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -493,6 +493,8 @@ If you don't see the app/service you'd like to monitor in this list: - [HDD temperature](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md) +- [HDD temperature](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md) + - [HP iLO](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hp_ilo.md) - [IBM CryptoExpress (CEX) cards](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_cryptoexpress_cex_cards.md) diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/README.md b/src/go/collectors/go.d.plugin/modules/hddtemp/README.md new file mode 120000 index 00000000000000..95c7593f803357 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/README.md @@ -0,0 +1 @@ +integrations/hdd_temperature.md \ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md b/src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md new file mode 100644 index 00000000000000..f0f272bdbc471a 
--- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md @@ -0,0 +1,189 @@ + + +# HDD temperature + + + + + +Plugin: go.d.plugin +Module: hddtemp + + + +## Overview + +This collector monitors disk temperatures. + + +It retrieves temperature data for attached disks by querying the hddtemp daemon at regular intervals. + + +This collector is only supported on the following platforms: + +- Linux + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +By default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634` + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per disk + +These metrics refer to the Disk. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| disk_id | Disk identifier. It is derived from the device path (e.g. sda or ata-HUP722020APA330_BFJ0WS3F) | +| model | Disk model | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| hddtemp.disk_temperature | temperature | Celsius | +| hddtemp.disk_temperature_sensor_status | ok, err, na, unk, nos, slp | status | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +#### Install hddtemp + +Install `hddtemp` using your distribution's package manager. + + + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/hddtemp.conf`. 
+ + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/hddtemp.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +
Config options + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| address | The IP address and port where the hddtemp daemon listens for connections. | 127.0.0.1:7634 | yes | +| timeout | Connection, read, and write timeout duration in seconds. The timeout includes name resolution. | 1 | no | + +
+ +#### Examples + +##### Basic + +A basic example configuration. + +
Config + +```yaml +jobs: + - name: local + address: 127.0.0.1:7634 + +``` +
+ +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +
Config + +```yaml +jobs: + - name: local + address: 127.0.0.1:7634 + + - name: remote + address: 203.0.113.0:7634 + +``` +
+ + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `hddtemp` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m hddtemp + ``` + + From 94f6bd445638ab5b769d47d402f09b4d1d4f87b5 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Sun, 21 Apr 2024 17:20:37 +0300 Subject: [PATCH 09/16] remove python.d/hddtemp (#17463) --- CMakeLists.txt | 2 - .../python.d.plugin/hddtemp/README.md | 1 - .../python.d.plugin/hddtemp/hddtemp.chart.py | 99 -------- .../python.d.plugin/hddtemp/hddtemp.conf | 95 -------- .../hddtemp/integrations/hdd_temperature.md | 217 ------------------ .../python.d.plugin/hddtemp/metadata.yaml | 163 ------------- src/collectors/python.d.plugin/python.d.conf | 2 +- 7 files changed, 1 insertion(+), 578 deletions(-) delete mode 120000 src/collectors/python.d.plugin/hddtemp/README.md delete mode 100644 src/collectors/python.d.plugin/hddtemp/hddtemp.chart.py delete mode 100644 src/collectors/python.d.plugin/hddtemp/hddtemp.conf delete mode 100644 src/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md delete mode 100644 src/collectors/python.d.plugin/hddtemp/metadata.yaml diff --git a/CMakeLists.txt b/CMakeLists.txt index 80495b36b7d61d..58cbf818d8cbbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2555,7 +2555,6 @@ install(FILES src/collectors/python.d.plugin/gearman/gearman.conf src/collectors/python.d.plugin/go_expvar/go_expvar.conf src/collectors/python.d.plugin/haproxy/haproxy.conf - 
src/collectors/python.d.plugin/hddtemp/hddtemp.conf src/collectors/python.d.plugin/hpssa/hpssa.conf src/collectors/python.d.plugin/icecast/icecast.conf src/collectors/python.d.plugin/ipfs/ipfs.conf @@ -2604,7 +2603,6 @@ install(FILES src/collectors/python.d.plugin/gearman/gearman.chart.py src/collectors/python.d.plugin/go_expvar/go_expvar.chart.py src/collectors/python.d.plugin/haproxy/haproxy.chart.py - src/collectors/python.d.plugin/hddtemp/hddtemp.chart.py src/collectors/python.d.plugin/hpssa/hpssa.chart.py src/collectors/python.d.plugin/icecast/icecast.chart.py src/collectors/python.d.plugin/ipfs/ipfs.chart.py diff --git a/src/collectors/python.d.plugin/hddtemp/README.md b/src/collectors/python.d.plugin/hddtemp/README.md deleted file mode 120000 index 95c7593f803357..00000000000000 --- a/src/collectors/python.d.plugin/hddtemp/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/hdd_temperature.md \ No newline at end of file diff --git a/src/collectors/python.d.plugin/hddtemp/hddtemp.chart.py b/src/collectors/python.d.plugin/hddtemp/hddtemp.chart.py deleted file mode 100644 index 6427aa1804eda2..00000000000000 --- a/src/collectors/python.d.plugin/hddtemp/hddtemp.chart.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: hddtemp netdata python.d module -# Author: Pawel Krupa (paulfantom) -# Author: Ilya Mashchenko (ilyam8) -# SPDX-License-Identifier: GPL-3.0-or-later - - -import re -from copy import deepcopy - -from bases.FrameworkServices.SocketService import SocketService - -ORDER = [ - 'temperatures', -] - -CHARTS = { - 'temperatures': { - 'options': ['disks_temp', 'Disks Temperatures', 'Celsius', 'temperatures', 'hddtemp.temperatures', 'line'], - 'lines': [ - # lines are created dynamically in `check()` method - ]}} - -RE = re.compile(r'\/dev\/([^|]+)\|([^|]+)\|([0-9]+|SLP|UNK)\|') - - -class Disk: - def __init__(self, id_, name, temp): - self.id = id_.split('/')[-1] - self.name = name.replace(' ', '_') - self.temp = temp if 
temp.isdigit() else None - - def __repr__(self): - return self.id - - -class Service(SocketService): - def __init__(self, configuration=None, name=None): - SocketService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = deepcopy(CHARTS) - self.do_only = self.configuration.get('devices') - self._keep_alive = False - self.request = "" - self.host = "127.0.0.1" - self.port = 7634 - - def get_disks(self): - r = self._get_raw_data() - - if not r: - return None - - m = RE.findall(r) - - if not m: - self.error("received data doesn't have needed records") - return None - - rv = [Disk(*d) for d in m] - self.debug('available disks: {0}'.format(rv)) - - if self.do_only: - return [v for v in rv if v.id in self.do_only] - return rv - - def get_data(self): - """ - Get data from TCP/IP socket - :return: dict - """ - - disks = self.get_disks() - - if not disks: - return None - - return dict((d.id, d.temp) for d in disks) - - def check(self): - """ - Parse configuration, check if hddtemp is available, and dynamically create chart lines data - :return: boolean - """ - self._parse_config() - disks = self.get_disks() - - if not disks: - return False - - for d in disks: - dim = [d.id] - self.definitions['temperatures']['lines'].append(dim) - - return True - - @staticmethod - def _check_raw_data(data): - return not bool(data) diff --git a/src/collectors/python.d.plugin/hddtemp/hddtemp.conf b/src/collectors/python.d.plugin/hddtemp/hddtemp.conf deleted file mode 100644 index b2d7aef6320933..00000000000000 --- a/src/collectors/python.d.plugin/hddtemp/hddtemp.conf +++ /dev/null @@ -1,95 +0,0 @@ -# netdata python.d.plugin configuration for hddtemp -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. 
-# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, hddtemp also supports the following: -# -# host: 'IP or HOSTNAME' # the host to connect to -# port: PORT # the port to connect to -# - -# By default this module will try to autodetect disks -# (autodetection works only for disk which names start with "sd"). -# However this can be overridden by setting variable `disks` to -# array of desired disks. Example for two disks: -# -# devices: -# - sda -# - sdb -# - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name: 'local' - host: 'localhost' - port: 7634 - -localipv4: - name: 'local' - host: '127.0.0.1' - port: 7634 - -localipv6: - name: 'local' - host: '::1' - port: 7634 diff --git a/src/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md b/src/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md deleted file mode 100644 index 9b39d1371db82b..00000000000000 --- a/src/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md +++ /dev/null @@ -1,217 +0,0 @@ - - -# HDD temperature - - - - - -Plugin: python.d.plugin -Module: hddtemp - - - -## Overview - -This collector monitors disk temperatures. - - -It uses the `hddtemp` daemon to gather the metrics. - - -This collector is only supported on the following platforms: - -- Linux - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. 
- - -### Default Behavior - -#### Auto-Detection - -By default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634` - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per HDD temperature instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| hddtemp.temperatures | a dimension per disk | Celsius | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Run `hddtemp` in daemon mode - -You can execute `hddtemp` in TCP/IP daemon mode by using the `-d` argument. - -So running `hddtemp -d` would run the daemon, by default on port 7634. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/hddtemp.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/hddtemp.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. 
- -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - -By default this collector will try to autodetect disks (autodetection works only for disk which names start with "sd"). However this can be overridden by setting the option `disks` to an array of desired disks. - - -
Config options - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no | -| devices | Array of desired disks to detect, in case their name doesn't start with `sd`. | | no | -| host | The IP or HOSTNAME to connect to. | localhost | yes | -| port | The port to connect to. | 7634 | no | - -
- -#### Examples - -##### Basic - -A basic example configuration. - -```yaml -localhost: - name: 'local' - host: '127.0.0.1' - port: 7634 - -``` -##### Custom disk names - -An example defining the disk names to detect. - -
Config - -```yaml -localhost: - name: 'local' - host: '127.0.0.1' - port: 7634 - devices: - - customdisk1 - - customdisk2 - -``` -
- -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -
Config - -```yaml -localhost: - name: 'local' - host: '127.0.0.1' - port: 7634 - -remote_job: - name : 'remote' - host : 'http://192.0.2.1:2812' - -``` -
- - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `hddtemp` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin hddtemp debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/hddtemp/metadata.yaml b/src/collectors/python.d.plugin/hddtemp/metadata.yaml deleted file mode 100644 index d8b56fc66ec9f8..00000000000000 --- a/src/collectors/python.d.plugin/hddtemp/metadata.yaml +++ /dev/null @@ -1,163 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: hddtemp - monitored_instance: - name: HDD temperature - link: https://linux.die.net/man/8/hddtemp - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: "hard-drive.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - hardware - - hdd temperature - - disk temperature - - temperature - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors disk temperatures. - method_description: | - It uses the `hddtemp` daemon to gather the metrics. 
- supported_platforms: - include: - - Linux - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: By default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634` - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Run `hddtemp` in daemon mode - description: | - You can execute `hddtemp` in TCP/IP daemon mode by using the `-d` argument. - - So running `hddtemp -d` would run the daemon, by default on port 7634. - configuration: - file: - name: "python.d/hddtemp.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - By default this collector will try to autodetect disks (autodetection works only for disk which names start with "sd"). However this can be overridden by setting the option `disks` to an array of desired disks. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 1 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. 
- default_value: yes - required: false - - name: name - description: > - Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "local" - required: false - - name: devices - description: Array of desired disks to detect, in case their name doesn't start with `sd`. - default_value: "" - required: false - - name: host - description: The IP or HOSTNAME to connect to. - default_value: "localhost" - required: true - - name: port - description: The port to connect to. - default_value: 7634 - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - description: A basic example configuration. - folding: - enabled: false - config: | - localhost: - name: 'local' - host: '127.0.0.1' - port: 7634 - - name: Custom disk names - description: An example defining the disk names to detect. - config: | - localhost: - name: 'local' - host: '127.0.0.1' - port: 7634 - devices: - - customdisk1 - - customdisk2 - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - localhost: - name: 'local' - host: '127.0.0.1' - port: 7634 - - remote_job: - name : 'remote' - host : 'http://192.0.2.1:2812' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: hddtemp.temperatures - description: Disk Temperatures - unit: "Celsius" - chart_type: line - dimensions: - - name: a dimension per disk diff --git a/src/collectors/python.d.plugin/python.d.conf b/src/collectors/python.d.plugin/python.d.conf index 9466a533d529c6..c73e2f587ff1e4 100644 --- a/src/collectors/python.d.plugin/python.d.conf +++ b/src/collectors/python.d.plugin/python.d.conf @@ -44,7 +44,7 @@ example: no go_expvar: no # haproxy: yes -# hddtemp: yes +hddtemp: no # replaced with go.d/hddtemp. Disabled for existing installations. hpssa: no # icecast: yes # ipfs: yes From cbdb95b958f64788ea5bcc76459d03ede1c7d294 Mon Sep 17 00:00:00 2001 From: Netdata bot <43409846+netdatabot@users.noreply.github.com> Date: Sun, 21 Apr 2024 17:29:24 +0300 Subject: [PATCH 10/16] Regenerate integrations.js (#17465) Co-authored-by: ilyam8 <22274335+ilyam8@users.noreply.github.com> --- integrations/integrations.js | 38 ---------------------------------- integrations/integrations.json | 38 ---------------------------------- src/collectors/COLLECTORS.md | 2 -- 3 files changed, 78 deletions(-) diff --git a/integrations/integrations.js b/integrations/integrations.js index b97bbd5d83c3ad..ae51bc4e40c5d6 100644 --- a/integrations/integrations.js +++ b/integrations/integrations.js @@ -18589,44 +18589,6 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/go_expvar/metadata.yaml", "related_resources": "" }, - { - "meta": { - "plugin_name": "python.d.plugin", - "module_name": "hddtemp", - "monitored_instance": { - "name": "HDD temperature", - "link": "https://linux.die.net/man/8/hddtemp", - "categories": [ - "data-collection.hardware-devices-and-sensors" - ], - "icon_filename": "hard-drive.svg" - }, - "related_resources": { - "integrations": { - "list": [] - } - }, - "info_provided_to_referring_integrations": { - "description": "" - }, - "keywords": [ - "hardware", - "hdd temperature", 
- "disk temperature", - "temperature" - ], - "most_popular": false - }, - "overview": "# HDD temperature\n\nPlugin: python.d.plugin\nModule: hddtemp\n\n## Overview\n\nThis collector monitors disk temperatures.\n\n\nIt uses the `hddtemp` daemon to gather the metrics.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nBy default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634`\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Run `hddtemp` in daemon mode\n\nYou can execute `hddtemp` in TCP/IP daemon mode by using the `-d` argument.\n\nSo running `hddtemp -d` would run the daemon, by default on port 7634.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/hddtemp.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/hddtemp.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB 
definition.\n\nEvery configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.\n\nBy default this collector will try to autodetect disks (autodetection works only for disk which names start with \"sd\"). However this can be overridden by setting the option `disks` to an array of desired disks.\n\n\n{% details summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. | 1 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no |\n| devices | Array of desired disks to detect, in case their name doesn't start with `sd`. | | no |\n| host | The IP or HOSTNAME to connect to. | localhost | yes |\n| port | The port to connect to. 
| 7634 | no |\n\n{% /details %}\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\nlocalhost:\n name: 'local'\n host: '127.0.0.1'\n port: 7634\n\n```\n##### Custom disk names\n\nAn example defining the disk names to detect.\n\n{% details summary=\"Config\" %}\n```yaml\nlocalhost:\n name: 'local'\n host: '127.0.0.1'\n port: 7634\n devices:\n - customdisk1\n - customdisk2\n\n```\n{% /details %}\n##### Multi-instance\n\n> **Note**: When you define multiple jobs, their names must be unique.\n\nCollecting metrics from local and remote instances.\n\n\n{% details summary=\"Config\" %}\n```yaml\nlocalhost:\n name: 'local'\n host: '127.0.0.1'\n port: 7634\n\nremote_job:\n name : 'remote'\n host : 'http://192.0.2.1:2812'\n\n```\n{% /details %}\n", - "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `hddtemp` collector, run the `python.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `python.d.plugin` to debug the collector:\n\n ```bash\n ./python.d.plugin hddtemp debug trace\n ```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per HDD temperature instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| hddtemp.temperatures | a dimension per disk | Celsius |\n\n", - "integration_type": "collector", - "id": "python.d.plugin-hddtemp-HDD_temperature", - "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/hddtemp/metadata.yaml", - "related_resources": "" - }, { "meta": { "plugin_name": "python.d.plugin", diff --git a/integrations/integrations.json b/integrations/integrations.json index 48a6c900f7ee16..10553d658d5bca 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -18587,44 +18587,6 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/go_expvar/metadata.yaml", "related_resources": "" }, - { - "meta": { - "plugin_name": "python.d.plugin", - "module_name": "hddtemp", - "monitored_instance": { - "name": "HDD temperature", - "link": "https://linux.die.net/man/8/hddtemp", - "categories": [ - "data-collection.hardware-devices-and-sensors" - ], - "icon_filename": "hard-drive.svg" - }, - "related_resources": { - "integrations": { - "list": [] - } - }, - "info_provided_to_referring_integrations": { - "description": "" - }, - "keywords": [ - "hardware", - "hdd temperature", - "disk temperature", - "temperature" - ], - "most_popular": false - }, - "overview": "# HDD temperature\n\nPlugin: python.d.plugin\nModule: hddtemp\n\n## Overview\n\nThis collector monitors disk temperatures.\n\n\nIt uses the `hddtemp` daemon to gather the metrics.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nBy default, this collector 
will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634`\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Run `hddtemp` in daemon mode\n\nYou can execute `hddtemp` in TCP/IP daemon mode by using the `-d` argument.\n\nSo running `hddtemp -d` would run the daemon, by default on port 7634.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/hddtemp.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/hddtemp.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.\n\nBy default this collector will try to autodetect disks (autodetection works only for disk which names start with \"sd\"). However this can be overridden by setting the option `disks` to an array of desired disks.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. 
| 1 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no |\n| devices | Array of desired disks to detect, in case their name doesn't start with `sd`. | | no |\n| host | The IP or HOSTNAME to connect to. | localhost | yes |\n| port | The port to connect to. | 7634 | no |\n\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\nlocalhost:\n name: 'local'\n host: '127.0.0.1'\n port: 7634\n\n```\n##### Custom disk names\n\nAn example defining the disk names to detect.\n\n```yaml\nlocalhost:\n name: 'local'\n host: '127.0.0.1'\n port: 7634\n devices:\n - customdisk1\n - customdisk2\n\n```\n##### Multi-instance\n\n> **Note**: When you define multiple jobs, their names must be unique.\n\nCollecting metrics from local and remote instances.\n\n\n```yaml\nlocalhost:\n name: 'local'\n host: '127.0.0.1'\n port: 7634\n\nremote_job:\n name : 'remote'\n host : 'http://192.0.2.1:2812'\n\n```\n", - "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `hddtemp` collector, run the `python.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `python.d.plugin` to debug the collector:\n\n ```bash\n ./python.d.plugin hddtemp debug trace\n ```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per HDD temperature instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| hddtemp.temperatures | a dimension per disk | Celsius |\n\n", - "integration_type": "collector", - "id": "python.d.plugin-hddtemp-HDD_temperature", - "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/hddtemp/metadata.yaml", - "related_resources": "" - }, { "meta": { "plugin_name": "python.d.plugin", diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index 48831e456deb95..a1a64b14766b18 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -491,8 +491,6 @@ If you don't see the app/service you'd like to monitor in this list: - [CUPS](https://github.com/netdata/netdata/blob/master/src/collectors/cups.plugin/integrations/cups.md) -- [HDD temperature](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md) - - [HDD temperature](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md) - [HP iLO](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hp_ilo.md) From 
bdbf369d6f6fca3ad9238dccee3db8f1bf41aac1 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Mon, 22 Apr 2024 00:17:10 +0000 Subject: [PATCH 11/16] [ci skip] Update changelog and version for nightly build: v1.45.0-241-nightly. --- CHANGELOG.md | 7 ++++--- packaging/version | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb716ceba51d97..00b880fb0454ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ **Merged pull requests:** +- Regenerate integrations.js [\#17465](https://github.com/netdata/netdata/pull/17465) ([netdatabot](https://github.com/netdatabot)) +- Regenerate integrations.js [\#17464](https://github.com/netdata/netdata/pull/17464) ([netdatabot](https://github.com/netdatabot)) +- remove python.d/hddtemp [\#17463](https://github.com/netdata/netdata/pull/17463) ([ilyam8](https://github.com/ilyam8)) +- go.d add hddtemp [\#17462](https://github.com/netdata/netdata/pull/17462) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#17461](https://github.com/netdata/netdata/pull/17461) ([netdatabot](https://github.com/netdatabot)) - go.d storcli update [\#17460](https://github.com/netdata/netdata/pull/17460) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#17458](https://github.com/netdata/netdata/pull/17458) ([netdatabot](https://github.com/netdatabot)) @@ -415,9 +419,6 @@ - highlight Challenge Secret title to be more visible [\#16882](https://github.com/netdata/netdata/pull/16882) ([juacker](https://github.com/juacker)) - add the CLOEXEC flag to all sockets and files [\#16881](https://github.com/netdata/netdata/pull/16881) ([ktsaou](https://github.com/ktsaou)) - Network viewer UI minor fixes [\#16880](https://github.com/netdata/netdata/pull/16880) ([ktsaou](https://github.com/ktsaou)) -- Network viewer fixes [\#16877](https://github.com/netdata/netdata/pull/16877) ([ktsaou](https://github.com/ktsaou)) -- Add requirements.txt for dag 
[\#16875](https://github.com/netdata/netdata/pull/16875) ([vkalintiris](https://github.com/vkalintiris)) -- Rm refs to map and save modes [\#16874](https://github.com/netdata/netdata/pull/16874) ([vkalintiris](https://github.com/vkalintiris)) ## [v1.44.3](https://github.com/netdata/netdata/tree/v1.44.3) (2024-02-12) diff --git a/packaging/version b/packaging/version index 5ac470c124b2f5..e56168e8f6ba0a 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.45.0-236-nightly +v1.45.0-241-nightly From f63f41f54d5295ec77fbca3ea097f447fb8ffab2 Mon Sep 17 00:00:00 2001 From: Hugo Valente <82235632+hugovalente-pm@users.noreply.github.com> Date: Mon, 22 Apr 2024 06:13:59 +0100 Subject: [PATCH 12/16] add section for regenerate claiming token (#17457) * add section for regenerate claiming token * missing h * missing If at begh * missing If at n on token * add example of silencing alert instance --- ...nage-alert-notification-silencing-rules.md | 19 ++++++++++--------- src/claim/README.md | 11 +++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md b/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md index b9806c6fa729a2..806b57bdc66d59 100644 --- a/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md +++ b/docs/cloud/alerts-notifications/manage-alert-notification-silencing-rules.md @@ -46,13 +46,14 @@ To manage your **personal alert notification silencing rule settings**, you will ## Silencing rules examples -| Rule name | War Rooms | Nodes | Host Label | Alert name | Alert context | Alert role | Description | -| :-- | :-- | :-- | :-- | :-- | :-- | :-- | :--| +| Rule name | War Rooms | Nodes | Host Label | Alert name | Alert context | Alert instance | Alert role | Description | +| :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :--| | Space silencing | All Rooms | * | * | * | * | * | This rule silences the entire 
space, targets all nodes and for all users. E.g. infrastructure wide maintenance window. | -| DB Servers Rooms | PostgreSQL Servers | * | * | * | * | * | This rules silences the nodes in the room named PostgreSQL Servers, for example it doesn't silence the `All Nodes` room. E.g. My team with membership to this room doesn't want to receive notifications for these nodes. | -| Node child1 | All Rooms | `child1` | * | * | * | * | This rule silences all alert state transitions for node `child1` on all rooms and for all users. E.g. node could be going under maintenance. | -| Production nodes | All Rooms | * | `environment:production` | * | * | * | This rule silences all alert state transitions for nodes with the host label key-value pair `environment:production`. E.g. Maintenance window on nodes with specific host labels. | -| Third party maintenance | All Rooms | * | * | `httpcheck_posthog_netdata_cloud.request_status` | * | * | This rule silences this specific alert since third party partner will be undergoing maintenance. | -| Intended stress usage on CPU | All Rooms | * | * | * | `system.cpu` | * | This rule silences specific alerts across all nodes and their CPU cores. | -| Silence role webmaster | All Rooms | * | * | * | * | `webmaster` | This rule silences all alerts configured with the role `webmaster`. | -| Silence alert on node | All Rooms | `child1` | * | `httpcheck_posthog_netdata_cloud.request_status` | * | * | This rule silences the specific alert on the `child1` node. | +| DB Servers Rooms | PostgreSQL Servers | * | * | * | * | * | * | This rule silences the nodes in the room named PostgreSQL Servers, for example it doesn't silence the `All Nodes` room. E.g. My team with membership to this room doesn't want to receive notifications for these nodes. | +| Node child1 | All Rooms | `child1` | * | * | * | * | * | This rule silences all alert state transitions for node `child1` on all rooms and for all users. E.g. node could be going under maintenance.
| +| Production nodes | All Rooms | * | `environment:production` | * | * | * | * | This rule silences all alert state transitions for nodes with the host label key-value pair `environment:production`. E.g. Maintenance window on nodes with specific host labels. | +| Third party maintenance | All Rooms | * | * | `httpcheck_posthog_netdata_cloud.request_status` | * | * | * | This rule silences this specific alert since third party partner will be undergoing maintenance. | +| Intended stress usage on CPU | All Rooms | * | * | * | `system.cpu` | * | * | This rule silences specific alerts across all nodes and their CPU cores. | +| Silence role webmaster | All Rooms | * | * | * | * | * | `webmaster` | This rule silences all alerts configured with the role `webmaster`. | +| Silence alert on node | All Rooms | `child1` | * | `httpcheck_posthog_netdata_cloud.request_status` | * | * | * | This rule silences the specific alert on the `child1` node. | +| Disk Space alerts on mount point | All Rooms | * | * | `disk_space_usage` | `disk.space` | `disk_space_opt_baddisk` | * | This rule silences the specific alert instance for the `/opt/baddisk` mount point on all nodes. | diff --git a/src/claim/README.md b/src/claim/README.md index b6f785780296be..d6526e5c4fcbb9 100644 --- a/src/claim/README.md +++ b/src/claim/README.md @@ -554,6 +554,17 @@ To remove a node from you Space in Netdata Cloud, and connect it to another Spac If you are using a `docker-compose.yml` file, you will have to overwrite it with the new claiming token. The node should now appear online in that Space. +### Regenerate Claiming Token + +If, for security or any other reason, you need to revoke your previous claiming token and generate a new +one, you can do so from the Netdata Cloud UI.
+ +On any screen that shows the command for connecting a node to Netdata Cloud, you'll see above it, next to the +[updates channel](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/versions-and-platforms.md), a +button to **Regenerate token**. This action will invalidate your previous token and generate a new one. + +Only the administrators of a Space in Netdata Cloud can trigger this action. + ## Connecting reference In the sections below, you can find reference material for the kickstart script, claiming script, connecting via the Agent's command line From 827fe2afcf761380c465b7878eced4a0f3a12a6e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 09:19:18 +0300 Subject: [PATCH 13/16] Bump github.com/likexian/whois-parser from 1.24.12 to 1.24.15 in /src/go/collectors/go.d.plugin (#17469) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- src/go/collectors/go.d.plugin/go.mod | 4 ++-- src/go/collectors/go.d.plugin/go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/go/collectors/go.d.plugin/go.mod b/src/go/collectors/go.d.plugin/go.mod index bd0c6c8097bf78..ed9254f74c3667 100644 --- a/src/go/collectors/go.d.plugin/go.mod +++ b/src/go/collectors/go.d.plugin/go.mod @@ -29,7 +29,7 @@ require ( github.com/jackc/pgx/v4 v4.18.3 github.com/jessevdk/go-flags v1.5.0 github.com/likexian/whois v1.15.2 - github.com/likexian/whois-parser v1.24.12 + github.com/likexian/whois-parser v1.24.15 github.com/lmittmann/tint v1.0.4 github.com/mattn/go-isatty v0.0.20 github.com/mattn/go-xmlrpc v0.0.3 @@ -98,7 +98,7 @@ require ( github.com/josharian/native v1.1.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.4 // indirect - github.com/likexian/gokit v0.25.13 // indirect + github.com/likexian/gokit v0.25.15 // indirect github.com/mailru/easyjson v0.7.7 // indirect
github.com/mdlayher/genetlink v1.3.2 // indirect github.com/mdlayher/netlink v1.7.2 // indirect diff --git a/src/go/collectors/go.d.plugin/go.sum b/src/go/collectors/go.d.plugin/go.sum index 1537c2416cbdb0..da890b45b34aad 100644 --- a/src/go/collectors/go.d.plugin/go.sum +++ b/src/go/collectors/go.d.plugin/go.sum @@ -217,12 +217,12 @@ github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/likexian/gokit v0.25.13 h1:p2Uw3+6fGG53CwdU2Dz0T6bOycdb2+bAFAa3ymwWVkM= -github.com/likexian/gokit v0.25.13/go.mod h1:qQhEWFBEfqLCO3/vOEo2EDKd+EycekVtUK4tex+l2H4= +github.com/likexian/gokit v0.25.15 h1:QjospM1eXhdMMHwZRpMKKAHY/Wig9wgcREmLtf9NslY= +github.com/likexian/gokit v0.25.15/go.mod h1:S2QisdsxLEHWeD/XI0QMVeggp+jbxYqUxMvSBil7MRg= github.com/likexian/whois v1.15.2 h1:p/K2I3tRDT0h/l5XvMNbX0zL3/Nohu5gzVGZIkucm9g= github.com/likexian/whois v1.15.2/go.mod h1:m4FoiLAV1TghTwChwNrHTFv0HSWR6EWcES95CHQfnAc= -github.com/likexian/whois-parser v1.24.12 h1:3YnoBlpQMGmTdBr33SW/VxHxe2osuss4sN7E2KH7V/U= -github.com/likexian/whois-parser v1.24.12/go.mod h1:b6STMHHDaSKbd4PzGrP50wWE5NzeBUETa/hT9gI0G9I= +github.com/likexian/whois-parser v1.24.15 h1:wAZbvVdUHxyF4A0EURrr7YtfSO9GD79ovUdDVDRXFYY= +github.com/likexian/whois-parser v1.24.15/go.mod h1:k5zmKRZ7xPg1TLv3BGT4g/LOPRIMhvdNMeB0F53V/jk= github.com/lmittmann/tint v1.0.4 h1:LeYihpJ9hyGvE0w+K2okPTGUdVLfng1+nDNVR4vWISc= github.com/lmittmann/tint v1.0.4/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= From b7c4d442c5e0f61ba5eacd042d921955b3e763b3 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Mon, 22 Apr 2024 11:32:47 +0300 Subject: [PATCH 14/16] go.d add sensors (#17466) --- 
src/go/collectors/go.d.plugin/README.md | 2 + .../collectors/go.d.plugin/config/go.d.conf | 1 + .../go.d.plugin/config/go.d/sensors.conf | 6 + src/go/collectors/go.d.plugin/modules/init.go | 1 + .../go.d.plugin/modules/sensors/charts.go | 159 +++++++++ .../go.d.plugin/modules/sensors/collect.go | 179 ++++++++++ .../modules/sensors/config_schema.json | 47 +++ .../go.d.plugin/modules/sensors/exec.go | 41 +++ .../go.d.plugin/modules/sensors/init.go | 38 +++ .../go.d.plugin/modules/sensors/metadata.yaml | 157 +++++++++ .../go.d.plugin/modules/sensors/sensors.go | 111 +++++++ .../modules/sensors/sensors_test.go | 308 ++++++++++++++++++ .../modules/sensors/testdata/config.json | 5 + .../modules/sensors/testdata/config.yaml | 3 + .../sensors-temp-in-curr-power-fan.txt | 72 ++++ .../modules/sensors/testdata/sensors-temp.txt | 81 +++++ .../modules/zfspool/zfspool_test.go | 2 - 17 files changed, 1211 insertions(+), 2 deletions(-) create mode 100644 src/go/collectors/go.d.plugin/config/go.d/sensors.conf create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/charts.go create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/collect.go create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/config_schema.json create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/exec.go create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/init.go create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/metadata.yaml create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/sensors.go create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/sensors_test.go create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/testdata/config.json create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/testdata/config.yaml create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp-in-curr-power-fan.txt create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp.txt 
diff --git a/src/go/collectors/go.d.plugin/README.md b/src/go/collectors/go.d.plugin/README.md index fc688ada017b24..b7f35a1e54f98b 100644 --- a/src/go/collectors/go.d.plugin/README.md +++ b/src/go/collectors/go.d.plugin/README.md @@ -76,6 +76,7 @@ see the appropriate collector readme. | [fluentd](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/fluentd) | Fluentd | | [freeradius](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/freeradius) | FreeRADIUS | | [haproxy](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/haproxy) | HAProxy | +| [hddtemp](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/hddtemp) | Disks temperature | | [hdfs](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/hdfs) | HDFS | | [httpcheck](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/httpcheck) | Any HTTP Endpoint | | [intelgpu](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/intelgpu) | Intel integrated GPU | @@ -113,6 +114,7 @@ see the appropriate collector readme. 
| [rabbitmq](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/rabbitmq) | RabbitMQ | | [redis](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/redis) | Redis | | [scaleio](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/scaleio) | Dell EMC ScaleIO | +| [sensors](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/sensors) | Hardware Sensors | | [SNMP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/snmp) | SNMP | | [squidlog](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/squidlog) | Squid | | [storcli](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/storcli) | Broadcom Hardware RAID | diff --git a/src/go/collectors/go.d.plugin/config/go.d.conf b/src/go/collectors/go.d.plugin/config/go.d.conf index 9fe91db5d95a0b..928cb15a06035e 100644 --- a/src/go/collectors/go.d.plugin/config/go.d.conf +++ b/src/go/collectors/go.d.plugin/config/go.d.conf @@ -76,6 +76,7 @@ modules: # rabbitmq: yes # redis: yes # scaleio: yes +# sensors: yes # snmp: yes # squidlog: yes # storcli: yes diff --git a/src/go/collectors/go.d.plugin/config/go.d/sensors.conf b/src/go/collectors/go.d.plugin/config/go.d/sensors.conf new file mode 100644 index 00000000000000..3b8febde89b58b --- /dev/null +++ b/src/go/collectors/go.d.plugin/config/go.d/sensors.conf @@ -0,0 +1,6 @@ +## All available configuration options, their descriptions and default values: +## https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/sensors#readme + +jobs: + - name: sensors + binary_path: /usr/bin/sensors diff --git a/src/go/collectors/go.d.plugin/modules/init.go b/src/go/collectors/go.d.plugin/modules/init.go index 90f11029145c0c..8691ca024d2264 100644 --- a/src/go/collectors/go.d.plugin/modules/init.go +++
b/src/go/collectors/go.d.plugin/modules/init.go @@ -68,6 +68,7 @@ import ( _ "github.com/netdata/netdata/go/go.d.plugin/modules/rabbitmq" _ "github.com/netdata/netdata/go/go.d.plugin/modules/redis" _ "github.com/netdata/netdata/go/go.d.plugin/modules/scaleio" + _ "github.com/netdata/netdata/go/go.d.plugin/modules/sensors" _ "github.com/netdata/netdata/go/go.d.plugin/modules/snmp" _ "github.com/netdata/netdata/go/go.d.plugin/modules/squidlog" _ "github.com/netdata/netdata/go/go.d.plugin/modules/storcli" diff --git a/src/go/collectors/go.d.plugin/modules/sensors/charts.go b/src/go/collectors/go.d.plugin/modules/sensors/charts.go new file mode 100644 index 00000000000000..20df057c88230a --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/charts.go @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + "fmt" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + prioSensorTemperature = module.Priority + iota + prioSensorVoltage + prioSensorCurrent + prioSensorPower + prioSensorFan + prioSensorEnergy + prioSensorHumidity +) + +var sensorTemperatureChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_temperature", + Title: "Sensor temperature", + Units: "Celsius", + Fam: "temperature", + Ctx: "sensors.sensor_temperature", + Type: module.Line, + Priority: prioSensorTemperature, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "temperature", Div: precision}, + }, +} + +var sensorVoltageChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_voltage", + Title: "Sensor voltage", + Units: "Volts", + Fam: "voltage", + Ctx: "sensors.sensor_voltage", + Type: module.Line, + Priority: prioSensorVoltage, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "voltage", Div: precision}, + }, +} + +var sensorCurrentChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_current", + Title: 
"Sensor current", + Units: "Amperes", + Fam: "current", + Ctx: "sensors.sensor_current", + Type: module.Line, + Priority: prioSensorCurrent, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "current", Div: precision}, + }, +} + +var sensorPowerChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_power", + Title: "Sensor power", + Units: "Watts", + Fam: "power", + Ctx: "sensors.sensor_power", + Type: module.Line, + Priority: prioSensorPower, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "power", Div: precision}, + }, +} + +var sensorFanChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_fan", + Title: "Sensor fan speed", + Units: "RPM", + Fam: "fan", + Ctx: "sensors.sensor_fan_speed", + Type: module.Line, + Priority: prioSensorFan, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "fan", Div: precision}, + }, +} + +var sensorEnergyChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_energy", + Title: "Sensor energy", + Units: "Joules", + Fam: "energy", + Ctx: "sensors.sensor_energy", + Type: module.Line, + Priority: prioSensorEnergy, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "energy", Div: precision}, + }, +} + +var sensorHumidityChartTmpl = module.Chart{ + ID: "sensor_chip_%s_feature_%s_subfeature_%s_humidity", + Title: "Sensor humidity", + Units: "percent", + Fam: "humidity", + Ctx: "sensors.sensor_humidity", + Type: module.Area, + Priority: prioSensorHumidity, + Dims: module.Dims{ + {ID: "sensor_chip_%s_feature_%s_subfeature_%s", Name: "humidity", Div: precision}, + }, +} + +func (s *Sensors) addSensorChart(sn sensorStats) { + var chart *module.Chart + + switch sensorType(sn) { + case sensorTypeTemp: + chart = sensorTemperatureChartTmpl.Copy() + case sensorTypeVoltage: + chart = sensorVoltageChartTmpl.Copy() + case sensorTypePower: + chart = sensorPowerChartTmpl.Copy() + case 
sensorTypeHumidity: + chart = sensorHumidityChartTmpl.Copy() + case sensorTypeFan: + chart = sensorFanChartTmpl.Copy() + case sensorTypeCurrent: + chart = sensorCurrentChartTmpl.Copy() + case sensorTypeEnergy: + chart = sensorEnergyChartTmpl.Copy() + default: + return + } + + chip, feat, subfeat := snakeCase(sn.chip), snakeCase(sn.feature), snakeCase(sn.subfeature) + + chart.ID = fmt.Sprintf(chart.ID, chip, feat, subfeat) + chart.Labels = []module.Label{ + {Key: "chip", Value: sn.chip}, + {Key: "feature", Value: sn.feature}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, chip, feat, subfeat) + } + + if err := s.Charts().Add(chart); err != nil { + s.Warning(err) + } +} + +func (s *Sensors) removeSensorChart(px string) { + for _, chart := range *s.Charts() { + if strings.HasPrefix(chart.ID, px) { + chart.MarkRemove() + chart.MarkNotCreated() + return + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/collect.go b/src/go/collectors/go.d.plugin/modules/sensors/collect.go new file mode 100644 index 00000000000000..46e900ad0a31a6 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/collect.go @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "strconv" + "strings" +) + +type sensorStats struct { + chip string + feature string + subfeature string + value string +} + +func (s *sensorStats) String() string { + return fmt.Sprintf("chip:%s feat:%s subfeat:%s value:%s", s.chip, s.feature, s.subfeature, s.value) +} + +const ( + sensorTypeTemp = "temperature" + sensorTypeVoltage = "voltage" + sensorTypePower = "power" + sensorTypeHumidity = "humidity" + sensorTypeFan = "fan" + sensorTypeCurrent = "current" + sensorTypeEnergy = "energy" +) + +const precision = 1000 + +func (s *Sensors) collect() (map[string]int64, error) { + bs, err := s.exec.sensorsInfo() + if err != nil { + return nil, err + } + + if len(bs) == 0 { + return nil, 
errors.New("empty response from sensors") + } + + sensors, err := parseSensors(bs) + if err != nil { + return nil, err + } + if len(sensors) == 0 { + return nil, errors.New("no sensors found") + } + + mx := make(map[string]int64) + seen := make(map[string]bool) + + for _, sn := range sensors { + // TODO: Most likely we need different values depending on the type of sensor. + if !strings.HasSuffix(sn.subfeature, "_input") { + s.Debugf("skipping non input sensor: '%s'", sn) + continue + } + + v, err := strconv.ParseFloat(sn.value, 64) + if err != nil { + s.Debugf("parsing value for sensor '%s': %v", sn, err) + continue + } + + if sensorType(sn) == "" { + s.Debugf("can not find type for sensor '%s'", sn) + continue + } + + if minVal, maxVal, ok := sensorLimits(sn); ok && (v < minVal || v > maxVal) { + s.Debugf("value outside limits [%d/%d] for sensor '%s'", int64(minVal), int64(maxVal), sn) + continue + } + + key := fmt.Sprintf("sensor_chip_%s_feature_%s_subfeature_%s", sn.chip, sn.feature, sn.subfeature) + key = snakeCase(key) + if !s.sensors[key] { + s.sensors[key] = true + s.addSensorChart(sn) + } + + seen[key] = true + + mx[key] = int64(v * precision) + } + + for k := range s.sensors { + if !seen[k] { + delete(s.sensors, k) + s.removeSensorChart(k) + } + } + + return mx, nil +} + +func snakeCase(n string) string { + return strings.ToLower(strings.ReplaceAll(n, " ", "_")) +} + +func sensorLimits(sn sensorStats) (minVal float64, maxVal float64, ok bool) { + switch sensorType(sn) { + case sensorTypeTemp: + return -127, 1000, true + case sensorTypeVoltage: + return -400, 400, true + case sensorTypeCurrent: + return -127, 127, true + case sensorTypeFan: + return 0, 65535, true + default: + return 0, 0, false + } +} + +func sensorType(sn sensorStats) string { + switch { + case strings.HasPrefix(sn.subfeature, "temp"): + return sensorTypeTemp + case strings.HasPrefix(sn.subfeature, "in"): + return sensorTypeVoltage + case strings.HasPrefix(sn.subfeature, "power"): + 
return sensorTypePower + case strings.HasPrefix(sn.subfeature, "humidity"): + return sensorTypeHumidity + case strings.HasPrefix(sn.subfeature, "fan"): + return sensorTypeFan + case strings.HasPrefix(sn.subfeature, "curr"): + return sensorTypeCurrent + case strings.HasPrefix(sn.subfeature, "energy"): + return sensorTypeEnergy + default: + return "" + } +} + +func parseSensors(output []byte) ([]sensorStats, error) { + var sensors []sensorStats + + sc := bufio.NewScanner(bytes.NewReader(output)) + + var chip, feat string + + for sc.Scan() { + text := sc.Text() + if text == "" { + chip, feat = "", "" + continue + } + + switch { + case strings.HasPrefix(text, " ") && chip != "" && feat != "": + parts := strings.Split(text, ":") + if len(parts) != 2 { + continue + } + subfeat, value := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + sensors = append(sensors, sensorStats{ + chip: chip, + feature: feat, + subfeature: subfeat, + value: value, + }) + case strings.HasSuffix(text, ":") && chip != "": + feat = strings.TrimSpace(strings.TrimSuffix(text, ":")) + default: + chip = text + feat = "" + } + } + + return sensors, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/config_schema.json b/src/go/collectors/go.d.plugin/modules/sensors/config_schema.json new file mode 100644 index 00000000000000..6c12ca9b8d6421 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/config_schema.json @@ -0,0 +1,47 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Sensors collector configuration", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 10 + }, + "binary_path": { + "title": "Binary path", + "description": "Path to the `sensors` binary.", + "type": "string", + "default": "/usr/bin/sensors" + }, + "timeout": { + "title": "Timeout", + "description": "Timeout 
for executing the binary, specified in seconds.", + "type": "number", + "minimum": 0.5, + "default": 2 + } + }, + "required": [ + "binary_path" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "binary_path": { + "ui:help": "If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable." + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/exec.go b/src/go/collectors/go.d.plugin/modules/sensors/exec.go new file mode 100644 index 00000000000000..b920da66ec7f95 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/exec.go @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + "context" + "fmt" + "os/exec" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/logger" +) + +func newSensorsCliExec(binPath string, timeout time.Duration) *sensorsCliExec { + return &sensorsCliExec{ + binPath: binPath, + timeout: timeout, + } +} + +type sensorsCliExec struct { + *logger.Logger + + binPath string + timeout time.Duration +} + +func (e *sensorsCliExec) sensorsInfo() ([]byte, error) { + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, e.binPath, "-A", "-u") + e.Debugf("executing '%s'", cmd) + + bs, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("error on '%s': %v", cmd, err) + } + + return bs, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/init.go b/src/go/collectors/go.d.plugin/modules/sensors/init.go new file mode 100644 index 00000000000000..6753693da5ad43 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/init.go @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + 
+package sensors + +import ( + "errors" + "os" + "os/exec" + "strings" +) + +func (s *Sensors) validateConfig() error { + if s.BinaryPath == "" { + return errors.New("no sensors binary path specified") + } + return nil +} + +func (s *Sensors) initSensorsCliExec() (sensorsCLI, error) { + binPath := s.BinaryPath + + if !strings.HasPrefix(binPath, "/") { + path, err := exec.LookPath(binPath) + if err != nil { + return nil, err + } + binPath = path + } + + if _, err := os.Stat(binPath); err != nil { + return nil, err + } + + sensorsExec := newSensorsCliExec(binPath, s.Timeout.Duration()) + sensorsExec.Logger = s.Logger + + return sensorsExec, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/metadata.yaml b/src/go/collectors/go.d.plugin/modules/sensors/metadata.yaml new file mode 100644 index 00000000000000..5ea94f3982eacc --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/metadata.yaml @@ -0,0 +1,157 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-sensors + plugin_name: go.d.plugin + module_name: sensors + monitored_instance: + name: Linux Sensors (lm-sensors) + link: https://hwmon.wiki.kernel.org/lm_sensors + icon_filename: "microchip.svg" + categories: + - data-collection.hardware-devices-and-sensors + keywords: + - sensors + - temperature + - voltage + - current + - power + - fan + - energy + - humidity + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + most_popular: false + overview: + data_collection: + metrics_description: > + This collector gathers real-time system sensor statistics, + including temperature, voltage, current, power, fan speed, energy consumption, and humidity, + utilizing the [sensors](https://linux.die.net/man/1/sensors) binary. 
+ method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: | + The following type of sensors are auto-detected: + + - temperature + - fan + - voltage + - current + - power + - energy + - humidity + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Install lm-sensors + description: | + - Install `lm-sensors` using your distribution's package manager. + - Run `sensors-detect` to detect hardware monitoring chips. + configuration: + file: + name: go.d/sensors.conf + options: + description: | + The following options can be defined globally: update_every. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 10 + required: false + - name: binary_path + description: Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. + default_value: /usr/bin/sensors + required: true + - name: timeout + description: Timeout for executing the binary, specified in seconds. + default_value: 2 + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Custom binary path + description: The executable is not in the directories specified in the PATH environment variable. + config: | + jobs: + - name: sensors + binary_path: /usr/local/sbin/sensors + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: sensor + description: These metrics refer to the sensor. + labels: + - name: chip + description: The hardware component responsible for the sensor monitoring. 
+ - name: feature + description: The specific sensor or monitoring point provided by the chip. + metrics: + - name: sensors.sensor_temperature + description: Sensor temperature + unit: Celsius + chart_type: line + dimensions: + - name: temperature + - name: sensors.sensor_voltage + description: Sensor voltage + unit: Volts + chart_type: line + dimensions: + - name: voltage + - name: sensors.sensor_current + description: Sensor current + unit: Amperes + chart_type: line + dimensions: + - name: current + - name: sensors.sensor_power + description: Sensor power + unit: Watts + chart_type: line + dimensions: + - name: power + - name: sensors.sensor_fan_speed + description: Sensor fan speed + unit: RPM + chart_type: line + dimensions: + - name: fan + - name: sensors.sensor_energy + description: Sensor energy + unit: Joules + chart_type: line + dimensions: + - name: energy + - name: sensors.sensor_humidity + description: Sensor humidity + unit: percent + chart_type: area + dimensions: + - name: humidity diff --git a/src/go/collectors/go.d.plugin/modules/sensors/sensors.go b/src/go/collectors/go.d.plugin/modules/sensors/sensors.go new file mode 100644 index 00000000000000..f909811d7cb978 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/sensors.go @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("sensors", module.Creator{ + JobConfigSchema: configSchema, + Defaults: module.Defaults{ + UpdateEvery: 10, + }, + Create: func() module.Module { return New() }, + }) +} + +func New() *Sensors { + return &Sensors{ + Config: Config{ + BinaryPath: "/usr/bin/sensors", + Timeout: web.Duration(time.Second * 2), + }, + charts: &module.Charts{}, + sensors: make(map[string]bool), + } +} 
+ +type Config struct { + UpdateEvery int `yaml:"update_every" json:"update_every"` + Timeout web.Duration `yaml:"timeout" json:"timeout"` + BinaryPath string `yaml:"binary_path" json:"binary_path"` +} + +type ( + Sensors struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + + exec sensorsCLI + + sensors map[string]bool + } + sensorsCLI interface { + sensorsInfo() ([]byte, error) + } +) + +func (s *Sensors) Configuration() any { + return s.Config +} + +func (s *Sensors) Init() error { + if err := s.validateConfig(); err != nil { + s.Errorf("config validation: %s", err) + return err + } + + sensorsExec, err := s.initSensorsCliExec() + if err != nil { + s.Errorf("sensors exec initialization: %v", err) + return err + } + s.exec = sensorsExec + + return nil +} + +func (s *Sensors) Check() error { + mx, err := s.collect() + if err != nil { + s.Error(err) + return err + } + + if len(mx) == 0 { + return errors.New("no metrics collected") + } + + return nil +} + +func (s *Sensors) Charts() *module.Charts { + return s.charts +} + +func (s *Sensors) Collect() map[string]int64 { + mx, err := s.collect() + if err != nil { + s.Error(err) + } + + if len(mx) == 0 { + return nil + } + + return mx +} + +func (s *Sensors) Cleanup() {} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/sensors_test.go b/src/go/collectors/go.d.plugin/modules/sensors/sensors_test.go new file mode 100644 index 00000000000000..d9b4242e7bb402 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/sensors_test.go @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package sensors + +import ( + "errors" + "os" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") + + dataSensorsTemp, _ = 
os.ReadFile("testdata/sensors-temp.txt") + dataSensorsTempInCurrPowerFan, _ = os.ReadFile("testdata/sensors-temp-in-curr-power-fan.txt") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + + "dataSensorsTemp": dataSensorsTemp, + "dataSensorsTempInCurrPowerFan": dataSensorsTempInCurrPowerFan, + } { + require.NotNil(t, data, name) + + } +} + +func TestSensors_Configuration(t *testing.T) { + module.TestConfigurationSerialize(t, &Sensors{}, dataConfigJSON, dataConfigYAML) +} + +func TestSensors_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "fails if 'binary_path' is not set": { + wantFail: true, + config: Config{ + BinaryPath: "", + }, + }, + "fails if failed to find binary": { + wantFail: true, + config: Config{ + BinaryPath: "sensors!!!", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + sensors := New() + sensors.Config = test.config + + if test.wantFail { + assert.Error(t, sensors.Init()) + } else { + assert.NoError(t, sensors.Init()) + } + }) + } +} + +func TestSensors_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func() *Sensors + }{ + "not initialized exec": { + prepare: func() *Sensors { + return New() + }, + }, + "after check": { + prepare: func() *Sensors { + sensors := New() + sensors.exec = prepareMockOkOnlyTemp() + _ = sensors.Check() + return sensors + }, + }, + "after collect": { + prepare: func() *Sensors { + sensors := New() + sensors.exec = prepareMockOkTempInCurrPowerFan() + _ = sensors.Collect() + return sensors + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + sensors := test.prepare() + + assert.NotPanics(t, sensors.Cleanup) + }) + } +} + +func TestSensors_Charts(t *testing.T) { + assert.NotNil(t, New().Charts()) +} + +func TestSensors_Check(t *testing.T) { + tests := map[string]struct { + 
prepareMock func() *mockSensorsCLIExec + wantFail bool + }{ + "only temperature": { + wantFail: false, + prepareMock: prepareMockOkOnlyTemp, + }, + "temperature and voltage": { + wantFail: false, + prepareMock: prepareMockOkTempInCurrPowerFan, + }, + "error on sensors info call": { + wantFail: true, + prepareMock: prepareMockErr, + }, + "empty response": { + wantFail: true, + prepareMock: prepareMockEmptyResponse, + }, + "unexpected response": { + wantFail: true, + prepareMock: prepareMockUnexpectedResponse, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + sensors := New() + mock := test.prepareMock() + sensors.exec = mock + + if test.wantFail { + assert.Error(t, sensors.Check()) + } else { + assert.NoError(t, sensors.Check()) + } + }) + } +} + +func TestSensors_Collect(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockSensorsCLIExec + wantMetrics map[string]int64 + wantCharts int + }{ + "only temperature": { + prepareMock: prepareMockOkOnlyTemp, + wantCharts: 24, + wantMetrics: map[string]int64{ + "sensor_chip_bnxt_en-pci-6200_feature_temp1_subfeature_temp1_input": 80000, + "sensor_chip_bnxt_en-pci-6201_feature_temp1_subfeature_temp1_input": 81000, + "sensor_chip_k10temp-pci-00c3_feature_tccd1_subfeature_temp3_input": 58250, + "sensor_chip_k10temp-pci-00c3_feature_tccd2_subfeature_temp4_input": 60250, + "sensor_chip_k10temp-pci-00c3_feature_tccd3_subfeature_temp5_input": 57000, + "sensor_chip_k10temp-pci-00c3_feature_tccd4_subfeature_temp6_input": 57250, + "sensor_chip_k10temp-pci-00c3_feature_tccd5_subfeature_temp7_input": 57750, + "sensor_chip_k10temp-pci-00c3_feature_tccd6_subfeature_temp8_input": 59500, + "sensor_chip_k10temp-pci-00c3_feature_tccd7_subfeature_temp9_input": 58500, + "sensor_chip_k10temp-pci-00c3_feature_tccd8_subfeature_temp10_input": 61250, + "sensor_chip_k10temp-pci-00c3_feature_tctl_subfeature_temp1_input": 62000, + "sensor_chip_k10temp-pci-00cb_feature_tccd1_subfeature_temp3_input": 
54000, + "sensor_chip_k10temp-pci-00cb_feature_tccd2_subfeature_temp4_input": 55500, + "sensor_chip_k10temp-pci-00cb_feature_tccd3_subfeature_temp5_input": 56000, + "sensor_chip_k10temp-pci-00cb_feature_tccd4_subfeature_temp6_input": 52750, + "sensor_chip_k10temp-pci-00cb_feature_tccd5_subfeature_temp7_input": 53500, + "sensor_chip_k10temp-pci-00cb_feature_tccd6_subfeature_temp8_input": 55250, + "sensor_chip_k10temp-pci-00cb_feature_tccd7_subfeature_temp9_input": 53000, + "sensor_chip_k10temp-pci-00cb_feature_tccd8_subfeature_temp10_input": 53750, + "sensor_chip_k10temp-pci-00cb_feature_tctl_subfeature_temp1_input": 57500, + "sensor_chip_nouveau-pci-4100_feature_temp1_subfeature_temp1_input": 51000, + "sensor_chip_nvme-pci-0100_feature_composite_subfeature_temp1_input": 39850, + "sensor_chip_nvme-pci-6100_feature_composite_subfeature_temp1_input": 48850, + "sensor_chip_nvme-pci-8100_feature_composite_subfeature_temp1_input": 39850, + }, + }, + "multiple sensors": { + prepareMock: prepareMockOkTempInCurrPowerFan, + wantCharts: 19, + wantMetrics: map[string]int64{ + "sensor_chip_acpitz-acpi-0_feature_temp1_subfeature_temp1_input": 88000, + "sensor_chip_amdgpu-pci-0300_feature_edge_subfeature_temp1_input": 53000, + "sensor_chip_amdgpu-pci-0300_feature_fan1_subfeature_fan1_input": 0, + "sensor_chip_amdgpu-pci-0300_feature_junction_subfeature_temp2_input": 58000, + "sensor_chip_amdgpu-pci-0300_feature_mem_subfeature_temp3_input": 57000, + "sensor_chip_amdgpu-pci-0300_feature_vddgfx_subfeature_in0_input": 787, + "sensor_chip_amdgpu-pci-6700_feature_edge_subfeature_temp1_input": 60000, + "sensor_chip_amdgpu-pci-6700_feature_ppt_subfeature_power1_input": 8144, + "sensor_chip_amdgpu-pci-6700_feature_vddgfx_subfeature_in0_input": 1335, + "sensor_chip_amdgpu-pci-6700_feature_vddnb_subfeature_in1_input": 973, + "sensor_chip_asus-isa-0000_feature_cpu_fan_subfeature_fan1_input": 5700000, + "sensor_chip_asus-isa-0000_feature_gpu_fan_subfeature_fan2_input": 6600000, + 
"sensor_chip_bat0-acpi-0_feature_in0_subfeature_in0_input": 17365, + "sensor_chip_k10temp-pci-00c3_feature_tctl_subfeature_temp1_input": 90000, + "sensor_chip_nvme-pci-0600_feature_composite_subfeature_temp1_input": 33850, + "sensor_chip_nvme-pci-0600_feature_sensor_1_subfeature_temp2_input": 48850, + "sensor_chip_nvme-pci-0600_feature_sensor_2_subfeature_temp3_input": 33850, + "sensor_chip_ucsi_source_psy_usbc000:001-isa-0000_feature_curr1_subfeature_curr1_input": 0, + "sensor_chip_ucsi_source_psy_usbc000:001-isa-0000_feature_in0_subfeature_in0_input": 0, + }, + }, + "error on sensors info call": { + prepareMock: prepareMockErr, + wantMetrics: nil, + }, + "empty response": { + prepareMock: prepareMockEmptyResponse, + wantMetrics: nil, + }, + "unexpected response": { + prepareMock: prepareMockUnexpectedResponse, + wantMetrics: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + sensors := New() + mock := test.prepareMock() + sensors.exec = mock + + var mx map[string]int64 + for i := 0; i < 10; i++ { + mx = sensors.Collect() + } + + assert.Equal(t, test.wantMetrics, mx) + assert.Len(t, *sensors.Charts(), test.wantCharts) + testMetricsHasAllChartsDims(t, sensors, mx) + }) + } +} + +func testMetricsHasAllChartsDims(t *testing.T, sensors *Sensors, mx map[string]int64) { + for _, chart := range *sensors.Charts() { + if chart.Obsolete { + continue + } + for _, dim := range chart.Dims { + _, ok := mx[dim.ID] + assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) + } + for _, v := range chart.Vars { + _, ok := mx[v.ID] + assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) + } + } +} + +func prepareMockOkOnlyTemp() *mockSensorsCLIExec { + return &mockSensorsCLIExec{ + sensorsInfoData: dataSensorsTemp, + } +} + +func prepareMockOkTempInCurrPowerFan() *mockSensorsCLIExec { + return &mockSensorsCLIExec{ + sensorsInfoData: dataSensorsTempInCurrPowerFan, + } +} + 
+func prepareMockErr() *mockSensorsCLIExec { + return &mockSensorsCLIExec{ + errOnSensorsInfo: true, + } +} + +func prepareMockUnexpectedResponse() *mockSensorsCLIExec { + return &mockSensorsCLIExec{ + sensorsInfoData: []byte(` +Lorem ipsum dolor sit amet, consectetur adipiscing elit. +Nulla malesuada erat id magna mattis, eu viverra tellus rhoncus. +Fusce et felis pulvinar, posuere sem non, porttitor eros. +`), + } +} + +func prepareMockEmptyResponse() *mockSensorsCLIExec { + return &mockSensorsCLIExec{} +} + +type mockSensorsCLIExec struct { + errOnSensorsInfo bool + sensorsInfoData []byte +} + +func (m *mockSensorsCLIExec) sensorsInfo() ([]byte, error) { + if m.errOnSensorsInfo { + return nil, errors.New("mock.sensorsInfo() error") + } + + return m.sensorsInfoData, nil +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/testdata/config.json b/src/go/collectors/go.d.plugin/modules/sensors/testdata/config.json new file mode 100644 index 00000000000000..09571319348b46 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/testdata/config.json @@ -0,0 +1,5 @@ +{ + "update_every": 123, + "timeout": 123.123, + "binary_path": "ok" +} diff --git a/src/go/collectors/go.d.plugin/modules/sensors/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/sensors/testdata/config.yaml new file mode 100644 index 00000000000000..baf3bcd0b0fab0 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/testdata/config.yaml @@ -0,0 +1,3 @@ +update_every: 123 +timeout: 123.123 +binary_path: "ok" diff --git a/src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp-in-curr-power-fan.txt b/src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp-in-curr-power-fan.txt new file mode 100644 index 00000000000000..a38c7ab4ecf4aa --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp-in-curr-power-fan.txt @@ -0,0 +1,72 @@ +asus-isa-0000 +cpu_fan: + fan1_input: 5700.000 +gpu_fan: + fan2_input: 6600.000 
+nvme-pci-0600 +Composite: + temp1_input: 33.850 + temp1_max: 83.850 + temp1_min: -40.150 + temp1_crit: 87.850 + temp1_alarm: 0.000 +Sensor 1: + temp2_input: 48.850 + temp2_max: 65261.850 + temp2_min: -273.150 +Sensor 2: + temp3_input: 33.850 + temp3_max: 65261.850 + temp3_min: -273.150 +amdgpu-pci-6700 +vddgfx: + in0_input: 1.335 +vddnb: + in1_input: 0.973 +edge: + temp1_input: 60.000 +PPT: + power1_average: 5.088 + power1_input: 8.144 +BAT0-acpi-0 +in0: + in0_input: 17.365 +ucsi_source_psy_USBC000:001-isa-0000 +in0: + in0_input: 0.000 + in0_min: 0.000 + in0_max: 0.000 +curr1: + curr1_input: 0.000 + curr1_max: 0.000 +k10temp-pci-00c3 +Tctl: + temp1_input: 90.000 +amdgpu-pci-0300 +vddgfx: + in0_input: 0.787 +fan1: + fan1_input: 0.000 + fan1_min: 0.000 + fan1_max: 4900.000 +edge: + temp1_input: 53.000 + temp1_crit: 100.000 + temp1_crit_hyst: -273.150 + temp1_emergency: 105.000 +junction: + temp2_input: 58.000 + temp2_crit: 100.000 + temp2_crit_hyst: -273.150 + temp2_emergency: 105.000 +mem: + temp3_input: 57.000 + temp3_crit: 105.000 + temp3_crit_hyst: -273.150 + temp3_emergency: 110.000 +PPT: + power1_average: 29.000 + power1_cap: 120.000 +acpitz-acpi-0 +temp1: + temp1_input: 88.000 diff --git a/src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp.txt b/src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp.txt new file mode 100644 index 00000000000000..decc7ee3992658 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/testdata/sensors-temp.txt @@ -0,0 +1,81 @@ +k10temp-pci-00cb +Tctl: + temp1_input: 57.500 +Tccd1: + temp3_input: 54.000 +Tccd2: + temp4_input: 55.500 +Tccd3: + temp5_input: 56.000 +Tccd4: + temp6_input: 52.750 +Tccd5: + temp7_input: 53.500 +Tccd6: + temp8_input: 55.250 +Tccd7: + temp9_input: 53.000 +Tccd8: + temp10_input: 53.750 + +bnxt_en-pci-6201 +temp1: + temp1_input: 81.000 + +nvme-pci-6100 +Composite: + temp1_input: 48.850 + temp1_max: 89.850 + temp1_min: -20.150 + temp1_crit: 94.850 + temp1_alarm: 0.000 
+ +nvme-pci-0100 +Composite: + temp1_input: 39.850 + temp1_max: 89.850 + temp1_min: -20.150 + temp1_crit: 94.850 + temp1_alarm: 0.000 + +nouveau-pci-4100 +temp1: + temp1_input: 51.000 + temp1_max: 95.000 + temp1_max_hyst: 3.000 + temp1_crit: 105.000 + temp1_crit_hyst: 5.000 + temp1_emergency: 135.000 + temp1_emergency_hyst: 5.000 + +k10temp-pci-00c3 +Tctl: + temp1_input: 62.000 +Tccd1: + temp3_input: 58.250 +Tccd2: + temp4_input: 60.250 +Tccd3: + temp5_input: 57.000 +Tccd4: + temp6_input: 57.250 +Tccd5: + temp7_input: 57.750 +Tccd6: + temp8_input: 59.500 +Tccd7: + temp9_input: 58.500 +Tccd8: + temp10_input: 61.250 + +bnxt_en-pci-6200 +temp1: + temp1_input: 80.000 + +nvme-pci-8100 +Composite: + temp1_input: 39.850 + temp1_max: 89.850 + temp1_min: -20.150 + temp1_crit: 94.850 + temp1_alarm: 0.000 diff --git a/src/go/collectors/go.d.plugin/modules/zfspool/zfspool_test.go b/src/go/collectors/go.d.plugin/modules/zfspool/zfspool_test.go index 7a9392a0346db3..ea40aa06de0197 100644 --- a/src/go/collectors/go.d.plugin/modules/zfspool/zfspool_test.go +++ b/src/go/collectors/go.d.plugin/modules/zfspool/zfspool_test.go @@ -67,7 +67,6 @@ func TestZFSPool_Init(t *testing.T) { } }) } - } func TestZFSPool_Cleanup(t *testing.T) { @@ -205,7 +204,6 @@ func TestZFSPool_Collect(t *testing.T) { } }) } - } func prepareMockOK() *mockZpoolCLIExec { From 19c7bcb5a458c71040ad5fe4f34180ba3da631de Mon Sep 17 00:00:00 2001 From: Netdata bot <43409846+netdatabot@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:57:16 +0300 Subject: [PATCH 15/16] Regenerate integrations.js (#17472) Co-authored-by: ilyam8 <22274335+ilyam8@users.noreply.github.com> --- integrations/integrations.js | 43 +++++ integrations/integrations.json | 43 +++++ src/collectors/COLLECTORS.md | 2 + .../go.d.plugin/modules/sensors/README.md | 1 + .../integrations/linux_sensors_lm-sensors.md | 180 ++++++++++++++++++ 5 files changed, 269 insertions(+) create mode 120000 src/go/collectors/go.d.plugin/modules/sensors/README.md 
create mode 100644 src/go/collectors/go.d.plugin/modules/sensors/integrations/linux_sensors_lm-sensors.md diff --git a/integrations/integrations.js b/integrations/integrations.js index ae51bc4e40c5d6..fe15c0c3f46c8d 100644 --- a/integrations/integrations.js +++ b/integrations/integrations.js @@ -15752,6 +15752,49 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/scaleio/metadata.yaml", "related_resources": "" }, + { + "meta": { + "id": "collector-go.d.plugin-sensors", + "plugin_name": "go.d.plugin", + "module_name": "sensors", + "monitored_instance": { + "name": "Linux Sensors (lm-sensors)", + "link": "https://hwmon.wiki.kernel.org/lm_sensors", + "icon_filename": "microchip.svg", + "categories": [ + "data-collection.hardware-devices-and-sensors" + ] + }, + "keywords": [ + "sensors", + "temperature", + "voltage", + "current", + "power", + "fan", + "energy", + "humidity" + ], + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "most_popular": false + }, + "overview": "# Linux Sensors (lm-sensors)\n\nPlugin: go.d.plugin\nModule: sensors\n\n## Overview\n\nThis collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary.\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe following type of sensors are auto-detected:\n\n- temperature\n- fan\n- voltage\n- current\n- power\n- energy\n- humidity\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to 
impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Install lm-sensors\n\n- Install `lm-sensors` using your distribution's package manager.\n- Run `sensors-detect` to detect hardware monitoring chips.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/sensors.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n{% details summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| binary_path | Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. | /usr/bin/sensors | yes |\n| timeout | Timeout for executing the binary, specified in seconds. | 2 | no |\n\n{% /details %}\n#### Examples\n\n##### Custom binary path\n\nThe executable is not in the directories specified in the PATH environment variable.\n\n{% details summary=\"Config\" %}\n```yaml\njobs:\n - name: sensors\n binary_path: /usr/local/sbin/sensors\n\n```\n{% /details %}\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `sensors` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m sensors\n ```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.\n\n\n\n### Per sensor\n\nThese metrics refer to the sensor.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| chip | The hardware component responsible for the sensor monitoring. |\n| feature | The specific sensor or monitoring point provided by the chip. |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| sensors.sensor_temperature | temperature | Celsius |\n| sensors.sensor_voltage | voltage | Volts |\n| sensors.sensor_current | current | Amperes |\n| sensors.sensor_power | power | Watts |\n| sensors.sensor_fan_speed | fan | RPM |\n| sensors.sensor_energy | energy | Joules |\n| sensors.sensor_humidity | humidity | percent |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-sensors-Linux_Sensors_(lm-sensors)", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/sensors/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-snmp", diff --git a/integrations/integrations.json b/integrations/integrations.json index 10553d658d5bca..93382ce4e32be0 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -15750,6 +15750,49 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/scaleio/metadata.yaml", "related_resources": "" }, + { + "meta": { + 
"id": "collector-go.d.plugin-sensors", + "plugin_name": "go.d.plugin", + "module_name": "sensors", + "monitored_instance": { + "name": "Linux Sensors (lm-sensors)", + "link": "https://hwmon.wiki.kernel.org/lm_sensors", + "icon_filename": "microchip.svg", + "categories": [ + "data-collection.hardware-devices-and-sensors" + ] + }, + "keywords": [ + "sensors", + "temperature", + "voltage", + "current", + "power", + "fan", + "energy", + "humidity" + ], + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "most_popular": false + }, + "overview": "# Linux Sensors (lm-sensors)\n\nPlugin: go.d.plugin\nModule: sensors\n\n## Overview\n\nThis collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary.\n\n\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe following type of sensors are auto-detected:\n\n- temperature\n- fan\n- voltage\n- current\n- power\n- energy\n- humidity\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Install lm-sensors\n\n- Install `lm-sensors` using your distribution's package manager.\n- Run `sensors-detect` to detect hardware monitoring chips.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/sensors.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config 
directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/sensors.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 10 | no |\n| binary_path | Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. | /usr/bin/sensors | yes |\n| timeout | Timeout for executing the binary, specified in seconds. | 2 | no |\n\n#### Examples\n\n##### Custom binary path\n\nThe executable is not in the directories specified in the PATH environment variable.\n\n```yaml\njobs:\n - name: sensors\n binary_path: /usr/local/sbin/sensors\n\n```\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\nTo troubleshoot issues with the `sensors` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m sensors\n ```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per sensor\n\nThese metrics refer to the sensor.\n\nLabels:\n\n| Label | Description |\n|:-----------|:----------------|\n| chip | The hardware component responsible for the sensor monitoring. |\n| feature | The specific sensor or monitoring point provided by the chip. |\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| sensors.sensor_temperature | temperature | Celsius |\n| sensors.sensor_voltage | voltage | Volts |\n| sensors.sensor_current | current | Amperes |\n| sensors.sensor_power | power | Watts |\n| sensors.sensor_fan_speed | fan | RPM |\n| sensors.sensor_energy | energy | Joules |\n| sensors.sensor_humidity | humidity | percent |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-sensors-Linux_Sensors_(lm-sensors)", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/sensors/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-snmp", diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index a1a64b14766b18..ba5fbe66ff245a 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -507,6 +507,8 @@ If you don't see the app/service you'd like to monitor in this list: - [Linux Sensors (lm-sensors)](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md) +- [Linux Sensors (lm-sensors)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/sensors/integrations/linux_sensors_lm-sensors.md) + - [Linux Sensors (sysfs)](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md) - [NVML](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nvml.md) diff --git a/src/go/collectors/go.d.plugin/modules/sensors/README.md 
b/src/go/collectors/go.d.plugin/modules/sensors/README.md new file mode 120000 index 00000000000000..4e92b088274370 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/README.md @@ -0,0 +1 @@ +integrations/linux_sensors_lm-sensors.md \ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/sensors/integrations/linux_sensors_lm-sensors.md b/src/go/collectors/go.d.plugin/modules/sensors/integrations/linux_sensors_lm-sensors.md new file mode 100644 index 00000000000000..5567e8433c89bf --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/sensors/integrations/linux_sensors_lm-sensors.md @@ -0,0 +1,180 @@ + + +# Linux Sensors (lm-sensors) + + + + + +Plugin: go.d.plugin +Module: sensors + + + +## Overview + +This collector gathers real-time system sensor statistics, including temperature, voltage, current, power, fan speed, energy consumption, and humidity, utilizing the [sensors](https://linux.die.net/man/1/sensors) binary. + + + + +This collector is supported on all platforms. + +This collector only supports collecting metrics from a single instance of this integration. + + +### Default Behavior + +#### Auto-Detection + +The following type of sensors are auto-detected: + +- temperature +- fan +- voltage +- current +- power +- energy +- humidity + + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per sensor + +These metrics refer to the sensor. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| chip | The hardware component responsible for the sensor monitoring. 
| +| feature | The specific sensor or monitoring point provided by the chip. | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| sensors.sensor_temperature | temperature | Celsius | +| sensors.sensor_voltage | voltage | Volts | +| sensors.sensor_current | current | Amperes | +| sensors.sensor_power | power | Watts | +| sensors.sensor_fan_speed | fan | RPM | +| sensors.sensor_energy | energy | Joules | +| sensors.sensor_humidity | humidity | percent | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +#### Install lm-sensors + +- Install `lm-sensors` using your distribution's package manager. +- Run `sensors-detect` to detect hardware monitoring chips. + + + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/sensors.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/sensors.conf +``` +#### Options + +The following options can be defined globally: update_every. + + +
Config options + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 10 | no | +| binary_path | Path to the `sensors` binary. If an absolute path is provided, the collector will use it directly; otherwise, it will search for the binary in directories specified in the PATH environment variable. | /usr/bin/sensors | yes | +| timeout | Timeout for executing the binary, specified in seconds. | 2 | no | + +
+ +#### Examples + +##### Custom binary path + +The executable is not in the directories specified in the PATH environment variable. + +
Config + +```yaml +jobs: + - name: sensors + binary_path: /usr/local/sbin/sensors + +``` +
+ + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `sensors` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m sensors + ``` + + From 3b1c4fa43834201643aa291cba0330b348c8ede3 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Mon, 22 Apr 2024 12:51:20 +0300 Subject: [PATCH 16/16] remove python.d/sensors (#17473) --- CMakeLists.txt | 2 - src/collectors/python.d.plugin/python.d.conf | 2 +- .../python.d.plugin/sensors/README.md | 1 - .../integrations/linux_sensors_lm-sensors.md | 187 ------------------ .../python.d.plugin/sensors/metadata.yaml | 184 ----------------- .../python.d.plugin/sensors/sensors.chart.py | 179 ----------------- .../python.d.plugin/sensors/sensors.conf | 61 ------ 7 files changed, 1 insertion(+), 615 deletions(-) delete mode 120000 src/collectors/python.d.plugin/sensors/README.md delete mode 100644 src/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md delete mode 100644 src/collectors/python.d.plugin/sensors/metadata.yaml delete mode 100644 src/collectors/python.d.plugin/sensors/sensors.chart.py delete mode 100644 src/collectors/python.d.plugin/sensors/sensors.conf diff --git a/CMakeLists.txt b/CMakeLists.txt index 58cbf818d8cbbf..5d09f0a1ac5ca1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2572,7 +2572,6 @@ install(FILES src/collectors/python.d.plugin/retroshare/retroshare.conf src/collectors/python.d.plugin/riakkv/riakkv.conf src/collectors/python.d.plugin/samba/samba.conf - 
src/collectors/python.d.plugin/sensors/sensors.conf src/collectors/python.d.plugin/smartd_log/smartd_log.conf src/collectors/python.d.plugin/spigotmc/spigotmc.conf src/collectors/python.d.plugin/squid/squid.conf @@ -2620,7 +2619,6 @@ install(FILES src/collectors/python.d.plugin/retroshare/retroshare.chart.py src/collectors/python.d.plugin/riakkv/riakkv.chart.py src/collectors/python.d.plugin/samba/samba.chart.py - src/collectors/python.d.plugin/sensors/sensors.chart.py src/collectors/python.d.plugin/smartd_log/smartd_log.chart.py src/collectors/python.d.plugin/spigotmc/spigotmc.chart.py src/collectors/python.d.plugin/squid/squid.chart.py diff --git a/src/collectors/python.d.plugin/python.d.conf b/src/collectors/python.d.plugin/python.d.conf index c73e2f587ff1e4..e2cfa3342c93f0 100644 --- a/src/collectors/python.d.plugin/python.d.conf +++ b/src/collectors/python.d.plugin/python.d.conf @@ -63,7 +63,7 @@ hpssa: no # retroshare: yes # riakkv: yes # samba: yes -# sensors: yes +sensors: no # replaced with go.d/sensors. Disabled for existing installations. # smartd_log: yes # spigotmc: yes # squid: yes diff --git a/src/collectors/python.d.plugin/sensors/README.md b/src/collectors/python.d.plugin/sensors/README.md deleted file mode 120000 index 4e92b088274370..00000000000000 --- a/src/collectors/python.d.plugin/sensors/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/linux_sensors_lm-sensors.md \ No newline at end of file diff --git a/src/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md b/src/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md deleted file mode 100644 index f743d410c96140..00000000000000 --- a/src/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md +++ /dev/null @@ -1,187 +0,0 @@ - - -# Linux Sensors (lm-sensors) - - - - - -Plugin: python.d.plugin -Module: sensors - - - -## Overview - -Examine Linux Sensors metrics with Netdata for insights into hardware health and performance. 
- -Enhance your system's reliability with real-time hardware health insights. - - -Reads system sensors information (temperature, voltage, electric current, power, etc.) via [lm-sensors](https://hwmon.wiki.kernel.org/lm_sensors). - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -The following type of sensors are auto-detected: -- temperature - fan - voltage - current - power - energy - humidity - - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per chip - -Metrics related to chips. Each chip provides a set of the following metrics, each having the chip name in the metric name as reported by `sensors -u`. - - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| sensors.temperature | a dimension per sensor | Celsius | -| sensors.voltage | a dimension per sensor | Volts | -| sensors.current | a dimension per sensor | Ampere | -| sensors.power | a dimension per sensor | Watt | -| sensors.fan | a dimension per sensor | Rotations/min | -| sensors.energy | a dimension per sensor | Joule | -| sensors.humidity | a dimension per sensor | Percent | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/sensors.conf`. 
- - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/sensors.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -
Config options - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| types | The types of sensors to collect. | temperature, fan, voltage, current, power, energy, humidity | yes | -| update_every | Sets the default data collection frequency. | 1 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | - -
- -#### Examples - -##### Default - -Default configuration. - -```yaml -types: - - temperature - - fan - - voltage - - current - - power - - energy - - humidity - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `sensors` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin sensors debug trace - ``` - -### lm-sensors doesn't work on your device - - - -### ACPI ring buffer errors are printed - - - - diff --git a/src/collectors/python.d.plugin/sensors/metadata.yaml b/src/collectors/python.d.plugin/sensors/metadata.yaml deleted file mode 100644 index ceb4ba7210ab09..00000000000000 --- a/src/collectors/python.d.plugin/sensors/metadata.yaml +++ /dev/null @@ -1,184 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: sensors - monitored_instance: - name: Linux Sensors (lm-sensors) - link: https://hwmon.wiki.kernel.org/lm_sensors - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: "microchip.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - sensors - - temperature - - voltage - - current - - power - - fan - - energy - - humidity - most_popular: false - overview: - data_collection: - metrics_description: | - Examine Linux Sensors metrics with Netdata for insights into hardware health and performance. - - Enhance your system's reliability with real-time hardware health insights. 
- method_description: > - Reads system sensors information (temperature, voltage, electric current, power, etc.) via [lm-sensors](https://hwmon.wiki.kernel.org/lm_sensors). - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: > - The following type of sensors are auto-detected: - - - temperature - - fan - - voltage - - current - - power - - energy - - humidity - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: python.d/sensors.conf - description: "" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: Config options - enabled: true - list: - - name: types - description: The types of sensors to collect. - default_value: "temperature, fan, voltage, current, power, energy, humidity" - required: true - - name: update_every - description: Sets the default data collection frequency. - default_value: 1 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. 
- default_value: yes - required: false - examples: - folding: - enabled: true - title: Config - list: - - name: Default - folding: - enabled: false - description: Default configuration. - config: | - types: - - temperature - - fan - - voltage - - current - - power - - energy - - humidity - troubleshooting: - problems: - list: - - name: lm-sensors doesn't work on your device - description: | - When `lm-sensors` doesn't work on your device (e.g. for RPi temperatures), - use [the legacy bash collector](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/sensors/README.md) - - name: ACPI ring buffer errors are printed - description: | - There have been reports from users that on certain servers, ACPI ring buffer errors are printed by the kernel (`dmesg`) - when ACPI sensors are being accessed. We are tracking such cases in issue [#827](https://github.com/netdata/netdata/issues/827). - Please join this discussion for help. - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: chip - description: > - Metrics related to chips. Each chip provides a set of the following metrics, each having the chip name in the metric name as reported by `sensors -u`. 
- labels: [] - metrics: - - name: sensors.temperature - description: Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.voltage - description: Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.current - description: Current - unit: "Ampere" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.power - description: Power - unit: "Watt" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.fan - description: Fans speed - unit: "Rotations/min" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.energy - description: Energy - unit: "Joule" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.humidity - description: Humidity - unit: "Percent" - chart_type: line - dimensions: - - name: a dimension per sensor diff --git a/src/collectors/python.d.plugin/sensors/sensors.chart.py b/src/collectors/python.d.plugin/sensors/sensors.chart.py deleted file mode 100644 index 0d9de3750fad9c..00000000000000 --- a/src/collectors/python.d.plugin/sensors/sensors.chart.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: sensors netdata python.d plugin -# Author: Pawel Krupa (paulfantom) -# SPDX-License-Identifier: GPL-3.0-or-later - -from collections import defaultdict - -from bases.FrameworkServices.SimpleService import SimpleService -from third_party import lm_sensors as sensors - -ORDER = [ - 'temperature', - 'fan', - 'voltage', - 'current', - 'power', - 'energy', - 'humidity', -] - -# This is a prototype of chart definition which is used to dynamically create self.definitions -CHARTS = { - 'temperature': { - 'options': [None, 'Temperature', 'Celsius', 'temperature', 'sensors.temperature', 'line'], - 'lines': [ - [None, None, 'absolute', 1, 1000] - ] - }, - 'voltage': { - 'options': [None, 'Voltage', 'Volts', 'voltage', 
'sensors.voltage', 'line'], - 'lines': [ - [None, None, 'absolute', 1, 1000] - ] - }, - 'current': { - 'options': [None, 'Current', 'Ampere', 'current', 'sensors.current', 'line'], - 'lines': [ - [None, None, 'absolute', 1, 1000] - ] - }, - 'power': { - 'options': [None, 'Power', 'Watt', 'power', 'sensors.power', 'line'], - 'lines': [ - [None, None, 'absolute', 1, 1000] - ] - }, - 'fan': { - 'options': [None, 'Fans speed', 'Rotations/min', 'fans', 'sensors.fan', 'line'], - 'lines': [ - [None, None, 'absolute', 1, 1000] - ] - }, - 'energy': { - 'options': [None, 'Energy', 'Joule', 'energy', 'sensors.energy', 'line'], - 'lines': [ - [None, None, 'incremental', 1, 1000] - ] - }, - 'humidity': { - 'options': [None, 'Humidity', 'Percent', 'humidity', 'sensors.humidity', 'line'], - 'lines': [ - [None, None, 'absolute', 1, 1000] - ] - } -} - -LIMITS = { - 'temperature': [-127, 1000], - 'voltage': [-400, 400], - 'current': [-127, 127], - 'fan': [0, 65535] -} - -TYPE_MAP = { - 0: 'voltage', - 1: 'fan', - 2: 'temperature', - 3: 'power', - 4: 'energy', - 5: 'current', - 6: 'humidity', - # 7: 'max_main', - # 16: 'vid', - # 17: 'intrusion', - # 18: 'max_other', - # 24: 'beep_enable' -} - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - SimpleService.__init__(self, configuration=configuration, name=name) - self.order = list() - self.definitions = dict() - self.chips = configuration.get('chips') - self.priority = 60000 - - def get_data(self): - seen, data = dict(), dict() - try: - for chip in sensors.ChipIterator(): - chip_name = sensors.chip_snprintf_name(chip) - seen[chip_name] = defaultdict(list) - - for feat in sensors.FeatureIterator(chip): - if feat.type not in TYPE_MAP: - continue - - feat_type = TYPE_MAP[feat.type] - feat_name = str(feat.name.decode()) - feat_label = sensors.get_label(chip, feat) - feat_limits = LIMITS.get(feat_type) - sub_feat = next(sensors.SubFeatureIterator(chip, feat)) # current value - - if not sub_feat: - 
continue - - try: - v = sensors.get_value(chip, sub_feat.number) - except sensors.SensorsError: - continue - - if v is None: - continue - - seen[chip_name][feat_type].append((feat_name, feat_label)) - - if feat_limits and (v < feat_limits[0] or v > feat_limits[1]): - continue - - data[chip_name + '_' + feat_name] = int(v * 1000) - - except sensors.SensorsError as error: - self.error(error) - return None - - self.update_sensors_charts(seen) - - return data or None - - def update_sensors_charts(self, seen): - for chip_name, feat in seen.items(): - if self.chips and not any([chip_name.startswith(ex) for ex in self.chips]): - continue - - for feat_type, sub_feat in feat.items(): - if feat_type not in ORDER or feat_type not in CHARTS: - continue - - chart_id = '{}_{}'.format(chip_name, feat_type) - if chart_id in self.charts: - continue - - params = [chart_id] + list(CHARTS[feat_type]['options']) - new_chart = self.charts.add_chart(params) - new_chart.params['priority'] = self.get_chart_priority(feat_type) - - for name, label in sub_feat: - lines = list(CHARTS[feat_type]['lines'][0]) - lines[0] = chip_name + '_' + name - lines[1] = label - new_chart.add_dimension(lines) - - def check(self): - try: - sensors.init() - except sensors.SensorsError as error: - self.error(error) - return False - - self.priority = self.charts.priority - - return bool(self.get_data() and self.charts) - - def get_chart_priority(self, feat_type): - for i, v in enumerate(ORDER): - if v == feat_type: - return self.priority + i - return self.priority diff --git a/src/collectors/python.d.plugin/sensors/sensors.conf b/src/collectors/python.d.plugin/sensors/sensors.conf deleted file mode 100644 index d3369ba6614f78..00000000000000 --- a/src/collectors/python.d.plugin/sensors/sensors.conf +++ /dev/null @@ -1,61 +0,0 @@ -# netdata python.d.plugin configuration for sensors -# -# This file is in YaML format. 
Generally the format is: -# -# name: value -# - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# Limit the number of sensors types. -# Comment the ones you want to disable. -# Also, re-arranging this list controls the order of the charts at the -# netdata dashboard. - -types: - - temperature - - fan - - voltage - - current - - power - - energy - - humidity - -# ---------------------------------------------------------------------- -# Limit the number of sensors chips. -# Uncomment the first line (chips:) and add chip names below it. -# The chip names that start with like that will be matched. -# You can find the chip names using the sensors command. - -#chips: -# - i8k -# - coretemp -# -# chip names can be found using the sensors shell command -# the prefix is matched (anything that starts like that) -# -#---------------------------------------------------------------------- -