From dcd0f39a29ff4c69870796a2506797a54d44bdcb Mon Sep 17 00:00:00 2001 From: Christoph Wiechert Date: Fri, 11 Feb 2022 17:35:32 +0100 Subject: [PATCH 1/3] Add nvme support to smartmon.py Signed-off-by: Christoph Wiechert --- smartmon.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/smartmon.py b/smartmon.py index 1c39b49..b7f2d0a 100755 --- a/smartmon.py +++ b/smartmon.py @@ -203,6 +203,11 @@ def device_smart_capabilities(device): (bool): True whenever SMART is available, False otherwise. (bool): True whenever SMART is enabled, False otherwise. """ + + # NVME devices are SMART capable + if device.type == 'nvme': + return True, True + groups = device_info(device) state = { @@ -325,6 +330,44 @@ def collect_ata_error_count(device): yield Metric('device_errors', device.base_labels, error_count) +def collect_nvme_metrics(device): + # Fetch NVME metrics + attributes = smart_ctl( + '--attributes', *device.smartctl_select() + ) + + # replace multiple occurrences of whitespaces with a single whitespace + attributes = re.sub(r'[\t\x20]+', ' ', attributes) + + # Turn smartctl output into a list of lines and skip to the table of + # SMART attributes. 
+ attribute_lines = attributes.strip().split('\n')[6:] + for line in attribute_lines: + label, value = line.split(':') + if label == 'Available Spare': + yield Metric('available_spare_ratio', device.base_labels, value[0:-1]) + elif label == 'Available Spare Threshold': + yield Metric('available_spare_threshold_ratio', device.base_labels, value[0:-1]) + elif label == 'Percentage Used': + yield Metric('percentage_used_ratio', device.base_labels, value[0:-1]) + elif label == 'Power Cycle': + yield Metric('power_cycles_total', device.base_labels, value) + elif label == 'Power On Hours': + yield Metric('power_on_hours_total', device.base_labels, value.replace(',', '')) + elif label == 'Temperature': + yield Metric('temperature_celcius', device.base_labels, value.replace(' Celsius', '')) + elif label == 'Unsafe Shutdowns': + yield Metric('unsafe_shutdowns_total', device.base_labels, value) + elif label == 'Media and Data Integrity Errors': + yield Metric('media_errors_total', device.base_labels, value) + elif label == 'Error Information Log Entries': + yield Metric('num_err_log_entries_total', device.base_labels, value) + elif label == 'Warning Comp. Temperature Time': + yield Metric('warning_temperature_time_total', device.base_labels, value) + elif label == 'Critical Comp. 
Temperature Time': + yield Metric('critical_temperature_time_total', device.base_labels, value) + + def collect_disks_smart_metrics(wakeup_disks): now = int(datetime.datetime.utcnow().timestamp()) @@ -362,6 +405,8 @@ def collect_disks_smart_metrics(wakeup_disks): yield from collect_ata_error_count(device) + if device.type == 'nvme': + yield from collect_nvme_metrics(device) def main(): parser = argparse.ArgumentParser() From 182b74cff5d94d4eefa2772ca42933c7007ff9ef Mon Sep 17 00:00:00 2001 From: Christoph Wiechert Date: Fri, 11 Feb 2022 17:44:29 +0100 Subject: [PATCH 2/3] Fix linting errors Signed-off-by: Christoph Wiechert --- smartmon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/smartmon.py b/smartmon.py index b7f2d0a..1514769 100755 --- a/smartmon.py +++ b/smartmon.py @@ -408,6 +408,7 @@ def collect_disks_smart_metrics(wakeup_disks): if device.type == 'nvme': yield from collect_nvme_metrics(device) + def main(): parser = argparse.ArgumentParser() parser.add_argument('-s', '--wakeup-disks', dest='wakeup_disks', action='store_true') From 8ea1c2594f95e6768b0e95d4b47748adeafaeeb0 Mon Sep 17 00:00:00 2001 From: Christoph Wiechert Date: Sat, 1 Oct 2022 16:04:02 +0200 Subject: [PATCH 3/3] Update smartmon.py Co-authored-by: Linus Heckemann Signed-off-by: Christoph Wiechert --- smartmon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smartmon.py b/smartmon.py index 1514769..1aa6a75 100755 --- a/smartmon.py +++ b/smartmon.py @@ -355,7 +355,7 @@ def collect_nvme_metrics(device): elif label == 'Power On Hours': yield Metric('power_on_hours_total', device.base_labels, value.replace(',', '')) elif label == 'Temperature': - yield Metric('temperature_celcius', device.base_labels, value.replace(' Celsius', '')) + yield Metric('temperature_celsius', device.base_labels, value.replace(' Celsius', '')) elif label == 'Unsafe Shutdowns': yield Metric('unsafe_shutdowns_total', device.base_labels, value) elif label == 'Media and Data Integrity Errors':