From 128ab717b0bbc4d75fea59a3c4d3e3a80ac6d985 Mon Sep 17 00:00:00 2001 From: yungwine Date: Mon, 13 Jan 2025 11:52:17 +0400 Subject: [PATCH 1/4] add prometheus metrics pusher --- modules/__init__.py | 5 +++- modules/prometheus.py | 59 ++++++++++++++++++++++++++++++++++++++++++ mytoncore/functions.py | 3 +++ mytoncore/mytoncore.py | 3 +++ 4 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 modules/prometheus.py diff --git a/modules/__init__.py b/modules/__init__.py index 3d09bb9e..5b0b5acf 100644 --- a/modules/__init__.py +++ b/modules/__init__.py @@ -9,6 +9,7 @@ from modules.controller import ControllerModule from modules.liteserver import LiteserverModule from modules.alert_bot import AlertBotModule +from modules.prometheus import PrometheusModule MODES = { @@ -17,7 +18,8 @@ 'single-nominator': SingleNominatorModule, 'liquid-staking': ControllerModule, 'liteserver': LiteserverModule, - 'alert-bot': AlertBotModule + 'alert-bot': AlertBotModule, + 'prometheus': PrometheusModule } @@ -61,6 +63,7 @@ class Setting: 'ChatId': Setting('alert-bot', None, 'Alerting Telegram chat id'), 'auto_backup': Setting('validator', None, 'Make validator backup every election'), 'auto_backup_path': Setting('validator', '/tmp/mytoncore/auto_backups/', 'Path to store auto-backups'), + 'prometheus_url': Setting('prometheus', None, 'Prometheus pushgateway url'), } diff --git a/modules/prometheus.py b/modules/prometheus.py new file mode 100644 index 00000000..71fc2492 --- /dev/null +++ b/modules/prometheus.py @@ -0,0 +1,59 @@ +from modules.module import MtcModule +import dataclasses +import requests + + +@dataclasses.dataclass +class Metric: + name: str + description: str + type: str + + def to_format(self, value): + return f""" +# HELP {self.name} {self.description} +# TYPE {self.name} {self.type} +{self.name} {value} +""" + + +METRICS = { + 'master_out_of_sync': Metric('validator_masterchain_out_of_sync_seconds', 'Time difference between current time and timestamp of the last known block', 'gauge'), + 'shard_out_of_sync': Metric('validator_shardchain_out_of_sync_blocks', 'Number of blocks validator\'s shardclient is behind the last known block', 'gauge'), + 'out_of_ser': Metric('validator_out_of_serialization', 'Number of blocks last state serialization was ago', 'gauge'), + 'vc_up': Metric('validator_console_up', 'Is validator\'s validator client up', 'gauge'), +} + + +class PrometheusModule(MtcModule): + + description = 'Prometheus format data exporter' + default_value = False + + def __init__(self, ton, local, *args, **kwargs): + super().__init__(ton, local, *args, **kwargs) + + def get_validator_status_metrics(self): + status = self.ton.GetValidatorStatus() + result = [] + if status.masterchain_out_of_sync is not None: + result.append(METRICS['master_out_of_sync'].to_format(status.masterchain_out_of_sync)) + if status.shardchain_out_of_sync is not None: + result.append(METRICS['shard_out_of_sync'].to_format(status.shardchain_out_of_sync)) + if status.masterchain_out_of_ser is not None: + result.append(METRICS['out_of_ser'].to_format(status.masterchain_out_of_ser)) + result.append(METRICS['vc_up'].to_format(int(status.is_working))) + return result + + def push_metrics(self): + if not self.ton.using_prometheus(): + return + + url = self.ton.local.db.get('prometheus_url') + if url is None: + raise Exception('Prometheus url is not set') + metrics = self.get_validator_status_metrics() + requests.post(url, data='\n'.join(metrics).encode()) + + def add_console_commands(self, console): + ... diff --git a/mytoncore/functions.py b/mytoncore/functions.py index 0dd136ff..96f2abf7 100755 --- a/mytoncore/functions.py +++ b/mytoncore/functions.py @@ -572,6 +572,9 @@ def General(local): from modules.alert_bot import AlertBotModule local.start_cycle(AlertBotModule(ton, local).check_status, sec=60, args=()) + from modules.prometheus import PrometheusModule + local.start_cycle(PrometheusModule(ton, local).push_metrics, sec=30, args=()) + thr_sleep() # end define diff --git a/mytoncore/mytoncore.py b/mytoncore/mytoncore.py index 6896bf79..5e2e866a 100644 --- a/mytoncore/mytoncore.py +++ b/mytoncore/mytoncore.py @@ -3123,6 +3123,9 @@ def using_liteserver(self): def using_alert_bot(self): return self.get_mode_value('alert-bot') + def using_prometheus(self): + return self.get_mode_value('prometheus') + def Tlb2Json(self, text): # Заменить скобки start = 0 From 8b080e6f4b029f8de8a94279575551678ca4154e Mon Sep 17 00:00:00 2001 From: yungwine Date: Thu, 16 Jan 2025 20:27:05 +0400 Subject: [PATCH 2/4] add validator_validation_metrics --- modules/prometheus.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/modules/prometheus.py b/modules/prometheus.py index 71fc2492..31dfb8c7 100644 --- a/modules/prometheus.py +++ b/modules/prometheus.py @@ -19,9 +19,11 @@ def to_format(self, value): METRICS = { 'master_out_of_sync': Metric('validator_masterchain_out_of_sync_seconds', 'Time difference between current time and timestamp of the last known block', 'gauge'), - 'shard_out_of_sync': Metric('validator_shardchain_out_of_sync_blocks', 'Number of blocks validator\'s shardclient is behind the last known block', 'gauge'), + 'shard_out_of_sync': Metric('validator_shardchain_out_of_sync_blocks', 'Number of blocks node\'s shardclient is behind the last known block', 'gauge'), 'out_of_ser': Metric('validator_out_of_serialization', 'Number of blocks last state serialization was ago', 'gauge'), - 'vc_up': Metric('validator_console_up', 'Is validator\'s validator client up', 'gauge'), + 'vc_up': Metric('validator_console_up', 'Is `validator-console` up', 'gauge'), + 'validator_id': Metric('validator_index', 'Validator index', 'gauge'), + 'validator_stake': Metric('validator_stake', 'Validator stake', 'gauge'), } @@ -33,9 +35,8 @@ class PrometheusModule(MtcModule): def __init__(self, ton, local, *args, **kwargs): super().__init__(ton, local, *args, **kwargs) - def get_validator_status_metrics(self): + def get_validator_status_metrics(self, result: list): status = self.ton.GetValidatorStatus() - result = [] if status.masterchain_out_of_sync is not None: result.append(METRICS['master_out_of_sync'].to_format(status.masterchain_out_of_sync)) if status.shardchain_out_of_sync is not None: @@ -43,7 +44,18 @@ def get_validator_status_metrics(self): if status.masterchain_out_of_ser is not None: result.append(METRICS['out_of_ser'].to_format(status.masterchain_out_of_ser)) result.append(METRICS['vc_up'].to_format(int(status.is_working))) - return result + + def get_validator_validation_metrics(self, result: list): + index = self.ton.GetValidatorIndex() + result.append(METRICS['validator_id'].to_format(index)) + config = self.ton.GetConfig34() + save_elections = self.ton.GetSaveElections() + elections = save_elections.get(str(config["startWorkTime"])) + if elections is not None: + adnl = self.ton.GetAdnlAddr() + stake = elections.get(adnl, {}).get('stake') + if stake: + result.append(METRICS['validator_stake'].to_format(round(stake, 2))) def push_metrics(self): if not self.ton.using_prometheus(): @@ -52,7 +64,9 @@ def push_metrics(self): url = self.ton.local.db.get('prometheus_url') if url is None: raise Exception('Prometheus url is not set') - metrics = self.get_validator_status_metrics() + metrics = [] + self.local.try_function(self.get_validator_status_metrics, args=[metrics]) + self.local.try_function(self.get_validator_validation_metrics, args=[metrics]) requests.post(url, data='\n'.join(metrics).encode()) def add_console_commands(self, console): From 207a517d24777cda8120a1d9ef6954ab1fa1aff6 Mon Sep 17 00:00:00 2001 From: yungwine Date: Fri, 17 Jan 2025 13:49:31 +0400 Subject: [PATCH 3/4] add celldb prometheus metrics --- modules/prometheus.py | 10 ++++++++-- mytoncore/mytoncore.py | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/prometheus.py b/modules/prometheus.py index 31dfb8c7..6d1d3114 100644 --- a/modules/prometheus.py +++ b/modules/prometheus.py @@ -23,7 +23,9 @@ def to_format(self, value): 'out_of_ser': Metric('validator_out_of_serialization', 'Number of blocks last state serialization was ago', 'gauge'), 'vc_up': Metric('validator_console_up', 'Is `validator-console` up', 'gauge'), 'validator_id': Metric('validator_index', 'Validator index', 'gauge'), - 'validator_stake': Metric('validator_stake', 'Validator stake', 'gauge'), + 'stake': Metric('validator_stake', 'Validator stake', 'gauge'), + 'celldb_gc_block': Metric('validator_celldb_gc_block', 'Celldb GC block latency', 'gauge'), + 'celldb_gc_state': Metric('validator_celldb_gc_state', 'Celldb GC queue size', 'gauge'), } @@ -43,6 +45,10 @@ def get_validator_status_metrics(self, result: list): result.append(METRICS['shard_out_of_sync'].to_format(status.shardchain_out_of_sync)) if status.masterchain_out_of_ser is not None: result.append(METRICS['out_of_ser'].to_format(status.masterchain_out_of_ser)) + if status.masterchainblock is not None and status.gcmasterchainblock is not None: + result.append(METRICS['celldb_gc_block'].to_format(status.masterchainblock - status.gcmasterchainblock)) + if status.gcmasterchainblock is not None and status.last_deleted_mc_state is not None: + result.append(METRICS['celldb_gc_state'].to_format(status.gcmasterchainblock - status.last_deleted_mc_state)) result.append(METRICS['vc_up'].to_format(int(status.is_working))) def get_validator_validation_metrics(self, result: list): @@ -55,7 +61,7 @@ def get_validator_validation_metrics(self, result: list): adnl = self.ton.GetAdnlAddr() stake = elections.get(adnl, {}).get('stake') if stake: - result.append(METRICS['validator_stake'].to_format(round(stake, 2))) + result.append(METRICS['stake'].to_format(round(stake, 2))) def push_metrics(self): if not self.ton.using_prometheus(): diff --git a/mytoncore/mytoncore.py b/mytoncore/mytoncore.py index 5e2e866a..2be92a34 100644 --- a/mytoncore/mytoncore.py +++ b/mytoncore/mytoncore.py @@ -796,6 +796,7 @@ def GetValidatorStatus(self): status.masterchain_out_of_ser = status.masterchainblock - status.stateserializermasterchainseqno status.out_of_sync = status.masterchain_out_of_sync if status.masterchain_out_of_sync > status.shardchain_out_of_sync else status.shardchain_out_of_sync status.out_of_ser = status.masterchain_out_of_ser + status.last_deleted_mc_state = int(parse(result, "last_deleted_mc_state", '\n')) except Exception as ex: self.local.add_log(f"GetValidatorStatus warning: {ex}", "warning") status.is_working = False From 8dcae92b4f6569da8a45dcb1316c0eb27d78ffe8 Mon Sep 17 00:00:00 2001 From: yungwine Date: Mon, 20 Jan 2025 14:13:17 +0400 Subject: [PATCH 4/4] improve checking vc_up metric --- modules/prometheus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/prometheus.py b/modules/prometheus.py index 6d1d3114..598695ac 100644 --- a/modules/prometheus.py +++ b/modules/prometheus.py @@ -39,6 +39,7 @@ def __init__(self, ton, local, *args, **kwargs): def get_validator_status_metrics(self, result: list): status = self.ton.GetValidatorStatus() + is_working = status.is_working or (status.unixtime is not None) if status.masterchain_out_of_sync is not None: result.append(METRICS['master_out_of_sync'].to_format(status.masterchain_out_of_sync)) if status.shardchain_out_of_sync is not None: @@ -49,7 +50,7 @@ def get_validator_status_metrics(self, result: list): result.append(METRICS['celldb_gc_block'].to_format(status.masterchainblock - status.gcmasterchainblock)) if status.gcmasterchainblock is not None and status.last_deleted_mc_state is not None: result.append(METRICS['celldb_gc_state'].to_format(status.gcmasterchainblock - status.last_deleted_mc_state)) - result.append(METRICS['vc_up'].to_format(int(status.is_working))) + result.append(METRICS['vc_up'].to_format(int(is_working))) def get_validator_validation_metrics(self, result: list): index = self.ton.GetValidatorIndex()