diff --git a/BaseCollector.py b/BaseCollector.py index a50b688e..b587c5c9 100644 --- a/BaseCollector.py +++ b/BaseCollector.py @@ -148,7 +148,7 @@ def wait_for_inventory_data(self): logger.info(f'-----Initial query done------: {self.name}') return - def create_api_response_metric(self, collector: str, api_responding: int) -> GaugeMetricFamily: + def create_api_response_code_metric(self, collector: str, api_responding: int) -> GaugeMetricFamily: gauge = GaugeMetricFamily('vrops_api_response', 'vrops-exporter', labels=['target', 'class']) gauge.add_metric(labels=[self.target, collector.lower()], value=api_responding) @@ -157,6 +157,12 @@ def create_api_response_metric(self, collector: str, api_responding: int) -> Gau return gauge return gauge + def create_api_response_time_metric(self, collector: str, response_time: float) -> GaugeMetricFamily: + gauge = GaugeMetricFamily('vrops_api_response_time_seconds', 'vrops-exporter', + labels=['target', 'class']) + gauge.add_metric(labels=[self.target, collector.lower()], value=response_time) + return gauge + def generate_metrics(self, label_names: list) -> dict: collector_config = self.read_collector_config() metrics = {m['key']: {'metric_suffix': m['metric_suffix'], diff --git a/collectors/PropertiesCollector.py b/collectors/PropertiesCollector.py index 9ffdb4de..5cb931ef 100644 --- a/collectors/PropertiesCollector.py +++ b/collectors/PropertiesCollector.py @@ -28,12 +28,13 @@ def collect(self): metrics = self.generate_metrics(label_names=self.label_names) project_ids = self.get_project_ids_by_target() if self.project_ids else [] - values, api_responding = self.vrops.get_latest_properties_multiple(self.target, - token, - uuids, - [m for m in metrics], - self.name) - yield self.create_api_response_metric(self.name, api_responding) + values, api_responding, response_time = self.vrops.get_latest_properties_multiple(self.target, + token, + uuids, + [m for m in metrics], + self.name) + yield 
self.create_api_response_code_metric(self.name, api_responding) + yield self.create_api_response_time_metric(self.name, response_time) if not values: logger.warning(f'No values in the response for {self.name}. API code: {api_responding}') @@ -47,6 +48,8 @@ def collect(self): for value_entry in resource.get('property-contents', {}).get('property-content', []): labels = self.set_labels(resource_id, project_ids) + if not labels: + continue statkey = value_entry.get('statKey') values_received.add(statkey) diff --git a/collectors/StatsCollector.py b/collectors/StatsCollector.py index 8a3d6f92..5b49b6fb 100644 --- a/collectors/StatsCollector.py +++ b/collectors/StatsCollector.py @@ -29,12 +29,13 @@ def collect(self): metrics = self.generate_metrics(label_names=self.label_names) project_ids = self.get_project_ids_by_target() if self.project_ids else [] - values, api_responding = self.vrops.get_latest_stats_multiple(self.target, - token, - uuids, - [m for m in metrics], - self.name) - yield self.create_api_response_metric(self.name, api_responding) + values, api_responding, response_time = self.vrops.get_latest_stats_multiple(self.target, + token, + uuids, + [m for m in metrics], + self.name) + yield self.create_api_response_code_metric(self.name, api_responding) + yield self.create_api_response_time_metric(self.name, response_time) if not values: logger.warning(f'No values in the response for {self.name}. 
API code: {api_responding}') @@ -46,6 +47,8 @@ def collect(self): for resource in values: resource_id = resource.get('resourceId') labels = self.set_labels(resource_id, project_ids) + if not labels: + continue for value_entry in resource.get('stat-list', {}).get('stat', []): statkey = value_entry.get('statKey', {}).get('key') diff --git a/collectors/VMPropertiesCollector.py b/collectors/VMPropertiesCollector.py index 142bd375..964a986b 100644 --- a/collectors/VMPropertiesCollector.py +++ b/collectors/VMPropertiesCollector.py @@ -14,7 +14,7 @@ def get_resource_uuids(self): def set_labels(self, resource_id, project_ids): project_id = [vm_id_project_mapping[resource_id] for vm_id_project_mapping in project_ids if - resource_id in vm_id_project_mapping] + resource_id in vm_id_project_mapping] if resource_id in self.vms else [] project_id = project_id[0] if project_id else 'internal' return [self.vms[resource_id]['name'], @@ -22,4 +22,4 @@ def set_labels(self, resource_id, project_ids): self.vms[resource_id]['datacenter'].lower(), self.vms[resource_id]['cluster'], self.vms[resource_id]['parent_host_name'], - project_id] if resource_id else [] + project_id] if resource_id in self.vms else [] diff --git a/collectors/VMStatsCollector.py b/collectors/VMStatsCollector.py index 12976c75..3fa7c16c 100644 --- a/collectors/VMStatsCollector.py +++ b/collectors/VMStatsCollector.py @@ -14,7 +14,7 @@ def get_resource_uuids(self): def set_labels(self, resource_id, project_ids): project_id = [vm_id_project_mapping[resource_id] for vm_id_project_mapping in project_ids if - resource_id in vm_id_project_mapping] + resource_id in vm_id_project_mapping] if resource_id in self.vms else [] project_id = project_id[0] if project_id else 'internal' return [self.vms[resource_id]['name'], @@ -22,4 +22,4 @@ def set_labels(self, resource_id, project_ids): self.vms[resource_id]['datacenter'].lower(), self.vms[resource_id]['cluster'], self.vms[resource_id]['parent_host_name'],
- project_id] if resource_id else [] + project_id] if resource_id in self.vms else [] diff --git a/tests/TestCollectors.py b/tests/TestCollectors.py index 57546c16..45ce9af0 100644 --- a/tests/TestCollectors.py +++ b/tests/TestCollectors.py @@ -129,7 +129,7 @@ def test_collector_metrics(self): multiple_metrics_generated.append( {"resourceId": "3628-93a1-56e84634050814", "stat-list": {"stat": [ {"timestamps": [1582797716394], "statKey": {"key": metric['key']}, "data": [55.0]}]}}) - Vrops.get_latest_stats_multiple = MagicMock(return_value=(multiple_metrics_generated, 200)) + Vrops.get_latest_stats_multiple = MagicMock(return_value=(multiple_metrics_generated, 200, 0.5)) if "Properties" in collector: multiple_metrics_generated = list() @@ -149,7 +149,7 @@ def test_collector_metrics(self): "property-content": [ {"timestamps": [1582797716394], "statKey": metric['key'], "values": ["test"]}]}}) - Vrops.get_latest_properties_multiple = MagicMock(return_value=(multiple_metrics_generated, 200)) + Vrops.get_latest_properties_multiple = MagicMock(return_value=(multiple_metrics_generated, 200, 0.5)) thread_list = list() diff --git a/tests/collector_config.yaml b/tests/collector_config.yaml index 22eba7c4..45119af9 100644 --- a/tests/collector_config.yaml +++ b/tests/collector_config.yaml @@ -228,6 +228,8 @@ VMStatsCPUCollector: key: "cpu|latency_average" - metric_suffix: "cpu_wait_summation_miliseconds" key: "cpu|wait_summation" + - metric_suffix: "cpu_io_wait_percentage" + key: "cpu|iowaitPct" VMStatsNetworkCollector: - metric_suffix: "network_packets_dropped_rx_number" diff --git a/tests/metrics.yaml b/tests/metrics.yaml index b1741b89..8c45f041 100644 --- a/tests/metrics.yaml +++ b/tests/metrics.yaml @@ -1,5 +1,6 @@ VCenterStatsCollector: - 'vrops_api_response{class="vcenterstatscollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vcenterstatscollector",target="testhost.test"}' - 
'vrops_vcenter_diskspace_usage_gigabytes{datacenter="datacenter3",vcenter="vcenter1"}' - 'vrops_vcenter_diskspace_total_gigabytes{datacenter="datacenter3",vcenter="vcenter1"}' - 'vrops_vcenter_vcsa_certificate_remaining_days{datacenter="datacenter3",vcenter="vcenter1"}' @@ -8,6 +9,7 @@ VCenterStatsCollector: VCenterPropertiesCollector: - 'vrops_api_response{class="vcenterpropertiescollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vcenterpropertiescollector",target="testhost.test"}' - 'vrops_vcenter_vc_fullname{datacenter="datacenter3",vcenter="vcenter1"}' - 'vrops_vcenter_summary_version{datacenter="datacenter3",summary_version="test",vcenter="vcenter1"}' - 'vrops_vcenter_vc_fullname{datacenter="datacenter3",vc_fullname="test",vcenter="vcenter1"}' @@ -15,6 +17,7 @@ VCenterPropertiesCollector: ClusterStatsCollector: - 'vrops_api_response{class="clusterstatscollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="clusterstatscollector",target="testhost.test"}' - 'vrops_cluster_summary_total_number_vms{datacenter="datacenter3",vccluster="cluster1",vcenter="vcenter1"}' - 'vrops_cluster_cpu_usage_mhz{datacenter="datacenter3",vccluster="cluster1",vcenter="vcenter1"}' - 'vrops_cluster_cluster_running_hosts{datacenter="datacenter3",vccluster="cluster1",vcenter="vcenter1"}' @@ -26,6 +29,7 @@ ClusterStatsCollector: ClusterPropertiesCollector: - 'vrops_api_response{class="clusterpropertiescollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="clusterpropertiescollector",target="testhost.test"}' - 'vrops_cluster_configuration_dasconfig_admissioncontrolpolicyid{datacenter="datacenter3",vccluster="cluster1",vcenter="vcenter1"}' - 'vrops_cluster_configuration_drsconfig_defaultvmbehavior{datacenter="datacenter3",state="test",vccluster="cluster1",vcenter="vcenter1"}' - 'vrops_cluster_configuration_drsconfig_enabled{datacenter="datacenter3",state="n/a",vccluster="cluster1",vcenter="vcenter1"}' @@ -41,6 
+45,7 @@ ClusterPropertiesCollector: HostSystemStatsCollector: - 'vrops_api_response{class="hostsystemstatscollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="hostsystemstatscollector",target="testhost.test"}' - 'vrops_hostsystem_cpu_co_stop_miliseconds{datacenter="datacenter3",hostsystem="hostsystem1",vccluster="cluster3",vcenter="vcenter1"}' - 'vrops_hostsystem_summary_number_vmotion_total{datacenter="datacenter3",hostsystem="hostsystem1",vccluster="cluster3",vcenter="vcenter1"}' - 'vrops_hostsystem_memory_consumed_by_vms_kilobytes{datacenter="datacenter3",hostsystem="hostsystem1",vccluster="cluster3",vcenter="vcenter1"}' @@ -77,6 +82,7 @@ HostSystemStatsCollector: HostSystemPropertiesCollector: - 'vrops_api_response{class="hostsystempropertiescollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="hostsystempropertiescollector",target="testhost.test"}' - 'vrops_hostsystem_runtime_powerstate{datacenter="datacenter3",hostsystem="hostsystem1",state="n/a",vccluster="cluster3",vcenter="vcenter1"}' - 'vrops_hostsystem_sys_build{datacenter="datacenter3",hostsystem="hostsystem1",sys_build="test",vccluster="cluster3",vcenter="vcenter1"}' - 'vrops_hostsystem_runtime_connectionstate{datacenter="datacenter3",hostsystem="hostsystem1",state="test",vccluster="cluster3",vcenter="vcenter1"}' @@ -94,6 +100,7 @@ HostSystemPropertiesCollector: DatastoreStatsCollector: - 'vrops_api_response{class="datastorestatscollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="datastorestatscollector",target="testhost.test"}' - 'vrops_datastore_diskspace_total_usage_gigabytes{datacenter="datacenter3",datastore="vmfs_vc-w-0_p_ssd_bb091_001",type="vmfs_p_ssd",vcenter="vcenter1"}' - 'vrops_datastore_summary_total_number_vms{datacenter="datacenter3",datastore="vmfs_vc-w-0_p_ssd_bb091_001",type="vmfs_p_ssd",vcenter="vcenter1"}' - 
'vrops_datastore_diskspace_capacity_gigabytes{datacenter="datacenter3",datastore="vmfs_vc-w-0_p_ssd_bb091_001",type="vmfs_p_ssd",vcenter="vcenter1"}' @@ -101,11 +108,13 @@ DatastoreStatsCollector: DatastorePropertiesCollector: - 'vrops_api_response{class="datastorepropertiescollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="datastorepropertiescollector",target="testhost.test"}' - 'vrops_datastore_summary_datastore_accessible{datacenter="datacenter3",datastore="vmfs_vc-w-0_p_ssd_bb091_001",state="test",type="vmfs_p_ssd",vcenter="vcenter1"}' - 'vrops_datastore_summary_datastore_accessible{datacenter="datacenter3",datastore="vmfs_vc-w-0_p_ssd_bb091_001",state="n/a",type="vmfs_p_ssd",vcenter="vcenter1"}' VMStatsNetworkCollector: - 'vrops_api_response{class="vmstatsnetworkcollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vmstatsnetworkcollector",target="testhost.test"}' - 'vrops_virtualmachine_network_packets_dropped_rx_number{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_network_packets_tx_number{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_network_packets_rx_number{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' @@ -116,6 +125,7 @@ VMStatsNetworkCollector: VMStatsCPUCollector: - 'vrops_api_response{class="vmstatscpucollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vmstatscpucollector",target="testhost.test"}' - 'vrops_virtualmachine_cpu_wait_summation_miliseconds{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 
'vrops_virtualmachine_cpu_usage_ratio{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_cpu_contention_ratio{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' @@ -124,9 +134,11 @@ VMStatsCPUCollector: - 'vrops_virtualmachine_cpu_latency_average{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_cpu_ready_ratio{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_cpu_usage_average_mhz{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' + - 'vrops_virtualmachine_cpu_io_wait_percentage{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' VMStatsMemoryCollector: - 'vrops_api_response{class="vmstatsmemorycollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vmstatsmemorycollector",target="testhost.test"}' - 'vrops_virtualmachine_memory_activewrite_kilobytes{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_memory_active_ratio{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_memory_usage_average{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' @@ -139,6 +151,7 @@ VMStatsMemoryCollector: VMStatsVirtualDiskCollector: - 'vrops_api_response{class="vmstatsvirtualdiskcollector",target="testhost.test"}' + - 
'vrops_api_response_time_seconds{class="vmstatsvirtualdiskcollector",target="testhost.test"}' - 'vrops_virtualmachine_virtual_disk_read_kilobytes_per_second{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_virtual_disk_outstanding_read_number{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_virtual_disk_outstanding_write_number{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' @@ -150,6 +163,7 @@ VMStatsVirtualDiskCollector: VMStatsDefaultCollector: - 'vrops_api_response{class="vmstatsdefaultcollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vmstatsdefaultcollector",target="testhost.test"}' - 'vrops_virtualmachine_datastore_total{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_datastore_outstanding_io_requests{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_diskspace_virtual_machine_used_gigabytes{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' @@ -162,6 +176,7 @@ VMStatsDefaultCollector: VMPropertiesCollector: - 'vrops_api_response{class="vmpropertiescollector",target="testhost.test"}' + - 'vrops_api_response_time_seconds{class="vmpropertiescollector",target="testhost.test"}' - 'vrops_virtualmachine_summary_ethernetcards{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",summary_ethernetCards="test",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 
'vrops_virtualmachine_config_hardware_memory_kilobytes{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' - 'vrops_virtualmachine_summary_ethernetcards{datacenter="datacenter3",hostsystem="hostsystem3",project="0815",vccluster="cluster3",vcenter="vcenter1",virtualmachine="vm1"}' diff --git a/tools/Vrops.py b/tools/Vrops.py index f1ea5216..b0fb66e0 100644 --- a/tools/Vrops.py +++ b/tools/Vrops.py @@ -145,7 +145,7 @@ def get_vms(self, target, token, parent_uuids): return self.get_resources(target, token, parent_uuids, resourcekinds=["VirtualMachine"], data_receiving=True) def get_latest_values_multiple(self, target: str, token: str, uuids: list, keys: list, collector: str, - kind: str = None) -> (list, int): + kind: str = None) -> (list, int, float): # vrops can not handle more than 1000 uuids for stats uuids_chunked = list(chunk_list(uuids, 1000)) if kind == 'stats' else [uuids] @@ -180,18 +180,20 @@ def get_latest_values_multiple(self, target: str, token: str, uuids: list, keys: t.join() return_list = list() - response_status_code = 503 + response_status_codes = list() + response_time_elapsed = list() while not q.empty(): returned_chunks = q.get() - response_status_code = returned_chunks[1] if returned_chunks[1] > 200 else 200 + response_time_elapsed.append(returned_chunks[2]) + response_status_codes.append(returned_chunks[1]) return_list.extend(returned_chunks[0]) logger.debug(f'Amount uuids: {len(uuids)}') logger.debug(f'Fetched : {len({r.get("resourceId") for r in return_list})}') logger.debug('<--------------------------------------------------') - return return_list, response_status_code + return return_list, max(response_status_codes, default=503), sum(response_time_elapsed) / max(len(response_time_elapsed), 1) def get_latest_properties_multiple(self, target: str, token: str, uuids: list, keys: list, collector: str): return self.get_latest_values_multiple(target, token, uuids, keys, collector,
kind='properties') @@ -221,18 +223,19 @@ def _get_chunk(q, uuid_list, url, headers, keys, target, kind, collector, chunk_ timeout=30) except Exception as e: logger.error(f'{collector} has problems getting latest data from: {target} - Error: {e}') - return False, 503 + q.put([[], 503, 999]) + return if response.status_code == 200: try: - q.put([response.json().get('values', []), response.status_code]) + q.put([response.json().get('values', []), response.status_code, response.elapsed.total_seconds()]) except json.decoder.JSONDecodeError as e: - logger.error(f'Catching JSONDecodeError for {collector}, target: {collector}, chunk_iteration: ' + logger.error(f'Catching JSONDecodeError for {collector}, target: {target}, chunk_iteration: ' f'{chunk_iteration} - Error: {e}') - return False, response.status_code + q.put([[], response.status_code, response.elapsed.total_seconds()]) else: logger.error(f'Return code: {response.status_code} != 200 for {collector} : {response.text}') - return False, response.status_code + q.put([[], response.status_code, response.elapsed.total_seconds()]) def get_project_ids(target: str, token: str, uuids: list, collector: str) -> (list, int): logger.debug('>---------------------------------- get_project_ids')