Skip to content

Commit

Permalink
gather etcd_server_slow metrics (#897)
Browse files Browse the repository at this point in the history
  • Loading branch information
tremes committed Jan 31, 2024
1 parent 226e253 commit ec30666
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
5 changes: 4 additions & 1 deletion docs/gathered-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -1175,13 +1175,14 @@ Collects cluster Federated Monitoring metrics.
The GET REST query to URL /federate
Gathered metrics:
- `virt_platform`
- `etcd_object_counts`
- `cluster_installer`
- `vsphere_node_hw_version_total`
- namespace CPU and memory usage
- `console_helm_installs_total`
- `console_helm_upgrades_total`
- `console_helm_uninstalls_total`
- `openshift_apps_deploymentconfigs_strategy_total`
- `etcd_server_slow_apply_total`
- `etcd_server_slow_read_indexes_total`
- followed by at most 1000 lines of `ALERTS` metric

### API Reference
Expand Down Expand Up @@ -1214,6 +1215,8 @@ None
- `console_helm_upgrades_total` introduced in version 4.12+
- `console_helm_uninstalls_total` introduced in version 4.12+
- `openshift_apps_deploymentconfigs_strategy_total` introduced in version 4.13+ and backported to version 4.12.5+
- `etcd_server_slow_apply_total` introduced in version 4.16+
- `etcd_server_slow_read_indexes_total` introduced in version 4.16+


## MutatingWebhookConfigurations
Expand Down
8 changes: 8 additions & 0 deletions docs/insights-archive-sample/config/metrics
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# TYPE cluster_installer untyped
cluster_installer{endpoint="metrics",instance="10.0.0.4:9099",invoker="openshift-internal-ci/release-openshift-origin-installer-launch-gcp/1275022284997791744",job="cluster-version-operator",namespace="openshift-cluster-version",pod="cluster-version-operator-5f7b8d89b5-t9cdr",service="cluster-version-operator",type="openshift-install",version="v4.4.0",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1592829361218
# TYPE etcd_server_slow_apply_total untyped
etcd_server_slow_apply_total{endpoint="etcd-metrics",instance="10.0.109.44:9979",job="etcd",namespace="openshift-etcd",pod="etcd-ip-10-0-109-44.us-west-1.compute.internal",service="etcd",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 8 1706601229875
etcd_server_slow_apply_total{endpoint="etcd-metrics",instance="10.0.63.176:9979",job="etcd",namespace="openshift-etcd",pod="etcd-ip-10-0-63-176.us-west-1.compute.internal",service="etcd",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 0 1706601227589
etcd_server_slow_apply_total{endpoint="etcd-metrics",instance="10.0.96.175:9979",job="etcd",namespace="openshift-etcd",pod="etcd-ip-10-0-96-175.us-west-1.compute.internal",service="etcd",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1706601209352
# TYPE etcd_server_slow_read_indexes_total untyped
etcd_server_slow_read_indexes_total{endpoint="etcd-metrics",instance="10.0.109.44:9979",job="etcd",namespace="openshift-etcd",pod="etcd-ip-10-0-109-44.us-west-1.compute.internal",service="etcd",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 0 1706601229875
etcd_server_slow_read_indexes_total{endpoint="etcd-metrics",instance="10.0.63.176:9979",job="etcd",namespace="openshift-etcd",pod="etcd-ip-10-0-63-176.us-west-1.compute.internal",service="etcd",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 0 1706601227589
etcd_server_slow_read_indexes_total{endpoint="etcd-metrics",instance="10.0.96.175:9979",job="etcd",namespace="openshift-etcd",pod="etcd-ip-10-0-96-175.us-west-1.compute.internal",service="etcd",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 0 1706601209352
# TYPE console_helm_installs_total untyped
console_helm_installs_total{chart_name="elasticsearch-sed",chart_version="1.0.0",container="console",endpoint="https",instance="10.129.0.63:8443",job="console",namespace="openshift-console",pod="console-8d5d6d756-6nscn",service="console",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1660729192308
console_helm_installs_total{chart_name="redis-sed",chart_version="1.0.1",container="console",endpoint="https",instance="10.129.0.63:8443",job="console",namespace="openshift-console",pod="console-8d5d6d756-6nscn",service="console",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1660729192308
Expand Down
7 changes: 6 additions & 1 deletion pkg/gatherers/clusterconfig/gather_most_recent_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ const (
// The GET REST query to URL /federate
// Gathered metrics:
// - `virt_platform`
// - `etcd_object_counts`
// - `cluster_installer`
// - `vsphere_node_hw_version_total`
// - namespace CPU and memory usage
// - `console_helm_installs_total`
// - `console_helm_upgrades_total`
// - `console_helm_uninstalls_total`
// - `openshift_apps_deploymentconfigs_strategy_total`
// - `etcd_server_slow_apply_total`
// - `etcd_server_slow_read_indexes_total`
// - followed by at most 1000 lines of `ALERTS` metric
//
// ### API Reference
Expand Down Expand Up @@ -65,6 +66,8 @@ const (
// - `console_helm_upgrades_total` introduced in version 4.12+
// - `console_helm_uninstalls_total` introduced in version 4.12+
// - `openshift_apps_deploymentconfigs_strategy_total` introduced in version 4.13+ and backported to version 4.12.5+
// - `etcd_server_slow_apply_total` introduced in version 4.16+
// - `etcd_server_slow_read_indexes_total` introduced in version 4.16+
func (g *Gatherer) GatherMostRecentMetrics(ctx context.Context) ([]record.Record, []error) {
metricsRESTClient, err := rest.RESTClientFor(g.metricsGatherKubeConfig)
if err != nil {
Expand All @@ -86,6 +89,8 @@ func gatherMostRecentMetrics(ctx context.Context, metricsClient rest.Interface)
Param("match[]", "console_helm_upgrades_total").
Param("match[]", "console_helm_uninstalls_total").
Param("match[]", "openshift_apps_deploymentconfigs_strategy_total").
Param("match[]", "etcd_server_slow_apply_total").
Param("match[]", "etcd_server_slow_read_indexes_total").
DoRaw(ctx)
if err != nil {
klog.Errorf("Unable to retrieve most recent metrics: %v", err)
Expand Down

0 comments on commit ec30666

Please sign in to comment.