Skip to content

Commit

Permalink
Merge pull request #821 from martinpovolny/apiserver_request_total
Browse files Browse the repository at this point in the history
manifests/telemetry: replace apiserver_request_count with apiserver_request_total
  • Loading branch information
openshift-merge-robot committed Jul 1, 2020
2 parents 4f01de4 + 378ccae commit 7739eaf
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions Documentation/data-collection.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ data:
# upgrades from failing for end users.
- '{__name__="ALERTS",alertstate="firing"}'
# the following three metrics will be used for SLA analysis reports.
# (@openshift/openshift-team-olm) code:apiserver_request_count:rate:sum identifies average of occurances
# (@openshift/openshift-team-olm) code:apiserver_request_total:rate:sum identifies average of occurrences
# of each http status code over 10 minutes
- '{__name__="code:apiserver_request_count:rate:sum"}'
- '{__name__="code:apiserver_request_total:rate:sum"}'
# (@openshift/openshift-team-olm, @openshift/openshift-team-cluster-manager)
# cluster:capacity_cpu_cores:sum is the total number of CPU cores in the
# cluster labeled by node role and type.
Expand Down
12 changes: 6 additions & 6 deletions Documentation/sample-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ return the full set of metrics that the Telemeter client captures:

[embedmd]:# (telemeter_query txt)
```txt
{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_count:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|csv_succeeded|csv_abnormal|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|monitoring:haproxy_server_http_responses_total:sum|rhmi_status",alertstate=~"firing|"}
{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_total:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|csv_succeeded|csv_abnormal|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|monitoring:haproxy_server_http_responses_total:sum|rhmi_status",alertstate=~"firing|"}
```

For reference, here is an example response produced by a running OpenShift cluster:
Expand Down Expand Up @@ -165,11 +165,11 @@ cluster_version_available_updates{channel="stable-4.2",endpoint="metrics",instan
# TYPE cluster_version_payload untyped
cluster_version_payload{endpoint="metrics",instance="10.0.150.196:9099",job="cluster-version-operator",namespace="openshift-cluster-version",pod="cluster-version-operator-5cb5f8c9db-st825",service="cluster-version-operator",type="applied",version="4.2.0-0.okd-2019-07-03-073817",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 399 1562168623759
cluster_version_payload{endpoint="metrics",instance="10.0.150.196:9099",job="cluster-version-operator",namespace="openshift-cluster-version",pod="cluster-version-operator-5cb5f8c9db-st825",service="cluster-version-operator",type="pending",version="4.2.0-0.okd-2019-07-03-073817",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0 1562168623759
# TYPE code:apiserver_request_count:rate:sum untyped
code:apiserver_request_count:rate:sum{code="200",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 33.082456140350565 1562168623634
code:apiserver_request_count:rate:sum{code="201",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 4.231578947368422 1562168623634
code:apiserver_request_count:rate:sum{code="404",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.5087719298245614 1562168623634
code:apiserver_request_count:rate:sum{code="409",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.21403508771929822 1562168623634
# TYPE code:apiserver_request_total:rate:sum untyped
code:apiserver_request_total:rate:sum{code="200",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 33.082456140350565 1562168623634
code:apiserver_request_total:rate:sum{code="201",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 4.231578947368422 1562168623634
code:apiserver_request_total:rate:sum{code="404",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.5087719298245614 1562168623634
code:apiserver_request_total:rate:sum{code="409",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.21403508771929822 1562168623634
# TYPE node_uname_info gauge
node_uname_info{domainname="(none)",endpoint="https",instance="10.0.141.152:9100",job="node-exporter",machine="x86_64",namespace="openshift-monitoring",nodename="ip-10-0-141-152",pod="node-exporter-ncdgg",release="4.18.0-80.1.2.el8_0.x86_64",service="node-exporter",sysname="Linux",version="#1 SMP Sun Apr 28 09:21:22 UTC 2019"} 1
node_uname_info{domainname="(none)",endpoint="https",instance="10.0.141.66:9100",job="node-exporter",machine="x86_64",namespace="openshift-monitoring",nodename="ip-10-0-141-66",pod="node-exporter-7rzzv",release="4.18.0-80.1.2.el8_0.x86_64",service="node-exporter",sysname="Linux",version="#1 SMP Sun Apr 28 09:21:22 UTC 2019"} 1
Expand Down
2 changes: 1 addition & 1 deletion Documentation/telemeter_query
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_count:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|csv_succeeded|csv_abnormal|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|monitoring:haproxy_server_http_responses_total:sum|rhmi_status",alertstate=~"firing|"}
{__name__=~"cluster:usage:.*|count:up0|count:up1|cluster_version|cluster_version_available_updates|cluster_operator_up|cluster_operator_conditions|cluster_version_payload|cluster_installer|cluster_infrastructure_provider|cluster_feature_set|instance:etcd_object_counts:sum|ALERTS|code:apiserver_request_total:rate:sum|cluster:capacity_cpu_cores:sum|cluster:capacity_memory_bytes:sum|cluster:cpu_usage_cores:sum|cluster:memory_usage_bytes:sum|openshift:cpu_usage_cores:sum|openshift:memory_usage_bytes:sum|workload:cpu_usage_cores:sum|workload:memory_usage_bytes:sum|cluster:virt_platform_nodes:sum|cluster:node_instance_type_count:sum|cnv:vmi_status_running:count|node_role_os_version_machine:cpu_capacity_cores:sum|node_role_os_version_machine:cpu_capacity_sockets:sum|subscription_sync_total|csv_succeeded|csv_abnormal|ceph_cluster_total_bytes|ceph_cluster_total_used_raw_bytes|ceph_health_status|job:ceph_osd_metadata:count|job:kube_pv:count|job:ceph_pools_iops:total|job:ceph_pools_iops_bytes:total|job:ceph_versions_running:count|job:noobaa_total_unhealthy_buckets:sum|job:noobaa_bucket_count:sum|job:noobaa_total_object_count:sum|noobaa_accounts_num|noobaa_total_usage|console_url|cluster:network_attachment_definition_instances:max|cluster:network_attachment_definition_enabled_instance_up:max|insightsclient_request_send_total|cam_app_workload_migrations|cluster:apiserver_current_inflight_requests:sum:max_over_time:2m|cluster:telemetry_selected_series:count|openshift:prometheus_tsdb_head_series:sum|openshift:prometheus_tsdb_head_samples_appended_total:sum|monitoring:container_memory_working_set_bytes:sum|monitoring:haproxy_server_http_responses_total:sum|rhmi_status",alertstate=~"firing|"}
10 changes: 5 additions & 5 deletions Documentation/timeseries.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ cluster_version_available_updates{channel="stable-4.2",endpoint="metrics",instan
# TYPE cluster_version_payload untyped
cluster_version_payload{endpoint="metrics",instance="10.0.150.196:9099",job="cluster-version-operator",namespace="openshift-cluster-version",pod="cluster-version-operator-5cb5f8c9db-st825",service="cluster-version-operator",type="applied",version="4.2.0-0.okd-2019-07-03-073817",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 399 1562168623759
cluster_version_payload{endpoint="metrics",instance="10.0.150.196:9099",job="cluster-version-operator",namespace="openshift-cluster-version",pod="cluster-version-operator-5cb5f8c9db-st825",service="cluster-version-operator",type="pending",version="4.2.0-0.okd-2019-07-03-073817",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0 1562168623759
# TYPE code:apiserver_request_count:rate:sum untyped
code:apiserver_request_count:rate:sum{code="200",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 33.082456140350565 1562168623634
code:apiserver_request_count:rate:sum{code="201",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 4.231578947368422 1562168623634
code:apiserver_request_count:rate:sum{code="404",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.5087719298245614 1562168623634
code:apiserver_request_count:rate:sum{code="409",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.21403508771929822 1562168623634
# TYPE code:apiserver_request_total:rate:sum untyped
code:apiserver_request_total:rate:sum{code="200",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 33.082456140350565 1562168623634
code:apiserver_request_total:rate:sum{code="201",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 4.231578947368422 1562168623634
code:apiserver_request_total:rate:sum{code="404",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.5087719298245614 1562168623634
code:apiserver_request_total:rate:sum{code="409",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 0.21403508771929822 1562168623634
# TYPE node_uname_info gauge
node_uname_info{domainname="(none)",endpoint="https",instance="10.0.141.152:9100",job="node-exporter",machine="x86_64",namespace="openshift-monitoring",nodename="ip-10-0-141-152",pod="node-exporter-ncdgg",release="4.18.0-80.1.2.el8_0.x86_64",service="node-exporter",sysname="Linux",version="#1 SMP Sun Apr 28 09:21:22 UTC 2019"} 1
node_uname_info{domainname="(none)",endpoint="https",instance="10.0.141.66:9100",job="node-exporter",machine="x86_64",namespace="openshift-monitoring",nodename="ip-10-0-141-66",pod="node-exporter-7rzzv",release="4.18.0-80.1.2.el8_0.x86_64",service="node-exporter",sysname="Linux",version="#1 SMP Sun Apr 28 09:21:22 UTC 2019"} 1
Expand Down
4 changes: 2 additions & 2 deletions manifests/0000_50_cluster_monitoring_operator_04-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ data:
# upgrades from failing for end users.
- '{__name__="ALERTS",alertstate="firing"}'
# the following three metrics will be used for SLA analysis reports.
# (@openshift/openshift-team-olm) code:apiserver_request_count:rate:sum identifies average of occurances
# (@openshift/openshift-team-olm) code:apiserver_request_total:rate:sum identifies average of occurrences
# of each http status code over 10 minutes
- '{__name__="code:apiserver_request_count:rate:sum"}'
- '{__name__="code:apiserver_request_total:rate:sum"}'
# (@openshift/openshift-team-olm, @openshift/openshift-team-cluster-manager)
# cluster:capacity_cpu_cores:sum is the total number of CPU cores in the
# cluster labeled by node role and type.
Expand Down

0 comments on commit 7739eaf

Please sign in to comment.