Skip to content

Commit

Permalink
Merge pull request #24117 from smarterclayton/platform_metric_test
Browse files Browse the repository at this point in the history
test: Verify platform metrics are available
  • Loading branch information
openshift-merge-robot committed Nov 22, 2019
2 parents 5f4370e + 10c6be0 commit cc2707f
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 16 deletions.
29 changes: 27 additions & 2 deletions test/extended/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ import (
exutil "github.com/openshift/origin/test/extended/util"
)

const waitForPrometheusStartSeconds = 240
const (
// maxPrometheusQueryRetries is the upper bound on the number of attempts made
// by the retry loops that poll Prometheus (each loop iteration is one attempt).
maxPrometheusQueryRetries = 5
)

var _ = g.Describe("[Feature:Prometheus][Conformance] Prometheus", func() {
defer g.GinkgoRecover()
Expand Down Expand Up @@ -81,7 +83,7 @@ var _ = g.Describe("[Feature:Prometheus][Conformance] Prometheus", func() {

g.By("checking the unsecured metrics path")
var metrics map[string]*dto.MetricFamily
o.Expect(wait.PollImmediate(10*time.Second, waitForPrometheusStartSeconds*time.Second, func() (bool, error) {
o.Expect(wait.PollImmediate(10*time.Second, 2*time.Minute, func() (bool, error) {
results, err := getInsecureURLViaPod(ns, execPod.Name, fmt.Sprintf("%s/metrics", url))
if err != nil {
e2e.Logf("unable to get unsecured metrics: %v", err)
Expand Down Expand Up @@ -173,6 +175,29 @@ var _ = g.Describe("[Feature:Prometheus][Conformance] Prometheus", func() {

e2e.Logf("Watchdog alert is firing")
})
// Checks that Prometheus serves the metrics describing the platform's
// topology: infrastructure provider, feature set, installer, etcd object
// counts, and CPU cores/sockets per node role.
g.It("should have important platform topology metrics", func() {
	oc.SetupProject()
	namespace := oc.Namespace()
	execPod := exutil.CreateCentosExecPodOrFail(oc.AdminKubeClient(), namespace, "execpod", nil)
	// Remove the exec pod when the spec returns; the result of Delete is not checked.
	defer func() {
		oc.AdminKubeClient().CoreV1().Pods(namespace).Delete(execPod.Name, metav1.NewDeleteOptions(1))
	}()

	// Every expression listed here is expected to return at least one result.
	required := []string{
		// track infrastructure type
		`cluster_infrastructure_provider{type!=""}`,
		`cluster_feature_set`,

		// track installer type
		`cluster_installer{type!="",invoker!=""}`,

		// track sum of etcd
		`instance:etcd_object_counts:sum > 0`,

		// track cores and sockets across node types
		`sum(node_role_os_version_machine:cpu_capacity_cores:sum{label_kubernetes_io_arch!="",label_node_role_kubernetes_io_master!=""}) > 0`,
		`sum(node_role_os_version_machine:cpu_capacity_sockets:sum{label_kubernetes_io_arch!="",label_node_hyperthread_enabled!="",label_node_role_kubernetes_io_master!=""}) > 0`,
	}
	expectations := make(map[string]bool, len(required))
	for _, promQL := range required {
		expectations[promQL] = true
	}

	runQueries(expectations, oc, namespace, execPod.Name, url, bearerToken)
})
g.It("should have non-Pod host cAdvisor metrics", func() {
oc.SetupProject()
ns := oc.Namespace()
Expand Down
50 changes: 36 additions & 14 deletions test/extended/prometheus/prometheus_builds.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ var _ = g.Describe("[Feature:Prometheus][Feature:Builds] Prometheus", func() {
// allow for some retry, a la prometheus.go and its initial hitting of the metrics endpoint after
// instantiating prometheus template
var err error
for i := 0; i < waitForPrometheusStartSeconds; i++ {
for i := 0; i < maxPrometheusQueryRetries; i++ {
err = expectURLStatusCodeExec(ns, execPod.Name, url, 403)
if err == nil {
break
Expand Down Expand Up @@ -100,38 +100,60 @@ type prometheusResponseData struct {

func runQueries(promQueries map[string]bool, oc *exutil.CLI, ns, execPodName, baseURL, bearerToken string) {
// expect all correct metrics within a reasonable time period
errsMap := map[string]error{}
for i := 0; i < waitForPrometheusStartSeconds; i++ {
queryErrors := make(map[string]error)
passed := make(map[string]struct{})
for i := 0; i < maxPrometheusQueryRetries; i++ {
for query, expected := range promQueries {
if _, ok := passed[query]; ok {
continue
}
//TODO when the http/query apis discussed at https://github.com/prometheus/client_golang#client-for-the-prometheus-http-api
// and introduced at https://github.com/prometheus/client_golang/blob/master/api/prometheus/v1/api.go are vendored into
// openshift/origin, look to replace this homegrown http request / query param with that API
g.By("perform prometheus metric query " + query)
contents, err := getBearerTokenURLViaPod(ns, execPodName, fmt.Sprintf("%s/api/v1/query?%s", baseURL, (url.Values{"query": []string{query}}).Encode()), bearerToken)
url := fmt.Sprintf("%s/api/v1/query?%s", baseURL, (url.Values{"query": []string{query}}).Encode())
contents, err := getBearerTokenURLViaPod(ns, execPodName, url, bearerToken)
o.Expect(err).NotTo(o.HaveOccurred())
result := prometheusResponse{}
json.Unmarshal([]byte(contents), &result)
metrics := result.Data.Result

delete(errsMap, query) // clear out any prior failures
// check query result, if this is a new error log it, otherwise remain silent
var result prometheusResponse
if err := json.Unmarshal([]byte(contents), &result); err != nil {
e2e.Logf("unable to parse query response for %s: %v", query, err)
continue
}
metrics := result.Data.Result
if result.Status != "success" {
msg := fmt.Sprintf("promQL query: %s had reported incorrect status: %#v", query, metrics)
if prev, ok := queryErrors[query]; !ok || prev.Error() != msg {
e2e.Logf("%s", msg)
}
queryErrors[query] = fmt.Errorf(msg)
continue
}
if (len(metrics) > 0 && !expected) || (len(metrics) == 0 && expected) {
dbg := fmt.Sprintf("promQL query: %s had reported incorrect results: %v", query, metrics)
fmt.Fprintf(g.GinkgoWriter, dbg)
errsMap[query] = fmt.Errorf(dbg)
msg := fmt.Sprintf("promQL query: %s had reported incorrect results: %#v", query, metrics)
if prev, ok := queryErrors[query]; !ok || prev.Error() != msg {
e2e.Logf("%s", msg)
}
queryErrors[query] = fmt.Errorf(msg)
continue
}

// query successful
passed[query] = struct{}{}
delete(queryErrors, query)
}

if len(errsMap) == 0 {
if len(queryErrors) == 0 {
break
}
time.Sleep(time.Second)
}

if len(errsMap) != 0 {
if len(queryErrors) != 0 {
exutil.DumpPodLogsStartingWith("prometheus-0", oc)
}
o.Expect(errsMap).To(o.BeEmpty())
o.Expect(queryErrors).To(o.BeEmpty())
}

func startOpenShiftBuild(oc *exutil.CLI, appTemplate string) *exutil.BuildResult {
Expand Down

0 comments on commit cc2707f

Please sign in to comment.