MON-3513: Add availability test for Metrics API
This should ensure the availability of the Metrics API during e2e tests, including upgrades.
Thus it should also help with https://issues.redhat.com/browse/MON-3539.

The correctness of the API (whether the right/expected content is returned) should be tested elsewhere; we already have tests for that in CMO, and the HPA tests already make use of it, etc. These tests only check availability.
machine424 committed Apr 24, 2024
1 parent b2195bf commit 93a0eff
Showing 2 changed files with 172 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/defaultmonitortests/types.go
@@ -19,6 +19,7 @@ import (
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/disruptionnewapiserver"
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/legacykubeapiservermonitortests"
"github.com/openshift/origin/pkg/monitortests/monitoring/statefulsetsrecreation"
"github.com/openshift/origin/pkg/monitortests/monitoring/disruptionmetricsapi"
"github.com/openshift/origin/pkg/monitortests/network/disruptioningress"
"github.com/openshift/origin/pkg/monitortests/network/disruptionpodnetwork"
"github.com/openshift/origin/pkg/monitortests/network/disruptionserviceloadbalancer"
@@ -118,6 +119,7 @@ func newDefaultMonitorTests(info monitortestframework.MonitorTestInitializationI
monitorTestRegistry.AddMonitorTestOrDie("disruption-summary-serializer", "Test Framework", disruptionserializer.NewDisruptionSummarySerializer())

monitorTestRegistry.AddMonitorTestOrDie("monitoring-statefulsets-recreation", "Monitoring", statefulsetsrecreation.NewStatefulsetsChecker())
monitorTestRegistry.AddMonitorTestOrDie("metrics-api-availability", "Monitoring", disruptionmetricsapi.NewAvailabilityInvariant())

return monitorTestRegistry
}
170 changes: 170 additions & 0 deletions pkg/monitortests/monitoring/disruptionmetricsapi/monitortest.go
@@ -0,0 +1,170 @@
package disruptionmetricsapi

import (
"context"
"fmt"
"time"

"github.com/openshift/origin/pkg/monitortestframework"

appsv1 "k8s.io/api/apps/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"github.com/openshift/origin/pkg/monitor/backenddisruption"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestlibrary/disruptionlibrary"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
)

const (
monitoringNamespace = "openshift-monitoring"
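// Deployments that can back the Metrics API: metrics-server is checked first, with prometheus-adapter as the legacy fallback (see StartCollection).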
prometheusAdapterDeployentName = "prometheus-adapter"
metricsServerDeploymentName = "metrics-server"
)

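// availability holds one disruption checker per probed Metrics API endpoint and records why the test is skipped when it does not apply.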
type availability struct {
disruptionCheckers []*disruptionlibrary.Availability
notSupportedReason error
}

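// NewAvailabilityInvariant returns the monitor test that tracks Metrics API availability.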
func NewAvailabilityInvariant() monitortestframework.MonitorTest {
return &availability{}
}

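// createAPIServerBackendSampler builds a backend sampler that polls the given API server URL and tags its requests with a per-backend user agent.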
func createAPIServerBackendSampler(clusterConfig *rest.Config, disruptionBackendName, url string, connectionType monitorapi.BackendConnectionType) (*backenddisruption.BackendSampler, error) {
backendSampler, err := backenddisruption.NewAPIServerBackend(clusterConfig, disruptionBackendName, url, connectionType)
if err != nil {
return nil, err
}
backendSampler = backendSampler.WithUserAgent(fmt.Sprintf("openshift-external-backend-sampler-%s-%s", connectionType, disruptionBackendName))

return backendSampler, nil
}

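// StartCollection wires up new- and reused-connection samplers against the Metrics API endpoints and starts collecting; it reports the test as not supported when the backing deployment has a single replica.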
func (w *availability) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
var err error

// Skip for single replica Deployments.
kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
if err != nil {
return err
}
var deployment *appsv1.Deployment
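// Prefer the metrics-server Deployment; fall back to prometheus-adapter on clusters that still ship it.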
deployment, err = kubeClient.AppsV1().Deployments(monitoringNamespace).Get(ctx, metricsServerDeploymentName, metav1.GetOptions{})
if apierrors.IsNotFound(err) {
// TODO: remove this in 4.17
deployment, err = kubeClient.AppsV1().Deployments(monitoringNamespace).Get(ctx, prometheusAdapterDeployentName, metav1.GetOptions{})
if err != nil {
return err
}
} else if err != nil {
return err
}
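// A single replica cannot be expected to serve without interruption (for example across rollouts during an upgrade), so the availability check does not apply.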
if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas == 1 {
w.notSupportedReason = &monitortestframework.NotSupportedError{Reason: fmt.Sprintf("%s only has a single replica", deployment.Name)}
return w.notSupportedReason
}

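// Backend name under which Metrics API disruption is recorded.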
disruptionBackedName := "metrics-api"

newConnectionTestName := "[sig-instrumentation] disruption/metrics-api connection/new should be available throughout the test"
reusedConnectionTestName := "[sig-instrumentation] disruption/metrics-api connection/reused should be available throughout the test"

// TODO: clean up/refactor the following.

// For node metrics.
newConnections, err := createAPIServerBackendSampler(adminRESTConfig, disruptionBackedName, "/apis/metrics.k8s.io/v1beta1/nodes", monitorapi.NewConnectionType)
if err != nil {
return err
}
reusedConnections, err := createAPIServerBackendSampler(adminRESTConfig, disruptionBackedName, "/apis/metrics.k8s.io/v1beta1/nodes", monitorapi.ReusedConnectionType)
if err != nil {
return err
}

w.disruptionCheckers = append(w.disruptionCheckers, disruptionlibrary.NewAvailabilityInvariant(
newConnectionTestName, reusedConnectionTestName,
newConnections, reusedConnections,
))

// For pod metrics: query the monitoring namespace, which always exists, rather than asking cluster-wide.
newConnections, err = createAPIServerBackendSampler(adminRESTConfig, disruptionBackedName, fmt.Sprintf("/apis/metrics.k8s.io/v1beta1/namespaces/%s/pods", monitoringNamespace), monitorapi.NewConnectionType)
if err != nil {
return err
}
reusedConnections, err = createAPIServerBackendSampler(adminRESTConfig, disruptionBackedName, fmt.Sprintf("/apis/metrics.k8s.io/v1beta1/namespaces/%s/pods", monitoringNamespace), monitorapi.ReusedConnectionType)
if err != nil {
return err
}

w.disruptionCheckers = append(w.disruptionCheckers, disruptionlibrary.NewAvailabilityInvariant(
newConnectionTestName, reusedConnectionTestName,
newConnections, reusedConnections,
))

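// Start sampling on every configured checker.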
for i := range w.disruptionCheckers {
if err := w.disruptionCheckers[i].StartCollection(ctx, adminRESTConfig, recorder); err != nil {
return err
}
}

return nil
}

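// CollectData gathers intervals and JUnit results from each disruption checker and aggregates any errors.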
func (w *availability) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
if w.notSupportedReason != nil {
return nil, nil, w.notSupportedReason
}

intervals := monitorapi.Intervals{}
junits := []*junitapi.JUnitTestCase{}
errs := []error{}

for i := range w.disruptionCheckers {
// TODO: check for nil after refactoring in StartCollection
localIntervals, localJunits, localErr := w.disruptionCheckers[i].CollectData(ctx)
intervals = append(intervals, localIntervals...)
junits = append(junits, localJunits...)
if localErr != nil {
errs = append(errs, localErr)
}
}

return intervals, junits, utilerrors.NewAggregate(errs)
}

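// ConstructComputedIntervals has nothing to compute for this test; it only propagates the not-supported reason, if any.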
func (w *availability) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
return nil, w.notSupportedReason
}

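// EvaluateTestsFromConstructedIntervals merges the JUnit results produced by each disruption checker.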
func (w *availability) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
if w.notSupportedReason != nil {
return nil, w.notSupportedReason
}

junits := []*junitapi.JUnitTestCase{}
errs := []error{}

for i := range w.disruptionCheckers {
// TODO: check for nil after refactoring in StartCollection
localJunits, localErr := w.disruptionCheckers[i].EvaluateTestsFromConstructedIntervals(ctx, finalIntervals)
junits = append(junits, localJunits...)
if localErr != nil {
errs = append(errs, localErr)
}
}

return junits, utilerrors.NewAggregate(errs)
}

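// WriteContentToStorage and Cleanup below have no extra work to do; they only report the not-supported reason, if any.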
func (w *availability) WriteContentToStorage(ctx context.Context, storageDir string, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
return w.notSupportedReason
}

func (w *availability) Cleanup(ctx context.Context) error {
return w.notSupportedReason
}
