Skip to content

Commit

Permalink
Merge pull request #96299 from ggriffiths/snapshot_controller_metrics…
Browse files Browse the repository at this point in the history
…_e2e_tests

Add Snapshot Controller e2e metric tests
  • Loading branch information
k8s-ci-robot committed May 23, 2021
2 parents 5be51c9 + 564e531 commit d790366
Show file tree
Hide file tree
Showing 13 changed files with 467 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ spec:
serviceAccount: volume-snapshot-controller
containers:
- name: volume-snapshot-controller
image: k8s.gcr.io/sig-storage/snapshot-controller:v3.0.2
image: k8s.gcr.io/sig-storage/snapshot-controller:v4.0.0
args:
- "--v=5"
- "--metrics-path=/metrics"
- "--http-endpoint=:9102"
2 changes: 1 addition & 1 deletion test/e2e/apimachinery/garbage_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ func verifyRemainingObjects(f *framework.Framework, objects map[string]int) (boo
func gatherMetrics(f *framework.Framework) {
ginkgo.By("Gathering metrics")
var summary framework.TestDataSummary
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false)
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false, false)
if err != nil {
framework.Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
} else {
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/framework/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ func (f *Framework) BeforeEach() {

gatherMetricsAfterTest := TestContext.GatherMetricsAfterTest == "true" || TestContext.GatherMetricsAfterTest == "master"
if gatherMetricsAfterTest && TestContext.IncludeClusterAutoscalerMetrics {
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics)
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics, false)
if err != nil {
Logf("Failed to create MetricsGrabber (skipping ClusterAutoscaler metrics gathering before test): %v", err)
} else {
Expand Down Expand Up @@ -449,7 +449,7 @@ func (f *Framework) AfterEach() {
ginkgo.By("Gathering metrics")
// Grab apiserver, scheduler, controller-manager metrics and (optionally) nodes' kubelet metrics.
grabMetricsFromKubelets := TestContext.GatherMetricsAfterTest != "master" && !ProviderIs("kubemark")
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics)
grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics, false)
if err != nil {
Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
} else {
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/framework/metrics/kubelet_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletM
if c == nil {
return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics")
}
grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false)
grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false, false)
if err != nil {
return KubeletMetrics{}, err
}
Expand Down
121 changes: 94 additions & 27 deletions test/e2e/framework/metrics/metrics_grabber.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,40 +38,48 @@ const (
// kubeControllerManagerPort is the default port for the controller manager status server.
kubeControllerManagerPort = 10257
metricsProxyPod = "metrics-proxy"
// snapshotControllerPort is the port for the snapshot controller
snapshotControllerPort = 9102
)

// Collection is metrics collection of components
type Collection struct {
APIServerMetrics APIServerMetrics
ControllerManagerMetrics ControllerManagerMetrics
KubeletMetrics map[string]KubeletMetrics
SchedulerMetrics SchedulerMetrics
ClusterAutoscalerMetrics ClusterAutoscalerMetrics
APIServerMetrics APIServerMetrics
ControllerManagerMetrics ControllerManagerMetrics
SnapshotControllerMetrics SnapshotControllerMetrics
KubeletMetrics map[string]KubeletMetrics
SchedulerMetrics SchedulerMetrics
ClusterAutoscalerMetrics ClusterAutoscalerMetrics
}

// Grabber provides functions which grab metrics from components
type Grabber struct {
client clientset.Interface
externalClient clientset.Interface
grabFromAPIServer bool
grabFromControllerManager bool
grabFromKubelets bool
grabFromScheduler bool
grabFromClusterAutoscaler bool
kubeScheduler string
waitForSchedulerReadyOnce sync.Once
kubeControllerManager string
waitForControllerManagerReadyOnce sync.Once
client clientset.Interface
externalClient clientset.Interface
grabFromAPIServer bool
grabFromControllerManager bool
grabFromKubelets bool
grabFromScheduler bool
grabFromClusterAutoscaler bool
grabFromSnapshotController bool
kubeScheduler string
waitForSchedulerReadyOnce sync.Once
kubeControllerManager string
waitForControllerManagerReadyOnce sync.Once
snapshotController string
waitForSnapshotControllerReadyOnce sync.Once
}

// NewMetricsGrabber returns new metrics which are initialized.
func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool) (*Grabber, error) {
func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool, snapshotController bool) (*Grabber, error) {

kubeScheduler := ""
kubeControllerManager := ""
snapshotControllerManager := ""

regKubeScheduler := regexp.MustCompile("kube-scheduler-.*")
regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*")
regSnapshotController := regexp.MustCompile("volume-snapshot-controller.*")

podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{})
if err != nil {
Expand All @@ -87,7 +95,10 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b
if regKubeControllerManager.MatchString(pod.Name) {
kubeControllerManager = pod.Name
}
if kubeScheduler != "" && kubeControllerManager != "" {
if regSnapshotController.MatchString(pod.Name) {
snapshotControllerManager = pod.Name
}
if kubeScheduler != "" && kubeControllerManager != "" && snapshotControllerManager != "" {
break
}
}
Expand All @@ -99,20 +110,26 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b
controllers = false
klog.Warningf("Can't find kube-controller-manager pod. Grabbing metrics from kube-controller-manager is disabled.")
}
if snapshotControllerManager == "" {
snapshotController = false
klog.Warningf("Can't find snapshot-controller pod. Grabbing metrics from snapshot-controller is disabled.")
}
if ec == nil {
klog.Warningf("Did not receive an external client interface. Grabbing metrics from ClusterAutoscaler is disabled.")
}

return &Grabber{
client: c,
externalClient: ec,
grabFromAPIServer: apiServer,
grabFromControllerManager: controllers,
grabFromKubelets: kubelets,
grabFromScheduler: scheduler,
grabFromClusterAutoscaler: clusterAutoscaler,
kubeScheduler: kubeScheduler,
kubeControllerManager: kubeControllerManager,
client: c,
externalClient: ec,
grabFromAPIServer: apiServer,
grabFromControllerManager: controllers,
grabFromKubelets: kubelets,
grabFromScheduler: scheduler,
grabFromClusterAutoscaler: clusterAutoscaler,
grabFromSnapshotController: snapshotController,
kubeScheduler: kubeScheduler,
kubeControllerManager: kubeControllerManager,
snapshotController: snapshotControllerManager,
}, nil
}

Expand Down Expand Up @@ -220,6 +237,48 @@ func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error)
return parseControllerManagerMetrics(output)
}

// GrabFromSnapshotController returns metrics from controller manager
func (g *Grabber) GrabFromSnapshotController(podName string, port int) (SnapshotControllerMetrics, error) {
if g.snapshotController == "" {
return SnapshotControllerMetrics{}, fmt.Errorf("SnapshotController pod is not registered. Skipping SnapshotController's metrics gathering")
}

// Use overrides if provided via test config flags.
// Otherwise, use the default snapshot controller pod name and port.
if podName == "" {
podName = g.snapshotController
}
if port == 0 {
port = snapshotControllerPort
}

var err error
g.waitForSnapshotControllerReadyOnce.Do(func() {
if readyErr := e2epod.WaitForPodsReady(g.client, metav1.NamespaceSystem, podName, 0); readyErr != nil {
err = fmt.Errorf("error waiting for snapshot controller pod to be ready: %w", readyErr)
return
}

var lastMetricsFetchErr error
if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) {
_, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port)
return lastMetricsFetchErr == nil, nil
}); metricsWaitErr != nil {
err = fmt.Errorf("error waiting for snapshot controller pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr)
return
}
})
if err != nil {
return SnapshotControllerMetrics{}, err
}

output, err := g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port)
if err != nil {
return SnapshotControllerMetrics{}, err
}
return parseSnapshotControllerMetrics(output)
}

// GrabFromAPIServer returns metrics from API server
func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) {
output, err := g.getMetricsFromAPIServer()
Expand Down Expand Up @@ -257,6 +316,14 @@ func (g *Grabber) Grab() (Collection, error) {
result.ControllerManagerMetrics = metrics
}
}
if g.grabFromSnapshotController {
metrics, err := g.GrabFromSnapshotController(g.snapshotController, snapshotControllerPort)
if err != nil {
errs = append(errs, err)
} else {
result.SnapshotControllerMetrics = metrics
}
}
if g.grabFromClusterAutoscaler {
metrics, err := g.GrabFromClusterAutoscaler()
if err != nil {
Expand Down
40 changes: 40 additions & 0 deletions test/e2e/framework/metrics/snapshot_controller_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import "k8s.io/component-base/metrics/testutil"

// SnapshotControllerMetrics is metrics for controller manager
type SnapshotControllerMetrics testutil.Metrics

// Equal returns true if all metrics are the same as the arguments.
func (m *SnapshotControllerMetrics) Equal(o SnapshotControllerMetrics) bool {
return (*testutil.Metrics)(m).Equal(testutil.Metrics(o))
}

func newSnapshotControllerMetrics() SnapshotControllerMetrics {
result := testutil.NewMetrics()
return SnapshotControllerMetrics(result)
}

func parseSnapshotControllerMetrics(data string) (SnapshotControllerMetrics, error) {
result := newSnapshotControllerMetrics()
if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil {
return SnapshotControllerMetrics{}, err
}
return result, nil
}
9 changes: 9 additions & 0 deletions test/e2e/framework/test_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ type TestContextType struct {

// DockerConfigFile is a file that contains credentials which can be used to pull images from certain private registries, needed for a test.
DockerConfigFile string

// SnapshotControllerPodName is the name used for identifying the snapshot controller pod.
SnapshotControllerPodName string

// SnapshotControllerHTTPPort the port used for communicating with the snapshot controller HTTP endpoint.
SnapshotControllerHTTPPort int
}

// NodeKillerConfig describes configuration of NodeKiller -- a utility to
Expand Down Expand Up @@ -315,6 +321,9 @@ func RegisterCommonFlags(flags *flag.FlagSet) {
flags.StringVar(&TestContext.ProgressReportURL, "progress-report-url", "", "The URL to POST progress updates to as the suite runs to assist in aiding integrations. If empty, no messages sent.")
flags.StringVar(&TestContext.SpecSummaryOutput, "spec-dump", "", "The file to dump all ginkgo.SpecSummary to after tests run. If empty, no objects are saved/printed.")
flags.StringVar(&TestContext.DockerConfigFile, "docker-config-file", "", "A file that contains credentials which can be used to pull images from certain private registries, needed for a test.")

flags.StringVar(&TestContext.SnapshotControllerPodName, "snapshot-controller-pod-name", "", "The pod name to use for identifying the snapshot controller in the kube-system namespace.")
flags.IntVar(&TestContext.SnapshotControllerHTTPPort, "snapshot-controller-http-port", 0, "The port to use for snapshot controller HTTP communication.")
}

// RegisterClusterFlags registers flags specific to the cluster e2e test suite.
Expand Down
57 changes: 31 additions & 26 deletions test/e2e/framework/timeouts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,20 @@ import "time"

const (
// Default timeouts to be used in TimeoutContext
podStartTimeout = 5 * time.Minute
podStartShortTimeout = 2 * time.Minute
podStartSlowTimeout = 15 * time.Minute
podDeleteTimeout = 5 * time.Minute
claimProvisionTimeout = 5 * time.Minute
claimProvisionShortTimeout = 1 * time.Minute
claimBoundTimeout = 3 * time.Minute
pvReclaimTimeout = 3 * time.Minute
pvBoundTimeout = 3 * time.Minute
pvDeleteTimeout = 3 * time.Minute
pvDeleteSlowTimeout = 20 * time.Minute
snapshotCreateTimeout = 5 * time.Minute
snapshotDeleteTimeout = 5 * time.Minute
podStartTimeout = 5 * time.Minute
podStartShortTimeout = 2 * time.Minute
podStartSlowTimeout = 15 * time.Minute
podDeleteTimeout = 5 * time.Minute
claimProvisionTimeout = 5 * time.Minute
claimProvisionShortTimeout = 1 * time.Minute
claimBoundTimeout = 3 * time.Minute
pvReclaimTimeout = 3 * time.Minute
pvBoundTimeout = 3 * time.Minute
pvDeleteTimeout = 3 * time.Minute
pvDeleteSlowTimeout = 20 * time.Minute
snapshotCreateTimeout = 5 * time.Minute
snapshotDeleteTimeout = 5 * time.Minute
snapshotControllerMetricsTimeout = 5 * time.Minute
)

// TimeoutContext contains timeout settings for several actions.
Expand Down Expand Up @@ -77,23 +78,27 @@ type TimeoutContext struct {

// SnapshotDelete is how long for snapshot to delete snapshotContent.
SnapshotDelete time.Duration

// SnapshotControllerMetrics is how long to wait for snapshot controller metrics.
SnapshotControllerMetrics time.Duration
}

// NewTimeoutContextWithDefaults returns a TimeoutContext with default values.
func NewTimeoutContextWithDefaults() *TimeoutContext {
return &TimeoutContext{
PodStart: podStartTimeout,
PodStartShort: podStartShortTimeout,
PodStartSlow: podStartSlowTimeout,
PodDelete: podDeleteTimeout,
ClaimProvision: claimProvisionTimeout,
ClaimProvisionShort: claimProvisionShortTimeout,
ClaimBound: claimBoundTimeout,
PVReclaim: pvReclaimTimeout,
PVBound: pvBoundTimeout,
PVDelete: pvDeleteTimeout,
PVDeleteSlow: pvDeleteSlowTimeout,
SnapshotCreate: snapshotCreateTimeout,
SnapshotDelete: snapshotDeleteTimeout,
PodStart: podStartTimeout,
PodStartShort: podStartShortTimeout,
PodStartSlow: podStartSlowTimeout,
PodDelete: podDeleteTimeout,
ClaimProvision: claimProvisionTimeout,
ClaimProvisionShort: claimProvisionShortTimeout,
ClaimBound: claimBoundTimeout,
PVReclaim: pvReclaimTimeout,
PVBound: pvBoundTimeout,
PVDelete: pvDeleteTimeout,
PVDeleteSlow: pvDeleteSlowTimeout,
SnapshotCreate: snapshotCreateTimeout,
SnapshotDelete: snapshotDeleteTimeout,
SnapshotControllerMetrics: snapshotControllerMetricsTimeout,
}
}
2 changes: 1 addition & 1 deletion test/e2e/instrumentation/monitoring/metrics_grabber.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() {
}
}
gomega.Eventually(func() error {
grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true)
grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true, true)
if err != nil {
return fmt.Errorf("failed to create metrics grabber: %v", err)
}
Expand Down

0 comments on commit d790366

Please sign in to comment.