Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test/e2e/prometheusadapter: reenable CA rotation test #576

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 14 additions & 1 deletion test/e2e/framework/route_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,12 @@ func NewRouteClient(
var token string

for _, secret := range secrets.Items {
if strings.Contains(secret.Name, "cluster-monitoring-operator-e2e-token-") {
_, dockerToken := secret.Annotations["openshift.io/create-dockercfg-secrets"]
e2eToken := strings.Contains(secret.Name, "cluster-monitoring-operator-e2e-token-")

// we have to skip the token secret that contains the openshift.io/create-dockercfg-secrets annotation
// as this is the token to talk to the internal registry.
if !dockerToken && e2eToken {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just add a comment that if it's a dockerToken, that means this token is an invalid one, to avoid future confusion?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good

token = string(secret.Data["token"])
}
}
Expand Down Expand Up @@ -101,6 +106,10 @@ func (c *RouteClient) PrometheusQuery(query string) ([]byte, error) {
return nil, err
}

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code response, want %d, got %d", http.StatusOK, resp.StatusCode)
}

defer resp.Body.Close()

body, err := ioutil.ReadAll(resp.Body)
Expand Down Expand Up @@ -139,6 +148,10 @@ func (c *RouteClient) AlertmanagerQuery(kvs ...string) ([]byte, error) {
return nil, err
}

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code response, want %d, got %d", http.StatusOK, resp.StatusCode)
}

defer resp.Body.Close()

body, err := ioutil.ReadAll(resp.Body)
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,13 @@ func testMain(m *testing.M) error {
)
body, loopErr = f.ThanosQuerierClient.PrometheusQuery("count(up{job=\"prometheus-k8s\"})")
if loopErr != nil {
loopErr = errors.Wrap(loopErr, "error executing prometheus query")
return false, nil
}

v, loopErr = framework.GetFirstValueFromPromQuery(body)
if loopErr != nil {
loopErr = errors.Wrapf(loopErr, "error getting first value from prometheus response %q", string(body))
return false, nil
}

Expand Down
117 changes: 83 additions & 34 deletions test/e2e/prometheusadapter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,21 +221,21 @@ func TestAggregatedMetricPermissions(t *testing.T) {
}
}

func DisabledTestPrometheusAdapterCARotation(t *testing.T) {
var lastErr error
// Wait for Prometheus adapter
err := wait.Poll(time.Second, 5*time.Minute, func() (bool, error) {
func TestPrometheusAdapterCARotation(t *testing.T) {
// Wait for prometheus-adapter deployment
err := framework.Poll(5*time.Second, 5*time.Minute, func() error {
_, err := f.KubeClient.AppsV1().Deployments(f.Ns).Get("prometheus-adapter", metav1.GetOptions{})
lastErr = errors.Wrap(err, "getting prometheus-adapter deployment failed")
if err != nil {
return false, nil
return errors.Wrap(err, "getting prometheus-adapter deployment failed")
}
return true, nil
return nil
})
if err != nil {
if err == wait.ErrWaitTimeout && lastErr != nil {
err = lastErr
}
t.Fatal(err)
}

tls, err := f.KubeClient.CoreV1().Secrets("openshift-monitoring").Get("prometheus-adapter-tls", metav1.GetOptions{})
if err != nil {
t.Fatal(err)
}

Expand All @@ -244,57 +244,106 @@ func DisabledTestPrometheusAdapterCARotation(t *testing.T) {
t.Fatal(err)
}

tls, err := f.KubeClient.CoreV1().Secrets("openshift-monitoring").Get("prometheus-adapter-tls", metav1.GetOptions{})
factory := manifests.NewFactory("openshift-monitoring", "", nil)
adapterSecret, err := factory.PrometheusAdapterSecret(tls, apiAuth)
if err != nil {
t.Fatal(err)
}

// Simulate rotation by simply adding a newline to existing certs.
// This change will be propagated to the cluster monitoring operator,
// causing a new secret to be created.
apiAuth.Data["requestheader-client-ca-file"] = apiAuth.Data["requestheader-client-ca-file"] + "\n"
apiAuth, err = f.KubeClient.CoreV1().ConfigMaps("kube-system").Update(apiAuth)
// the secret might not have been created yet, so wait for it
err = framework.Poll(5*time.Second, 5*time.Minute, func() error {
_, err = f.KubeClient.CoreV1().Secrets("openshift-monitoring").Get(adapterSecret.GetName(), metav1.GetOptions{})
return err
})
if err != nil {
t.Fatal(err)
}

factory := manifests.NewFactory("openshift-monitoring", "", nil)
newSecret, err := factory.PrometheusAdapterSecret(tls, apiAuth)
// Delete the signer secrets. This causes kube-system/extension-apiserver-authentication
// to be reissued.
err = f.KubeClient.CoreV1().Secrets("openshift-kube-controller-manager-operator").Delete("csr-signer-signer", nil)
if err != nil {
t.Fatal(err)
}

// Wait for the new secret to be created
err = wait.Poll(time.Second, 5*time.Minute, func() (bool, error) {
_, err := f.KubeClient.CoreV1().Secrets(f.Ns).Get(newSecret.Name, metav1.GetOptions{})
lastErr = errors.Wrap(err, "getting new api auth secret failed")
err = f.KubeClient.CoreV1().Secrets("openshift-kube-controller-manager-operator").Delete("csr-signer", nil)
if err != nil {
t.Fatal(err)
}

// Wait for the new secret to be deployed
var newSecret corev1.Secret
err = framework.Poll(5*time.Second, 15*time.Minute, func() error {
secrets, err := f.KubeClient.CoreV1().Secrets("openshift-monitoring").List(metav1.ListOptions{
LabelSelector: "monitoring.openshift.io/name=prometheus-adapter,monitoring.openshift.io/hash!=" + adapterSecret.Labels["monitoring.openshift.io/hash"],
})

if err != nil {
return false, nil
return errors.Wrap(err, "error listing prometheus adapter secrets")
}
return true, nil

if len(secrets.Items) == 0 {
return errors.New("expected prometheus adapter secret to have rotated, but it didn't")
}

if got := len(secrets.Items); got > 1 {
return fmt.Errorf("expected exactly 1 prometheus adapter secret to be present, got %d", got)
}

newSecret = secrets.Items[0]
return nil
})
if err != nil {
if err == wait.ErrWaitTimeout && lastErr != nil {
err = lastErr
t.Fatal(err)
}

// Wait for prometheus-adapter deployment to reference new secret
err = framework.Poll(time.Second, 5*time.Minute, func() error {
d, err := f.KubeClient.AppsV1().Deployments(f.Ns).Get("prometheus-adapter", metav1.GetOptions{})
if err != nil {
return errors.Wrap(err, "getting prometheus-adapter deployment failed")
}

for _, v := range d.Spec.Template.Spec.Volumes {
if v.Name != "tls" {
continue
}

if v.VolumeSource.Secret.SecretName != newSecret.GetName() {
continue
}

return nil
}

return fmt.Errorf("expected secret %v to be referenced in prometheus-adapter but it didn't", newSecret.GetName())
})
if err != nil {
t.Fatal(err)
}

// Wait for new Prometheus adapter to roll out
err = wait.Poll(time.Second, 5*time.Minute, func() (bool, error) {
err = framework.Poll(time.Second, 5*time.Minute, func() error {
d, err := f.KubeClient.AppsV1().Deployments(f.Ns).Get("prometheus-adapter", metav1.GetOptions{})
lastErr = errors.Wrap(err, "getting new prometheus adapter deployment failed")
if err != nil {
return false, nil
return errors.Wrap(err, "getting prometheus-adapter deployment failed")
}

lastErr = fmt.Errorf("waiting for updated replica count=%d to be spec replica count=%d", d.Status.UpdatedReplicas, *d.Spec.Replicas)
return d.Status.UpdatedReplicas == *d.Spec.Replicas, nil
if d.Status.UpdatedReplicas < *d.Spec.Replicas {
return fmt.Errorf("waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...", d.Name, d.Status.UpdatedReplicas, *d.Spec.Replicas)
}

if d.Status.Replicas > d.Status.UpdatedReplicas {
return fmt.Errorf("waiting for deployment %q rollout to finish: %d old replicas are pending termination...", d.Name, d.Status.Replicas-d.Status.UpdatedReplicas)
}

if d.Status.AvailableReplicas < d.Status.UpdatedReplicas {
return fmt.Errorf("waiting for deployment %q rollout to finish: %d of %d updated replicas are available...", d.Name, d.Status.AvailableReplicas, d.Status.UpdatedReplicas)
}

return nil
})
if err != nil {
if err == wait.ErrWaitTimeout && lastErr != nil {
err = lastErr
}
t.Fatal(err)
}
}