🌱 Don't assume there is only one replica in e2e tests #2379
Changes from all commits
@@ -11,6 +11,7 @@ import (
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
    appsv1 "k8s.io/api/apps/v1"
    coordinationv1 "k8s.io/api/coordination/v1"
    corev1 "k8s.io/api/core/v1"
    apimeta "k8s.io/apimachinery/pkg/api/meta"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -195,13 +196,14 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {

// waitForDeployment checks that the updated deployment with the given app.kubernetes.io/name label
// has reached the desired number of replicas and that the number pods matches that number
-// i.e. no old pods remain. It will return a pointer to the first pod. This is only necessary
+// i.e. no old pods remain. It will return a pointer to the leader pod. This is only necessary
// to facilitate the mitigation put in place for https://github.com/operator-framework/operator-controller/issues/1626
func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel string) *corev1.Pod {
    deploymentLabelSelector := labels.Set{"app.kubernetes.io/name": controlPlaneLabel}.AsSelector()
-   t.Log("Checking that the deployment is updated")
+   t.Log("Checking that the deployment is updated and available")
    var desiredNumReplicas int32
    var deploymentNamespace string
    require.EventuallyWithT(t, func(ct *assert.CollectT) {
        var managerDeployments appsv1.DeploymentList
        require.NoError(ct, c.List(ctx, &managerDeployments, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
@@ -214,16 +216,64 @@ func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel stri
            managerDeployment.Status.AvailableReplicas == *managerDeployment.Spec.Replicas &&
            managerDeployment.Status.ReadyReplicas == *managerDeployment.Spec.Replicas,
        )
        // Check that the deployment has the Available condition set to True
        var availableCond *appsv1.DeploymentCondition
        for i := range managerDeployment.Status.Conditions {
            if managerDeployment.Status.Conditions[i].Type == appsv1.DeploymentAvailable {
                availableCond = &managerDeployment.Status.Conditions[i]
                break
            }
        }
        require.NotNil(ct, availableCond, "Available condition not found")
        require.Equal(ct, corev1.ConditionTrue, availableCond.Status, "Deployment Available condition is not True")

        desiredNumReplicas = *managerDeployment.Spec.Replicas
        deploymentNamespace = managerDeployment.Namespace
    }, time.Minute, time.Second)
    var managerPods corev1.PodList
    t.Logf("Ensure the number of remaining pods equal the desired number of replicas (%d)", desiredNumReplicas)
    require.EventuallyWithT(t, func(ct *assert.CollectT) {
        require.NoError(ct, c.List(ctx, &managerPods, client.MatchingLabelsSelector{Selector: deploymentLabelSelector}))
-       require.Len(ct, managerPods.Items, 1)
+       require.Len(ct, managerPods.Items, int(desiredNumReplicas))
    }, time.Minute, time.Second)
-   return &managerPods.Items[0]
    // Find the leader pod by checking the lease
    t.Log("Finding the leader pod")
Contributor
I wonder if we're carrying too much complexity here. It seems we're returning the pod for two reasons:

I wonder if we could refactor this helper to wait for the Available condition to be True and .status.replicas == .status.updatedReplicas, or something like that, and drop the return value. I question whether we need 1 at all, and we could try to tackle 2 in a different way, e.g. by hitting the catalog service endpoint until it doesn't return a 404. wdyt?
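For illustration, a minimal sketch of that alternative: poll a catalogd content URL until it stops returning 404 instead of returning the pod. The package name, helper name, and URL parameter are hypothetical, not part of this PR.

```go
package upgradee2e // illustrative package name

import (
    "context"
    "net/http"
    "testing"
    "time"

    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
)

// waitForCatalogServed is a hypothetical helper: it polls the given catalog
// content URL until the response is no longer 404, i.e. until the catalog
// is actually being served, instead of returning a pod and watching its logs.
func waitForCatalogServed(t *testing.T, ctx context.Context, baseURL string) {
    t.Helper()
    require.EventuallyWithT(t, func(ct *assert.CollectT) {
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL, nil)
        require.NoError(ct, err)
        resp, err := http.DefaultClient.Do(req)
        require.NoError(ct, err)
        defer resp.Body.Close()
        require.NotEqual(ct, http.StatusNotFound, resp.StatusCode, "catalog content not served yet")
    }, time.Minute, time.Second)
}
```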
Contributor
Author
I wonder if changing the test that much is in scope here. This is preserving the test as-is, but making sure the returned pod is correct.
Contributor
Author
We are already checking updatedReplicas, replicas, availableReplicas and readyReplicas (operator-controller/test/upgrade-e2e/post_upgrade_test.go, lines 211 to 217 at 0240528).
Do we need to do more than that? Which Available condition are you referring to? Deployment? ClusterCatalog?
Contributor
On the Deployment
Contributor
That's fair. I just don't know that we want to carry so much complexity and it might be worth questioning the need for the return value and .zip things. I think it's ok to move it over to another PR (or maybe the cucumber tests might already cover this). But, I thought I'd call it out.
Contributor
Author
The cucumber stuff will likely rewrite some of that, so I'd say defer it to that, but I can add a quick check for the deployment Available status, since we're already checking things on the deployment.
    // Map component labels to their leader election lease names
    leaseNames := map[string]string{
        "catalogd":            "catalogd-operator-lock",
        "operator-controller": "9c4404e7.operatorframework.io",
    }
    leaseName, ok := leaseNames[controlPlaneLabel]
    if !ok {
        t.Fatalf("Unknown control plane component: %s", controlPlaneLabel)
    }
    var leaderPod *corev1.Pod
    require.EventuallyWithT(t, func(ct *assert.CollectT) {
        var lease coordinationv1.Lease
        require.NoError(ct, c.Get(ctx, types.NamespacedName{Name: leaseName, Namespace: deploymentNamespace}, &lease))
        require.NotNil(ct, lease.Spec.HolderIdentity)
        leaderIdentity := *lease.Spec.HolderIdentity
        // The lease holder identity format is: <pod-name>_<leader-election-id-suffix>
        // Extract just the pod name by splitting on '_'
        podName := strings.Split(leaderIdentity, "_")[0]
        // Find the pod with matching name
        for i := range managerPods.Items {
            if managerPods.Items[i].Name == podName {
                leaderPod = &managerPods.Items[i]
                break
            }
        }
        require.NotNil(ct, leaderPod, "leader pod not found with identity: %s (pod name: %s)", leaderIdentity, podName)
    }, time.Minute, time.Second)
    return leaderPod
}

func watchPodLogsForSubstring(ctx context.Context, pod *corev1.Pod, substrings ...string) (bool, error) {
We could send requests to pods using their FQDNs, no need to find out their IPs.
But it appears that the FQDN uses dashed IP addresses; the pods don't have a hostname/subdomain set.
EDIT: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/
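For reference, a small sketch of the name form described in those docs: a pod without spec.hostname/spec.subdomain only gets an IP-based DNS record. The helper name is illustrative, and "cluster.local" is assumed as the cluster domain.

```go
package upgradee2e // illustrative package name

import (
    "fmt"
    "strings"

    corev1 "k8s.io/api/core/v1"
)

// podFQDN builds the IP-based DNS name a pod gets when no hostname/subdomain is set,
// e.g. 10-244-1-7.default.pod.cluster.local (dots in the pod IP replaced by dashes).
func podFQDN(pod *corev1.Pod) string {
    return fmt.Sprintf("%s.%s.pod.cluster.local",
        strings.ReplaceAll(pod.Status.PodIP, ".", "-"), pod.Namespace)
}
```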
Ah, you are absolutely right.
nit: you can read all IP addresses for a service (i.e. all pod IPs) by fetching the Endpoints resource (its name is the same as the service's), as sketched below.
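A minimal sketch of that approach, assuming the test's existing controller-runtime client; the helper name is illustrative, not part of this PR.

```go
package upgradee2e // illustrative package name

import (
    "context"

    corev1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/types"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

// podIPsForService reads the Endpoints object that shares the Service's name
// and collects the ready pod IPs backing it.
func podIPsForService(ctx context.Context, c client.Client, namespace, serviceName string) ([]string, error) {
    var eps corev1.Endpoints
    if err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: serviceName}, &eps); err != nil {
        return nil, err
    }
    var ips []string
    for _, subset := range eps.Subsets {
        for _, addr := range subset.Addresses {
            ips = append(ips, addr.IP)
        }
    }
    return ips, nil
}
```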