New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Promote APIServerIdentity to Beta #113629
Changes from all commits
368f9f9
02020b2
d24f93e
c2d387c
196a3b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
/* | ||
Copyright 2022 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package apimachinery | ||
|
||
import ( | ||
"context" | ||
"crypto/sha256" | ||
"encoding/base32" | ||
"errors" | ||
"fmt" | ||
"net" | ||
"strings" | ||
"time" | ||
|
||
"github.com/onsi/ginkgo/v2" | ||
v1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/kubernetes/test/e2e/framework" | ||
e2enode "k8s.io/kubernetes/test/e2e/framework/node" | ||
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" | ||
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" | ||
admissionapi "k8s.io/pod-security-admission/api" | ||
) | ||
|
||
func getControlPlaneHostname(node *v1.Node) (string, error) { | ||
nodeAddresses := e2enode.GetAddresses(node, v1.NodeExternalIP) | ||
if len(nodeAddresses) == 0 { | ||
return "", errors.New("no valid addresses to use for SSH") | ||
} | ||
|
||
controlPlaneAddress := nodeAddresses[0] | ||
|
||
host := controlPlaneAddress + ":" + e2essh.SSHPort | ||
result, err := e2essh.SSH("hostname", host, framework.TestContext.Provider) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
if result.Code != 0 { | ||
return "", fmt.Errorf("encountered non-zero exit code when running hostname command: %d", result.Code) | ||
} | ||
|
||
return strings.TrimSpace(result.Stdout), nil | ||
} | ||
|
||
// restartAPIServer attempts to restart the kube-apiserver on a node | ||
func restartAPIServer(node *v1.Node) error { | ||
nodeAddresses := e2enode.GetAddresses(node, v1.NodeExternalIP) | ||
if len(nodeAddresses) == 0 { | ||
return errors.New("no valid addresses to use for SSH") | ||
} | ||
|
||
controlPlaneAddress := nodeAddresses[0] | ||
cmd := "pidof kube-apiserver | xargs sudo kill" | ||
framework.Logf("Restarting kube-apiserver via ssh, running: %v", cmd) | ||
result, err := e2essh.SSH(cmd, net.JoinHostPort(controlPlaneAddress, e2essh.SSHPort), framework.TestContext.Provider) | ||
if err != nil || result.Code != 0 { | ||
e2essh.LogResult(result) | ||
return fmt.Errorf("couldn't restart kube-apiserver: %v", err) | ||
} | ||
return nil | ||
} | ||
|
||
// This test requires that --feature-gates=APIServerIdentity=true be set on the apiserver | ||
var _ = SIGDescribe("kube-apiserver identity [Feature:APIServerIdentity]", func() { | ||
f := framework.NewDefaultFramework("kube-apiserver-identity") | ||
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged | ||
|
||
ginkgo.It("kube-apiserver identity should persist after restart [Disruptive]", func() { | ||
e2eskipper.SkipUnlessProviderIs("gce") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this something we do often? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, especially on tests where you have to ssh into the nodes or run some external operation against them |
||
|
||
client := f.ClientSet | ||
|
||
var controlPlaneNodes []v1.Node | ||
nodes, err := client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) | ||
framework.ExpectNoError(err) | ||
|
||
for _, node := range nodes.Items { | ||
if _, ok := node.Labels["node-role.kubernetes.io/control-plane"]; ok { | ||
controlPlaneNodes = append(controlPlaneNodes, node) | ||
continue | ||
} | ||
|
||
if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok { | ||
controlPlaneNodes = append(controlPlaneNodes, node) | ||
continue | ||
} | ||
|
||
for _, taint := range node.Spec.Taints { | ||
if taint.Key == "node-role.kubernetes.io/master" { | ||
controlPlaneNodes = append(controlPlaneNodes, node) | ||
break | ||
} | ||
|
||
if taint.Key == "node-role.kubernetes.io/control-plane" { | ||
controlPlaneNodes = append(controlPlaneNodes, node) | ||
break | ||
} | ||
} | ||
} | ||
|
||
leases, err := client.CoordinationV1().Leases(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{ | ||
LabelSelector: "k8s.io/component=kube-apiserver", | ||
}) | ||
framework.ExpectNoError(err) | ||
framework.ExpectEqual(len(leases.Items), len(controlPlaneNodes), "unexpected number of leases") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this flake in CI? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only if you run it on a cluster while it is still being provisioned. Otherwise the node check above should prevent flakes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess it doesn't hurt to add a retry loop here |
||
|
||
for _, node := range controlPlaneNodes { | ||
hostname, err := getControlPlaneHostname(&node) | ||
framework.ExpectNoError(err) | ||
|
||
hash := sha256.Sum256([]byte(hostname)) | ||
leaseName := "kube-apiserver-" + strings.ToLower(base32.StdEncoding.WithPadding(base32.NoPadding).EncodeToString(hash[:16])) | ||
|
||
lease, err := client.CoordinationV1().Leases(metav1.NamespaceSystem).Get(context.TODO(), leaseName, metav1.GetOptions{}) | ||
framework.ExpectNoError(err) | ||
oldHolderIdentity := lease.Spec.HolderIdentity | ||
lastRenewedTime := lease.Spec.RenewTime | ||
|
||
err = restartAPIServer(&node) | ||
framework.ExpectNoError(err) | ||
|
||
err = wait.PollImmediate(time.Second, wait.ForeverTestTimeout, func() (bool, error) { | ||
lease, err = client.CoordinationV1().Leases(metav1.NamespaceSystem).Get(context.TODO(), leaseName, metav1.GetOptions{}) | ||
if err != nil { | ||
return false, nil | ||
} | ||
|
||
// expect only the holder identity to change after a restart | ||
newHolderIdentity := lease.Spec.HolderIdentity | ||
if newHolderIdentity == oldHolderIdentity { | ||
return false, nil | ||
} | ||
|
||
// wait for at least one lease heart beat after the holder identity changes | ||
if !lease.Spec.RenewTime.After(lastRenewedTime.Time) { | ||
return false, nil | ||
} | ||
|
||
return true, nil | ||
|
||
}) | ||
framework.ExpectNoError(err, "holder identity did not change after a restart") | ||
} | ||
|
||
// As long as the hostname of kube-apiserver is unchanged, a restart should not result in new Lease objects. | ||
// Check that the number of lease objects remains the same after restarting kube-apiserver. | ||
leases, err = client.CoordinationV1().Leases(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{ | ||
LabelSelector: "k8s.io/component=kube-apiserver", | ||
}) | ||
framework.ExpectNoError(err) | ||
framework.ExpectEqual(len(leases.Items), len(controlPlaneNodes), "unexpected number of leases") | ||
}) | ||
}) |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -99,13 +99,23 @@ func TestCreateLeaseOnStart(t *testing.T) { | |||||
} | ||||||
|
||||||
func TestLeaseGarbageCollection(t *testing.T) { | ||||||
oldIdentityLeaseDurationSeconds := controlplane.IdentityLeaseDurationSeconds | ||||||
oldIdentityLeaseGCPeriod := controlplane.IdentityLeaseGCPeriod | ||||||
oldIdentityLeaseRenewIntervalPeriod := controlplane.IdentityLeaseRenewIntervalPeriod | ||||||
defer func() { | ||||||
// reset the default values for leases after this test | ||||||
controlplane.IdentityLeaseDurationSeconds = oldIdentityLeaseDurationSeconds | ||||||
controlplane.IdentityLeaseGCPeriod = oldIdentityLeaseGCPeriod | ||||||
controlplane.IdentityLeaseRenewIntervalPeriod = oldIdentityLeaseRenewIntervalPeriod | ||||||
}() | ||||||
|
||||||
// Shorten lease parameters so GC behavior can be exercised in integration tests | ||||||
controlplane.IdentityLeaseDurationSeconds = 1 | ||||||
controlplane.IdentityLeaseGCPeriod = time.Second | ||||||
controlplane.IdentityLeaseRenewIntervalPeriod = time.Second | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you set them back when you're done to avoid weird things in other tests? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch -- updated |
||||||
|
||||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.APIServerIdentity, true)() | ||||||
result := kubeapiservertesting.StartTestServerOrDie(t, nil, | ||||||
// This shortens the GC check period to make the test run faster. | ||||||
// Since we are testing GC behavior on leases we create, what happens to | ||||||
// the real apiserver lease doesn't matter. | ||||||
[]string{"--identity-lease-duration-seconds=1"}, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had to remove this test, because of the dependence to this flag. But I think the unit tests added in #113074 and the additional unit test I added in this PR should make up for the coverage we lose here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In other places we just let the integration test override the "constant" kubernetes/staging/src/k8s.io/client-go/transport/cert_rotation.go Lines 37 to 38 in f8750e2
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm -- updating the lease parameters into global vars seems worthwhile so we can run integration tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to use public vars |
||||||
framework.SharedEtcd()) | ||||||
result := kubeapiservertesting.StartTestServerOrDie(t, nil, nil, framework.SharedEtcd()) | ||||||
defer result.TearDownFn() | ||||||
|
||||||
kubeclient, err := kubernetes.NewForConfig(result.ClientConfig) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hah, that is one way to do it :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
😂
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not using exec on pods?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
apiserver pods generally aren't part of the cluster (static pods managed only by a kubelet)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, exec a pod on that node as hostNetwork , not execing on the apiserver pod ... but maybe this is restricted, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
kubectl debug
has tooling for running a more-privileged Pod on a node to learn about it; maybe we could use some of that approach?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just to clarify, I commented because I was curious, we are close to code freeze and I don't mean to delay this PR