diff --git a/test/extended/dr/OWNERS b/test/extended/dr/OWNERS
index 483fe5842905..cf7822494465 100644
--- a/test/extended/dr/OWNERS
+++ b/test/extended/dr/OWNERS
@@ -1,10 +1,12 @@
 reviewers:
-  - hexfusion
-  - lilic
-  - marun
-  - smarterclayton
+  - dusk125
+  - hasbro17
+  - Elbehery
+  - tjungblu
 approvers:
-  - hexfusion
-  - lilic
-  - marun
-  - smarterclayton
+  - deads2k
+  - hasbro17
+  - sttts
+  - dusk125
+  - Elbehery
+  - tjungblu
diff --git a/test/extended/dr/common.go b/test/extended/dr/common.go
index 0657d8847352..fb90c6af041e 100644
--- a/test/extended/dr/common.go
+++ b/test/extended/dr/common.go
@@ -3,27 +3,35 @@ package dr
 import (
 	"bytes"
 	"context"
+	"crypto/rand"
+	"crypto/rsa"
+	"crypto/x509"
+	"encoding/pem"
 	"fmt"
 	"os/exec"
 	"strings"
 	"text/tabwriter"
 	"time"
 
+	xssh "golang.org/x/crypto/ssh"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/wait"
+	applyappsv1 "k8s.io/client-go/applyconfigurations/apps/v1"
+	applycorev1 "k8s.io/client-go/applyconfigurations/core/v1"
+	applymetav1 "k8s.io/client-go/applyconfigurations/meta/v1"
+	"k8s.io/client-go/dynamic"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2e "k8s.io/kubernetes/test/e2e/framework"
 	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
 	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
 
-	"github.com/openshift/origin/test/e2e/upgrade"
-	exutil "github.com/openshift/origin/test/extended/util"
-
 	g "github.com/onsi/ginkgo/v2"
 	o "github.com/onsi/gomega"
+	"github.com/openshift/origin/test/e2e/upgrade"
+	exutil "github.com/openshift/origin/test/extended/util"
 	"github.com/stretchr/objx"
 )
@@ -303,3 +311,187 @@ func checkSSH(node *corev1.Node) {
 func ssh(cmd string, node *corev1.Node) (*e2essh.Result, error) {
 	return e2essh.IssueSSHCommandWithResult(cmd, e2e.TestContext.Provider, node)
 }
+
+// installSSHKeyOnControlPlaneNodes creates a new private/public ssh key pair and stores both in a
+// secret in the openshift-etcd-operator namespace. It then appends the public key to the core
+// user's authorized_keys file on every control plane host via a daemon set.
+func installSSHKeyOnControlPlaneNodes(oc *exutil.CLI) error {
+	const name = "dr-ssh"
+	const namespace = "openshift-etcd-operator"
+
+	err := createPrivatePublicSSHKeySecret(oc, name, namespace)
+	if err != nil {
+		return err
+	}
+
+	err = createKeyInstallerDaemon(oc, name, namespace)
+	if err != nil {
+		return err
+	}
+
+	err = ensureControlPlaneSSHAccess(oc, name, namespace)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// ensureControlPlaneSSHAccess tests that the private key generated and installed by installSSHKeyOnControlPlaneNodes
+// works on all control plane nodes. It effectively polls until the pod executing ssh can reach every node.
+func ensureControlPlaneSSHAccess(oc *exutil.CLI, name string, namespace string) error {
+	const sshPath = "/home/core/.ssh"
+	testScript := fmt.Sprintf(`
+	#!/bin/bash
+	set +x
+	set -e
+
+	# TODO for all host nodes
+	# ssh -i /home/core/.ssh/privKey core@10.0.0.5
+
+	SSH_BASEDIR=%s
+	P_KEY=$SSH_BASEDIR/privKey
+	# we can't change the permissions on the mount, thus we have to copy it somewhere else
+	mkdir -p $HOME/.ssh
+	chmod 700 ~/.ssh
+	cp $P_KEY $HOME/.ssh/id_rsa
+	P_KEY=$HOME/.ssh/id_rsa
+	chmod 600 $P_KEY
+
+	# eval "$(ssh-agent -s)"
+	# ssh-add
+	# ssh -i $P_KEY -q core@10.0.0.5 exit
+	# echo $?
+
+	sleep infinity`, sshPath)
+
+	podSpec := applycorev1.PodSpec()
+	podSpec.Containers = []applycorev1.ContainerApplyConfiguration{
+		*applycorev1.Container().
+			WithName("ssh-key-tester").
+			WithSecurityContext(applycorev1.SecurityContext().WithPrivileged(true)).
+			// TODO(thomas): is there a different rhel based ssh image available?
+			WithImage("registry.redhat.io/rhel7/rhel-tools").
+			WithVolumeMounts(
+				applycorev1.VolumeMount().WithName("keys").WithMountPath(sshPath),
+			).
+			WithCommand("/bin/bash", "-c", testScript),
+	}
+	podSpec.NodeSelector = map[string]string{"node-role.kubernetes.io/master": ""}
+	podSpec.Tolerations = []applycorev1.TolerationApplyConfiguration{
+		*applycorev1.Toleration().WithKey("node-role.kubernetes.io/master").WithOperator(corev1.TolerationOpExists).WithEffect(corev1.TaintEffectNoSchedule),
+	}
+
+	podSpec.Volumes = []applycorev1.VolumeApplyConfiguration{
+		*applycorev1.Volume().WithName("keys").WithSecret(applycorev1.SecretVolumeSource().WithSecretName(name)),
+	}
+
+	pod := applycorev1.Pod(name, namespace).WithSpec(podSpec)
+
+	// TODO wait for success
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	_, err := oc.AdminKubeClient().CoreV1().Pods(namespace).Apply(ctx, pod, metav1.ApplyOptions{FieldManager: name})
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// createKeyInstallerDaemon spawns a control-plane-only daemonset that appends the public key created in
+// createPrivatePublicSSHKeySecret to the core user's authorized_keys on the host machine.
+func createKeyInstallerDaemon(oc *exutil.CLI, name string, namespace string) error {
+	const sshPath = "/home/core/.ssh"
+	labels := map[string]string{"name": "etcd-backup-server"}
+
+	installScript := fmt.Sprintf(`
+	#!/bin/bash
+
+	echo "installing public key on host"
+	FOLDER=%s
+	FILE_NAME="${FOLDER}/authorized_keys"
+	# not idempotent, will always append the key on pod restarts
+	mkdir -p $FOLDER && echo "$PUBLIC_KEY" >> $FILE_NAME && chmod 0400 $FILE_NAME && echo "installed public key successfully"
+
+	# work around the DS restart policy by never exiting the container
+	sleep infinity`, sshPath)
+	podSpec := applycorev1.PodSpec()
+	podSpec.Containers = []applycorev1.ContainerApplyConfiguration{
+		*applycorev1.Container().
+			WithName("ssh-key-installer").
+			WithSecurityContext(applycorev1.SecurityContext().WithPrivileged(true)).
+			// TODO(thomas): do we have another rhel base image somewhere?
+			WithImage("registry.redhat.io/rhel7").
+			WithVolumeMounts(
+				applycorev1.VolumeMount().WithName("ssh").WithMountPath(sshPath),
+			).
+			WithEnv(applycorev1.EnvVar().WithName("PUBLIC_KEY").
+				WithValueFrom(applycorev1.EnvVarSource().WithSecretKeyRef(applycorev1.SecretKeySelector().WithName(name).WithKey("pubKey"))),
+				// appending the time to ensure the DS updates if it already exists
+				applycorev1.EnvVar().WithName("TIME").WithValue(time.Now().String())).
+			WithCommand("/bin/bash", "-c", installScript),
+	}
+	podSpec.NodeSelector = map[string]string{"node-role.kubernetes.io/master": ""}
+	podSpec.Tolerations = []applycorev1.TolerationApplyConfiguration{
+		*applycorev1.Toleration().WithKey("node-role.kubernetes.io/master").WithOperator(corev1.TolerationOpExists).WithEffect(corev1.TaintEffectNoSchedule),
+	}
+
+	podSpec.Volumes = []applycorev1.VolumeApplyConfiguration{
+		*applycorev1.Volume().WithName("ssh").WithHostPath(applycorev1.HostPathVolumeSource().WithPath(sshPath)),
+		*applycorev1.Volume().WithName("keys").WithSecret(applycorev1.SecretVolumeSource().WithSecretName(name)),
+	}
+
+	ds := applyappsv1.DaemonSet(name, namespace).WithSpec(applyappsv1.DaemonSetSpec().WithTemplate(
+		applycorev1.PodTemplateSpec().WithName(name).WithSpec(podSpec).WithLabels(labels),
+	).WithSelector(applymetav1.LabelSelector().WithMatchLabels(labels)))
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	_, err := oc.AdminKubeClient().AppsV1().DaemonSets(namespace).Apply(ctx, ds, metav1.ApplyOptions{FieldManager: name})
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// createPrivatePublicSSHKeySecret creates a new private and public key pair and stores both in a
+// secret with the given name in the given namespace.
+func createPrivatePublicSSHKeySecret(oc *exutil.CLI, name string, namespace string) error {
+	privateKey, err := rsa.GenerateKey(rand.Reader, 4096)
+	if err != nil {
+		return fmt.Errorf("could not generate private key for CP nodes: %w", err)
+	}
+
+	if err := privateKey.Validate(); err != nil {
+		return fmt.Errorf("could not validate private key for CP nodes: %w", err)
+	}
+
+	der := x509.MarshalPKCS1PrivateKey(privateKey)
+	block := pem.Block{
+		Type:    "RSA PRIVATE KEY",
+		Headers: nil,
+		Bytes:   der,
+	}
+	pemBytes := pem.EncodeToMemory(&block)
+
+	publicKey, err := xssh.NewPublicKey(&privateKey.PublicKey)
+	if err != nil {
+		return fmt.Errorf("could not generate public key for CP nodes: %w", err)
+	}
+
+	pubKey := xssh.MarshalAuthorizedKey(publicKey)
+	e2elog.Logf("successfully created new public key: %s", string(pubKey))
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	secret := applycorev1.Secret(name, namespace).WithData(map[string][]byte{
+		"privKey": pemBytes,
+		"pubKey":  pubKey,
+	})
+	_, err = oc.AdminKubeClient().CoreV1().Secrets(namespace).Apply(ctx, secret, metav1.ApplyOptions{FieldManager: name})
+	if err != nil {
+		return fmt.Errorf("could not save key secret for CP nodes: %w", err)
+	}
+	return nil
+}
diff --git a/test/extended/dr/quorum_restore.go b/test/extended/dr/quorum_restore.go
index 7e5f48423a41..de53c06a6a52 100644
--- a/test/extended/dr/quorum_restore.go
+++ b/test/extended/dr/quorum_restore.go
@@ -50,6 +50,12 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
 
 	oc := exutil.NewCLIWithoutNamespace("disaster-recovery")
 
+	// TODO(thomas): to be removed with a real test, this is just for testing the utility until then
+	g.It("[Feature:EtcdRecovery] should install ssh keys on CP nodes", func() {
+		err := installSSHKeyOnControlPlaneNodes(oc)
+		o.Expect(err).ToNot(o.HaveOccurred())
+	})
+
 	// Validate backing up and restoring to the same node on a cluster
 	// that has lost quorum after the backup was taken.
 	g.It("[Feature:EtcdRecovery] Cluster should restore itself after quorum loss [apigroup:machine.openshift.io][apigroup:operator.openshift.io]", func() {
diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go
index 162a381ca97d..6f3f15f0cee6 100644
--- a/test/extended/util/annotate/generated/zz_generated.annotations.go
+++ b/test/extended/util/annotate/generated/zz_generated.annotations.go
@@ -2,6 +2,7 @@ package generated
 
 import (
 	"fmt"
+	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/ginkgo/v2/types"
 )
 
@@ -1771,6 +1772,8 @@ var Annotations = map[string]string{
 
 	"[sig-etcd][Feature:DisasterRecovery][Disruptive] [Feature:EtcdRecovery] Cluster should restore itself after quorum loss [apigroup:machine.openshift.io][apigroup:operator.openshift.io]": " [Serial]",
 
+	"[sig-etcd][Feature:DisasterRecovery][Disruptive] [Feature:EtcdRecovery] should install ssh keys on CP nodes": " [Serial]",
+
 	"[sig-etcd][Feature:EtcdVerticalScaling][Suite:openshift/etcd/scaling] etcd [apigroup:config.openshift.io] is able to vertically scale up and down with a single node [Timeout:60m][apigroup:machine.openshift.io]": "",
 
 	"[sig-imageregistry] Image registry [apigroup:route.openshift.io] should redirect on blob pull [apigroup:image.openshift.io]": " [Suite:openshift/conformance/parallel]",
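
One way the "// TODO wait for success" in ensureControlPlaneSSHAccess could later be resolved is sketched below. This helper is not part of the patch above; its name, intervals, and timeout are illustrative, and it only uses packages the diff already imports. It assumes a later revision of the test script exits non-zero when ssh to a control plane node fails, so the ssh-key-tester pod only becomes Ready once the key works.

// Hypothetical follow-up helper, not part of the diff above.
func waitForSSHTesterPod(oc *exutil.CLI, name, namespace string) error {
	return wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) {
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		pod, err := oc.AdminKubeClient().CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			// retry on transient API errors
			return false, nil
		}
		for _, cond := range pod.Status.Conditions {
			if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
				return true, nil
			}
		}
		return false, nil
	})
}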