ETCD-399: Restore Test - Create scaffolding for platform agnostic ssh access

This adds a DaemonSet that installs SSH keys stored in a secret in the
openshift-etcd-operator namespace.
tjungblu committed Mar 15, 2023
1 parent 1135825 commit 3f7cd15
Showing 4 changed files with 214 additions and 11 deletions.
18 changes: 10 additions & 8 deletions test/extended/dr/OWNERS
@@ -1,10 +1,12 @@
 reviewers:
-  - hexfusion
-  - lilic
-  - marun
-  - smarterclayton
+  - dusk125
+  - hasbro17
+  - Elbehery
+  - tjungblu
 approvers:
-  - hexfusion
-  - lilic
-  - marun
-  - smarterclayton
+  - deads2k
+  - hasbro17
+  - sttts
+  - dusk125
+  - Elbehery
+  - tjungblu
198 changes: 195 additions & 3 deletions test/extended/dr/common.go
@@ -3,27 +3,35 @@ package dr
import (
"bytes"
"context"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"encoding/pem"
"fmt"
"os/exec"
"strings"
"text/tabwriter"
"time"

xssh "golang.org/x/crypto/ssh"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait"
applyappsv1 "k8s.io/client-go/applyconfigurations/apps/v1"
applycorev1 "k8s.io/client-go/applyconfigurations/core/v1"
applymetav1 "k8s.io/client-go/applyconfigurations/meta/v1"

"k8s.io/client-go/dynamic"
"k8s.io/kubernetes/test/e2e/framework"
e2e "k8s.io/kubernetes/test/e2e/framework"
e2elog "k8s.io/kubernetes/test/e2e/framework/log"
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"

"github.com/openshift/origin/test/e2e/upgrade"
exutil "github.com/openshift/origin/test/extended/util"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
"github.com/stretchr/objx"
)

@@ -303,3 +311,187 @@ func checkSSH(node *corev1.Node) {
func ssh(cmd string, node *corev1.Node) (*e2essh.Result, error) {
return e2essh.IssueSSHCommandWithResult(cmd, e2e.TestContext.Provider, node)
}
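For context, a hypothetical use of this pre-existing helper in a test body (`node` being any *corev1.Node):

// Run a command over ssh and inspect the captured output.
res, err := ssh("uname -a", node)
o.Expect(err).ToNot(o.HaveOccurred())
e2e.Logf("node %s: %s", node.Name, res.Stdout)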

// installSSHKeyOnControlPlaneNodes creates a new private/public ssh keypair and
// stores both in a secret in the openshift-etcd-operator namespace. It then
// appends the public key to the core user's authorized_keys file on the hosts with a daemon set.
func installSSHKeyOnControlPlaneNodes(oc *exutil.CLI) error {
const name = "dr-ssh"
const namespace = "openshift-etcd-operator"

err := createPrivatePublicSSHKeySecret(oc, name, namespace)
if err != nil {
return err
}

err = createKeyInstallerDaemon(oc, name, namespace)
if err != nil {
return err
}

err = ensureControlPlaneSSHAccess(oc, name, namespace)
if err != nil {
return err
}

return nil
}
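A hypothetical caller that removes the scaffolding again afterwards (the commit itself leaves the daemon set and secret in place; names taken from the constants above):

// Sketch: install the keys for a test, then delete the scaffolding on exit.
err := installSSHKeyOnControlPlaneNodes(oc)
o.Expect(err).ToNot(o.HaveOccurred())
defer func() {
	_ = oc.AdminKubeClient().AppsV1().DaemonSets("openshift-etcd-operator").Delete(
		context.Background(), "dr-ssh", metav1.DeleteOptions{})
	_ = oc.AdminKubeClient().CoreV1().Secrets("openshift-etcd-operator").Delete(
		context.Background(), "dr-ssh", metav1.DeleteOptions{})
}()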

// ensureControlPlaneSSHAccess tests that the private key generated and installed by installSSHKeyOnControlPlaneNodes
// works on all control plane nodes. The intent is to poll until the pod executing ssh can reach every node;
// the wait itself is still a TODO (see the sketch after this function).
func ensureControlPlaneSSHAccess(oc *exutil.CLI, name string, namespace string) error {
const sshPath = "/home/core/.ssh"
testScript := fmt.Sprintf(`
#!/bin/bash
set +x
set -e
# TODO for all host nodes
# ssh -i /home/core/.ssh/privKey core@10.0.0.5
SSH_BASEDIR=%s
P_KEY=$SSH_BASEDIR/privKey
# we can't change the permissions on the mount, thus we have to copy it somewhere else
mkdir -p $HOME/.ssh
chmod 700 ~/.ssh
cp $P_KEY $HOME/.ssh/id_rsa
P_KEY=$HOME/.ssh/id_rsa
chmod 600 $P_KEY
# eval "$(ssh-agent -s)"
# ssh-add
# ssh -i $P_KEY -q core@10.0.0.5 exit
# echo $?
sleep infinity`, sshPath)

podSpec := applycorev1.PodSpec()
podSpec.Containers = []applycorev1.ContainerApplyConfiguration{
*applycorev1.Container().
WithName("ssh-key-tester").
WithSecurityContext(applycorev1.SecurityContext().WithPrivileged(true)).
// TODO(thomas): is there a different rhel based ssh image available?
WithImage("registry.redhat.io/rhel7/rhel-tools").
WithVolumeMounts(
applycorev1.VolumeMount().WithName("keys").WithMountPath(sshPath),
).
WithCommand("/bin/bash", "-c", testScript),
}
podSpec.NodeSelector = map[string]string{"node-role.kubernetes.io/master": ""}
podSpec.Tolerations = []applycorev1.TolerationApplyConfiguration{
*applycorev1.Toleration().WithKey("node-role.kubernetes.io/master").WithOperator(corev1.TolerationOpExists).WithEffect(corev1.TaintEffectNoSchedule),
}

podSpec.Volumes = []applycorev1.VolumeApplyConfiguration{
*applycorev1.Volume().WithName("keys").WithSecret(applycorev1.SecretVolumeSource().WithSecretName(name)),
}

pod := applycorev1.Pod(name, namespace).WithSpec(podSpec)

// TODO wait for success
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
_, err := oc.AdminKubeClient().CoreV1().Pods(namespace).Apply(ctx, pod, metav1.ApplyOptions{FieldManager: name})
if err != nil {
return err
}

return nil
}
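The TODO above leaves the apply fire-and-forget; a minimal sketch of the missing wait that could slot in before the final return, assuming the test script is changed to exit on success (instead of `sleep infinity`) so the pod can reach PodSucceeded:

// Sketch: poll until the tester pod completed successfully on the node.
err = wait.PollImmediate(5*time.Second, 5*time.Minute, func() (bool, error) {
	p, err := oc.AdminKubeClient().CoreV1().Pods(namespace).Get(context.Background(), name, metav1.GetOptions{})
	if err != nil {
		return false, nil // retry transient errors
	}
	return p.Status.Phase == corev1.PodSucceeded, nil
})
if err != nil {
	return fmt.Errorf("ssh tester pod did not succeed: %w", err)
}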

// createKeyInstallerDaemon spawns a control-plane-only daemon set that appends the public key created in
// createPrivatePublicSSHKeySecret to the core user's authorized_keys on the host machine.
func createKeyInstallerDaemon(oc *exutil.CLI, name string, namespace string) error {
const sshPath = "/home/core/.ssh"
labels := map[string]string{"name": "etcd-backup-server"}

installScript := fmt.Sprintf(`
#!/bin/bash
echo "installing public key on host"
FOLDER=%s
FILE_NAME="${FOLDER}/authorized_keys"
# not idempotent, will always append the key on pod restarts
mkdir -p $FOLDER && echo "$PUBLIC_KEY" >> $FILE_NAME && chmod 0400 $FILE_NAME && echo "installed public key successfully"
# work around the DS restart policy by never exiting the container
sleep infinity`, sshPath)
podSpec := applycorev1.PodSpec()
podSpec.Containers = []applycorev1.ContainerApplyConfiguration{
*applycorev1.Container().
WithName("ssh-key-installer").
WithSecurityContext(applycorev1.SecurityContext().WithPrivileged(true)).
// TODO(thomas): do we have another rhel base image somewhere?
WithImage("registry.redhat.io/rhel7").
WithVolumeMounts(
applycorev1.VolumeMount().WithName("ssh").WithMountPath(sshPath),
).
WithEnv(applycorev1.EnvVar().WithName("PUBLIC_KEY").
WithValueFrom(applycorev1.EnvVarSource().WithSecretKeyRef(applycorev1.SecretKeySelector().WithName(name).WithKey("pubKey"))),
// appending the time to ensure the DS updates if it already exists
applycorev1.EnvVar().WithName("TIME").WithValue(time.Now().String())).
WithCommand("/bin/bash", "-c", installScript),
}
podSpec.NodeSelector = map[string]string{"node-role.kubernetes.io/master": ""}
podSpec.Tolerations = []applycorev1.TolerationApplyConfiguration{
*applycorev1.Toleration().WithKey("node-role.kubernetes.io/master").WithOperator(corev1.TolerationOpExists).WithEffect(corev1.TaintEffectNoSchedule),
}

podSpec.Volumes = []applycorev1.VolumeApplyConfiguration{
*applycorev1.Volume().WithName("ssh").WithHostPath(applycorev1.HostPathVolumeSource().WithPath(sshPath)),
*applycorev1.Volume().WithName("keys").WithSecret(applycorev1.SecretVolumeSource().WithSecretName(name)),
}

ds := applyappsv1.DaemonSet(name, namespace).WithSpec(applyappsv1.DaemonSetSpec().WithTemplate(
applycorev1.PodTemplateSpec().WithName(name).WithSpec(podSpec).WithLabels(labels),
).WithSelector(applymetav1.LabelSelector().WithMatchLabels(labels)))
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
_, err := oc.AdminKubeClient().AppsV1().DaemonSets(namespace).Apply(ctx, ds, metav1.ApplyOptions{FieldManager: name})
if err != nil {
return err
}
return nil
}
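In the same hedged spirit, a sketch of waiting for the installer daemon set to come up on every control plane node before relying on the key (readiness here only means the container started, since it sleeps after appending the key):

// Sketch: wait until the installer runs on all scheduled control plane nodes.
err = wait.PollImmediate(5*time.Second, 5*time.Minute, func() (bool, error) {
	d, err := oc.AdminKubeClient().AppsV1().DaemonSets(namespace).Get(context.Background(), name, metav1.GetOptions{})
	if err != nil {
		return false, nil
	}
	return d.Status.DesiredNumberScheduled > 0 &&
		d.Status.NumberReady == d.Status.DesiredNumberScheduled, nil
})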

// createPrivatePublicSSHKeySecret creates a new private/public keypair and stores it as a
// secret with the given name in the given namespace.
func createPrivatePublicSSHKeySecret(oc *exutil.CLI, name string, namespace string) error {
// use a 4096-bit key: sshd rejects RSA keys shorter than 1024 bits
privateKey, err := rsa.GenerateKey(rand.Reader, 4096)
if err != nil {
return fmt.Errorf("could not generate private key for CP nodes: %w", err)
}

if err := privateKey.Validate(); err != nil {
return fmt.Errorf("could not validate private key for CP nodes: %w", err)
}

der := x509.MarshalPKCS1PrivateKey(privateKey)
block := pem.Block{
Type: "RSA PRIVATE KEY",
Headers: nil,
Bytes: der,
}
pemBytes := pem.EncodeToMemory(&block)

publicKey, err := xssh.NewPublicKey(&privateKey.PublicKey)
if err != nil {
return fmt.Errorf("could not generate public key for CP nodes: %w", err)
}

pubKey := xssh.MarshalAuthorizedKey(publicKey)
e2elog.Logf("successfully created new public key: %s", string(pubKey))

ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

secret := applycorev1.Secret(name, namespace).WithData(map[string][]byte{
"privKey": pemBytes,
"pubKey": pubKey,
})
_, err = oc.AdminKubeClient().CoreV1().Secrets(namespace).Apply(ctx, secret, metav1.ApplyOptions{FieldManager: name})
if err != nil {
return fmt.Errorf("could not save key secret for CP nodes: %w", err)
}
return nil
}
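To illustrate that the stored keypair round-trips, a hypothetical client-side helper that authenticates with the generated private key (the core user, port 22, and InsecureIgnoreHostKey are test-context assumptions, not part of this commit):

// Hypothetical helper: dial a node with the PEM-encoded private key
// stored in the secret above.
func dialWithGeneratedKey(pemBytes []byte, addr string) error {
	signer, err := xssh.ParsePrivateKey(pemBytes)
	if err != nil {
		return err
	}
	cfg := &xssh.ClientConfig{
		User:            "core",
		Auth:            []xssh.AuthMethod{xssh.PublicKeys(signer)},
		HostKeyCallback: xssh.InsecureIgnoreHostKey(), // test-only shortcut
	}
	client, err := xssh.Dial("tcp", addr, cfg) // e.g. "10.0.0.5:22"
	if err != nil {
		return err
	}
	return client.Close()
}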
6 changes: 6 additions & 0 deletions test/extended/dr/quorum_restore.go
@@ -50,6 +50,12 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {

oc := exutil.NewCLIWithoutNamespace("disaster-recovery")

// TODO(thomas): to be removed with a real test, this is just for testing the utility until then
g.It("[Feature:EtcdRecovery] should install ssh keys on CP nodes", func() {
err := installSSHKeyOnControlPlaneNodes(oc)
o.Expect(err).ToNot(o.HaveOccurred())
})

// Validate backing up and restoring to the same node on a cluster
// that has lost quorum after the backup was taken.
g.It("[Feature:EtcdRecovery] Cluster should restore itself after quorum loss [apigroup:machine.openshift.io][apigroup:operator.openshift.io]", func() {

Some generated files are not rendered by default.
