Skip to content

Commit

Permalink
Talm second fails (#60)
Browse files Browse the repository at this point in the history
* Added AfterEach block and another talm test

* Added another talm test

* Added 1 more talm batching test

* Added talm_backup test
  • Loading branch information
shaior committed Feb 5, 2024
1 parent aa93445 commit 2476b64
Show file tree
Hide file tree
Showing 4 changed files with 671 additions and 5 deletions.
58 changes: 58 additions & 0 deletions tests/ranfunc/talm/internal/talmhelper/talmhelper.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
placementrulev1 "open-cluster-management.io/multicloud-operators-subscription/pkg/apis/apps/placementrule/v1"
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
)

var (
Expand Down Expand Up @@ -1092,3 +1093,60 @@ func WaitForCguToTimeout(cguName string, namespace string, timeout time.Duration
timeout,
)
}

// GetPolicyNameWithPrefix finds a policy starting with policyPrefix and returns
// the name of first policy that matches or returns a blank string if not found.
func GetPolicyNameWithPrefix(
	client *clients.Settings,
	policyPrefix string,
	namespace string) (string, error) {
	// Pull the policies for the requested namespace from the cluster.
	policyList, err := ocm.ListPoliciesInAllNamespaces(client, runtimeclient.ListOptions{Namespace: namespace})

	// Drop errors that merely indicate the resource type is absent.
	if err = FilterMissingResourceErrors(err); err != nil {
		return "", err
	}

	// Return the first policy whose name carries the requested prefix.
	for _, policy := range policyList {
		if strings.HasPrefix(policy.Object.Name, policyPrefix) {
			return policy.Object.Name, nil
		}
	}

	// No match found; blank name with nil error signals "not found".
	return "", nil
}

// WaitUntilObjectDoesNotExist can be called to wait until a specified resource is deleted.
// This is called by all of the DeleteXAndWait functions in this file.
//
// getStatus is a per-resource probe that reports whether the object currently
// exists; any "missing resource" error it returns is treated as non-fatal.
// Returns the poll error (e.g. context deadline exceeded after 5 minutes) or
// nil once the object is confirmed gone.
func WaitUntilObjectDoesNotExist(
	client *clients.Settings,
	objectName string,
	namespace string,
	getStatus func(client *clients.Settings, objectName string, namespace string) (bool, error)) error {
	// Poll every 15 seconds (up to 5 minutes) until the object no longer exists.
	err := wait.PollUntilContextTimeout(context.TODO(), 15*time.Second, 5*time.Minute, true,
		func(context.Context) (bool, error) {
			status, err := getStatus(client, objectName, namespace)

			// The object may or may not exist at this point; a "missing
			// resource" error is expected and filtered out here.
			err = FilterMissingResourceErrors(err)
			if err == nil && !status {
				// Did exist, but is gone now
				return true, nil
			}

			// Assume it still exists (any remaining error is swallowed so
			// polling continues until the timeout).
			return false, nil

		},
	)

	return err
}
258 changes: 258 additions & 0 deletions tests/ranfunc/talm/tests/talm_backup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
package tests

import (
"fmt"
"strings"
"time"

"github.com/golang/glog"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/openshift-kni/eco-goinfra/pkg/cgu"
"github.com/openshift-kni/eco-goinfra/pkg/clients"
"github.com/openshift-kni/eco-goinfra/pkg/namespace"
"github.com/openshift-kni/eco-goinfra/pkg/nodes"
"github.com/openshift-kni/eco-gosystem/tests/internal/cmd"
"github.com/openshift-kni/eco-gosystem/tests/ranfunc/internal/ranfunchelper"
"github.com/openshift-kni/eco-gosystem/tests/ranfunc/internal/ranfuncinittools"
"github.com/openshift-kni/eco-gosystem/tests/ranfunc/talm/internal/talmhelper"
"github.com/openshift-kni/eco-gosystem/tests/ranfunc/talm/internal/talmparams"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
configurationPolicyv1 "open-cluster-management.io/config-policy-controller/api/v1"
)

const (
	// backupPath is the directory on the spoke where TALM writes its backup.
	backupPath = "/var/recovery"
	// ranTestPath is the scratch directory holding the loopback backing file.
	ranTestPath = "/var/ran-test-talm-recovery"
	// fsSize is the size of the intentionally small simulated filesystem.
	fsSize = "100M"
)

// Shared between BeforeEach (which assigns them) and AfterEach (cleanup).
var (
	nodeName           string // spoke node the loopback filesystem is created on
	loopBackDevicePath string // loop device currently mounted over backupPath
)

var _ = Describe("Talm Backup Tests with single spoke", func() {

	BeforeEach(func() {
		// Backup support was introduced in TALM 4.11; skip on older hubs.
		if !ranfunchelper.IsVersionStringInRange(
			talmhelper.TalmHubVersion,
			"4.11",
			"",
		) {
			Skip("backup tests require talm 4.11 or higher")
		}
	})

	// ocp-50835
	Context("with full disk for spoke1", func() {
		curName := "disk-full-single-spoke"
		cguName := fmt.Sprintf("%s-%s", talmparams.CguCommonName, curName)
		policyName := fmt.Sprintf("%s-%s", talmparams.PolicyNameCommonName, curName)

		BeforeEach(func() {
			By("setting up filesystem to simulate low space")
			nodeList, err := nodes.List(ranfuncinittools.HubAPIClient)
			Expect(err).ToNot(HaveOccurred())
			Expect(len(nodeList)).To(BeNumerically(">=", 1))

			// Assign the package-level variables with `=`, not `:=`.
			// Using `:=` here would shadow them, leaving AfterEach with an
			// empty nodeName and a broken cleanup.
			nodeName = nodeList[0].Object.Name
			loopBackDevicePath = prepareEnvWithSmallMountPoint(nodeName)
		})

		AfterEach(func() {
			glog.V(100).Info("starting disk-full env clean up")
			diskFullEnvCleanup(nodeName, curName, loopBackDevicePath)

			// Delete temporary namespace on spoke cluster.
			spokeClusterList := []*clients.Settings{ranfuncinittools.SpokeAPIClient}
			err := talmhelper.CleanupNamespace(spokeClusterList, talmhelper.TemporaryNamespaceName)
			Expect(err).ToNot(HaveOccurred())
		})

		It("should have a failed cgu for single spoke", func() {
			By("applying all the required CRs for backup")
			// prep cgu
			cgu := talmhelper.GetCguDefinition(
				cguName,
				[]string{talmhelper.Spoke1Name},
				[]string{},
				[]string{policyName},
				talmparams.TalmTestNamespace, 1, 240)
			cgu.Definition.Spec.Backup = true

			// apply
			err := talmhelper.CreatePolicyAndCgu(
				ranfuncinittools.HubAPIClient,
				namespace.NewBuilder(ranfuncinittools.HubAPIClient, talmhelper.TemporaryNamespaceName).Definition,
				configurationPolicyv1.MustHave,
				configurationPolicyv1.Inform,
				policyName,
				fmt.Sprintf("%s-%s", talmparams.PolicySetNameCommonName, curName),
				fmt.Sprintf("%s-%s", talmparams.PlacementBindingCommonName, curName),
				fmt.Sprintf("%s-%s", talmparams.PlacementRuleCommonName, curName),
				talmparams.TalmTestNamespace,
				metav1.LabelSelector{},
				cgu,
			)
			Expect(err).To(BeNil())

			By("waiting for cgu to fail for spoke1")
			assertBackupStatus(cgu.Definition.Name, talmhelper.Spoke1Name, "UnrecoverableError")

		})

	})

})

// diskFullEnvCleanup cleans all the resources created for the single-cluster
// backup failure test: generated CRs, the loopback mount over backupPath, and
// the scratch directory holding the backing file.
func diskFullEnvCleanup(nodeName, curName, currentlyUsingLoopDevicePath string) {
	// delete generated CRs
	talmhelper.CleanupTestResourcesOnClient(
		ranfuncinittools.HubAPIClient,
		fmt.Sprintf("%s-%s", talmparams.CguCommonName, curName),
		fmt.Sprintf("%s-%s", talmparams.PolicyNameCommonName, curName),
		talmparams.TalmTestNamespace,
		fmt.Sprintf("%s-%s", talmparams.PlacementBindingCommonName, curName),
		fmt.Sprintf("%s-%s", talmparams.PlacementRuleCommonName, curName),
		fmt.Sprintf("%s-%s", talmparams.PolicySetNameCommonName, curName),
		"",
		false,
	)

	// check where backup dir is mounted and start clean up
	safeToDeleteBackupDir := true
	// retrieve all mounts for backup dir
	output, err := cmd.ExecCmd([]string{fmt.Sprintf("findmnt -n -o SOURCE --target %s", backupPath)}, nodeName)
	Expect(err).To(BeNil())

	output = strings.TrimSuffix(output, "\n")
	if output != "" {
		outputArr := strings.Split(output, "\n")
		for _, devicePath := range outputArr {
			// retrieve all devices e.g part or loop
			deviceType, err := cmd.ExecCmd([]string{fmt.Sprintf("lsblk %s -o TYPE -n", devicePath)}, nodeName)
			Expect(err).To(BeNil())

			deviceType = strings.TrimSuffix(deviceType, "\n")

			if deviceType == "part" {
				safeToDeleteBackupDir = false

				// Infof (not Info) so the %s format verb is actually expanded.
				glog.V(100).Infof("partition detected for %s, "+
					"will not attempt to delete the folder (only the content if any)", backupPath)
			} else if deviceType == "loop" {

				if currentlyUsingLoopDevicePath == devicePath {
					// unmount and detach the loop device
					_, err = cmd.ExecCmd([]string{fmt.Sprintf("sudo umount --detach-loop %s", backupPath)}, nodeName)
					Expect(err).To(BeNil())

				} else {
					safeToDeleteBackupDir = false
					glog.V(100).Info("WARNING: most likely cleanup didnt complete during the previous run. ")
					/*
						Assuming loop0 is the unwanted one...
						look for clues with lsblk
						$ lsblk
						NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
						loop0 7:0 0 100M 0 loop /var/recovery -----> this line should not be there
						unmount it with: `sudo umount --detach-loop /var/recovery`
						check lsblk to verify there's nothing mounted to loop0 and line is gone completely
						if line is still there (but unmounted) make use `losetup` to see the status of loopdevice (loop0)
						$ losetup
						NAME SIZELIMIT OFFSET AUTOCLEAR RO BACK-FILE DIO LOG-SEC
						/dev/loop0 0 0 1 0 /var/ran-test-talm-recovery/100M.img (deleted) 0 512
						if you see (deleted) -- reboot the node. i.e sudo reboot.
						Once back loop0 should not appear anywhere (lsblk + losetup)
					*/
					// Infof (not Info) so the %s format verb is actually expanded.
					glog.V(100).Infof("See comments for manual cleanup of %s\n", devicePath)
				}
			}
		}
	}

	// if true there was a partition (most likely ZTP /w MC) so delete content instead of the whole thing
	if safeToDeleteBackupDir {
		_, err = cmd.ExecCmd([]string{fmt.Sprintf("sudo rm -rf %s", backupPath)}, nodeName)
		Expect(err).To(BeNil())
	} else {
		_, err = cmd.ExecCmd([]string{fmt.Sprintf("sudo rm -rf %s/*", backupPath)}, nodeName)
		Expect(err).To(BeNil())
	}

	// delete ran-test-talm-recovery folder
	_, err = cmd.ExecCmd([]string{fmt.Sprintf("sudo rm -rf %s", ranTestPath)}, nodeName)
	Expect(err).To(BeNil())
}

// prepareEnvWithSmallMountPoint use loopback device,
// a virtual file system backed by a file, to create a small partition
// helpful links https://stackoverflow.com/q/16044204 and https://youtu.be/r9CQhwci4tE
// Returns the path of the loop device that was mounted over backupPath.
func prepareEnvWithSmallMountPoint(nodeName string) string {
	// run executes one shell command on the node and fails the test on error.
	run := func(command string) string {
		output, err := cmd.ExecCmd([]string{command}, nodeName)
		Expect(err).To(BeNil())

		return output
	}

	// Ensure the backup and scratch directories exist.
	run(fmt.Sprintf("sudo mkdir -p %s", backupPath))
	run(fmt.Sprintf("sudo mkdir -p %s", ranTestPath))

	// Ask the OS for the next free loopback device (one is created on demand).
	devicePath := strings.TrimSpace(run("sudo losetup -f"))
	glog.V(100).Info("loopback device path: ", devicePath)

	// Allocate the backing file that the small file-system will live in.
	run(fmt.Sprintf("sudo fallocate -l %s %s/%s.img", fsSize, ranTestPath, fsSize))

	// Attach the backing file to the loop device. Tip: `losetup -a` shows status.
	run(fmt.Sprintf("sudo losetup %s %s/%s.img", devicePath, ranTestPath, fsSize))

	// Format with xfs (RH preferred, though ext4 works too).
	run(fmt.Sprintf("sudo mkfs.xfs -f -q %s", devicePath))

	// Mount the file-system over the backup dir.
	run(fmt.Sprintf("sudo mount %s %s", devicePath, backupPath))

	return devicePath
}

// assertBackupStatus polls the named CGU until the backup status reported for
// spokeName equals expectation, failing the test after a 10 minute timeout.
func assertBackupStatus(cguName, spokeName, expectation string) {
	Eventually(func() string {
		// Name the local differently from the imported cgu package to avoid
		// shadowing it inside this closure.
		pulledCgu, err := cgu.Pull(ranfuncinittools.HubAPIClient, cguName, talmparams.TalmTestNamespace)
		Expect(err).To(BeNil())

		if pulledCgu.Object.Status.Backup == nil {
			glog.V(100).Info("backup struct not ready yet")

			return ""
		}

		// Single map lookup instead of indexing the status map three times.
		status, ok := pulledCgu.Object.Status.Backup.Status[spokeName]
		if !ok {
			glog.V(100).Info("cluster name as key did not appear yet")

			return ""
		}

		glog.V(100).Infof("[%s] %s backup status: %s\n", pulledCgu.Object.Name, spokeName, status)

		return status
	}, 10*time.Minute, 10*time.Second).Should(Equal(expectation))
}
Loading

0 comments on commit 2476b64

Please sign in to comment.