Commit
update code to automate nto bugs
liqcui committed Nov 23, 2023
1 parent ad0c73d commit 8bad0b4
Showing 3 changed files with 276 additions and 1 deletion.
83 changes: 82 additions & 1 deletion test/extended/node_tuning/node_tuning.go
@@ -1,6 +1,7 @@
package node_tuning

import (
"context"
"fmt"
"path/filepath"
"strings"
@@ -10,7 +11,12 @@ import (
o "github.com/onsi/gomega"

exutil "github.com/openshift/origin/test/extended/util"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/dynamic"
"k8s.io/kubernetes/test/e2e/framework"
e2e "k8s.io/kubernetes/test/e2e/framework"
)

@@ -19,7 +25,7 @@ var _ = g.Describe("[sig-node-tuning] NTO should", func() {

var (
ntoNamespace = "openshift-cluster-node-tuning-operator"
oc = exutil.NewCLIWithoutNamespace("nto")
oc = exutil.NewCLIWithoutNamespace("nto").AsAdmin()
buildPruningBaseDir = exutil.FixturePath("testdata", "node_tuning")
ntoStalldFile = filepath.Join(buildPruningBaseDir, "nto-stalld.yaml")
stalldCurrentPID string
@@ -111,6 +117,81 @@ var _ = g.Describe("[sig-node-tuning] NTO should", func() {

err = fmt.Errorf("case: %v\nexpected error because stalld service restarted: %v", g.CurrentSpecReport().FullText(), errWait)
o.Expect(err).NotTo(o.HaveOccurred())
})

//OCPBUGS-18052
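// Regression test for OCPBUGS-18052: on a single-node cluster the installation could hang because the
// machine-config daemon waits for a machine config that does not exist. The checks below verify that the
// master MCP settles, that the machine-config and node-tuning cluster operators are healthy, and that the
// machine-config-daemon logs on the first master contain no "Marking Degraded due to"/"not found" messages.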
g.It("SNO installation does not finish due to wait for non-existing machine-config [Early]", func() {
isSNO := isSNOCluster(oc)
if !isSNO {
g.Skip("only test on SNO cluster, skipping it ...")
}

var (
mcpConfigDaemonset *corev1.Pod
errMasterUpdating error
)

ctx := context.TODO()
nodeClient := oc.KubeClient().CoreV1().Nodes()
firstMasterNode, err := getFirstMasterNode(ctx, nodeClient)
o.Expect(err).NotTo(o.HaveOccurred())

e2e.Logf("assert the status of mcp master")

config, err := framework.LoadConfig()
framework.ExpectNoError(err)
dynamicClient := dynamic.NewForConfigOrDie(config)
// Wait for up to 10 minutes and check that the master MCP stays in a normal state
errWait := wait.Poll(30*time.Second, 600*time.Second, func() (bool, error) {
mcps := dynamicClient.Resource(schema.GroupVersionResource{
Group: "machineconfiguration.openshift.io",
Version: "v1",
Resource: "machineconfigpools",
})
pools, err := mcps.List(context.Background(), metav1.ListOptions{})
if err != nil {
framework.Logf("error getting pools %v", err)
return false, nil
}
allUpdated := true
for _, p := range pools.Items {
updated, requiresUpdate := IsPoolUpdated(mcps, p.GetName())
allUpdated = allUpdated && updated

// Invariant: when CVO reaches level, MCO is required to have rolled out control plane updates
if p.GetName() == "master" && requiresUpdate && errMasterUpdating == nil {
errMasterUpdating = fmt.Errorf("the %q pool should be updated before the CVO reports available at the new version", p.GetName())
framework.Logf("Invariant violation detected: %s", errMasterUpdating)
}
}
e2e.Logf("the status of mcp master restore to normal state")
return allUpdated, nil

})

if errWait != nil {
e2e.Logf("%v", errWait)
return
}

e2e.Logf("assert the status of co machine-config")
assertCOStatus(oc, "machine-config")

e2e.Logf("assert the status of co node-tuning ")
assertCOStatus(oc, "node-tuning")

kf := oc.KubeFramework()
// look up the machine-config-daemon pod running on the first master node
mcpConfigDaemonset, err = exutil.GetMachineConfigDaemonByNode(kf.ClientSet, firstMasterNode)
o.Expect(err).NotTo(o.HaveOccurred())

e2e.Logf("machine-config-daemon pod name: %v", mcpConfigDaemonset.Name)

e2e.Logf("Get pod logs for %v", mcpConfigDaemonset.Name)
podLogsStdout, err := GetPodLogsLastLines(context.Background(), oc.KubeClient(), "openshift-machine-config-operator", mcpConfigDaemonset.Name, "machine-config-daemon", 20)
o.Expect(err).NotTo(o.HaveOccurred())

e2e.Logf("Check if the log of %v contains keyword [Marking Degraded due to|not found]", mcpConfigDaemonset.Name)
logAssertResult := assertPodLogsLastLines(mcpConfigDaemonset.Name, podLogsStdout, "Marking Degraded due to|not found")
o.Expect(logAssertResult).To(o.BeFalse())
})
})
192 changes: 192 additions & 0 deletions test/extended/node_tuning/node_tuning_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package node_tuning

import (
"context"
"fmt"
"regexp"
"strconv"
"strings"
"time"

o "github.com/onsi/gomega"
configv1 "github.com/openshift/api/config/v1"
exutil "github.com/openshift/origin/test/extended/util"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/dynamic"
clientset "k8s.io/client-go/kubernetes"
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/kubernetes/test/e2e/framework"
e2e "k8s.io/kubernetes/test/e2e/framework"
)

const masterNodeRoleLabel = "node-role.kubernetes.io/master"

// isSNOCluster checks whether the cluster is a single-node (SNO) cluster
func isSNOCluster(oc *exutil.CLI) bool {
infrastructureType, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
o.Expect(err).NotTo(o.HaveOccurred())
e2e.Logf("the cluster control plane topology is %v", infrastructureType.Status.ControlPlaneTopology)
return infrastructureType.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode
}

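// getFirstMasterNode returns the first node carrying the master role label, or an error if no master node can be found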
func getFirstMasterNode(ctx context.Context, nodeClient v1.NodeInterface) (*corev1.Node, error) {
masterNodes, err := nodeClient.List(ctx, metav1.ListOptions{LabelSelector: masterNodeRoleLabel})
if err != nil {
return nil, fmt.Errorf("failed to list master nodes: %w", err)
}
if len(masterNodes.Items) == 0 {
return nil, fmt.Errorf("no master node found with label %q", masterNodeRoleLabel)
}
firstMasterNode := &masterNodes.Items[0]
e2e.Logf("the first master node is %v", firstMasterNode.Name)
return firstMasterNode, nil
}

// assertPodLogsLastLines reports whether the pod logs match the given regular-expression filter and logs the first matching line
func assertPodLogsLastLines(podName string, podLogs string, filter string) bool {
regNTOPodLogs, err := regexp.Compile(".*" + filter + ".*")
o.Expect(err).NotTo(o.HaveOccurred())
isMatch := regNTOPodLogs.MatchString(podLogs)
if isMatch {
loglines := regNTOPodLogs.FindAllString(podLogs, -1)
e2e.Logf("the logs of pod %v is [%v]", podName, loglines[0])
return true
}
e2e.Logf("the keywords [%s] of pod isn't found ...", filter)
return false
}

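// GetPodLogsLastLines returns the last lastlines lines of the log of the given container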
func GetPodLogsLastLines(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, lastlines int) (string, error) {
return getPodLogsInternal(ctx, c, namespace, podName, containerName, false, nil, &lastlines)
}

// getPodLogsInternal retrieves container logs, optionally from the previous instance, since a given time, or limited to the last tailLines lines
func getPodLogsInternal(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time, tailLines *int) (string, error) {
request := c.CoreV1().RESTClient().Get().
Resource("pods").
Namespace(namespace).
Name(podName).SubResource("log").
Param("container", containerName).
Param("previous", strconv.FormatBool(previous))
if sinceTime != nil {
request.Param("sinceTime", sinceTime.Format(time.RFC3339))
}
if tailLines != nil {
request.Param("tailLines", strconv.Itoa(*tailLines))
}
logs, err := request.Do(ctx).Raw()
if err != nil {
return "", err
}
if strings.Contains(string(logs), "Internal Error") {
return "", fmt.Errorf("Fetched log contains \"Internal Error\": %q", string(logs))
}
return string(logs), err
}

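// IsPoolUpdated reports whether the named MachineConfigPool is fully updated (Updated=True, Updating=False, Degraded=False)
// and whether it is still updating; a paused pool is treated as up to date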
func IsPoolUpdated(dc dynamic.NamespaceableResourceInterface, name string) (poolUpToDate bool, poolIsUpdating bool) {
pool, err := dc.Get(context.Background(), name, metav1.GetOptions{})
if err != nil {
framework.Logf("error getting pool %s: %v", name, err)
return false, false
}

paused, found, err := unstructured.NestedBool(pool.Object, "spec", "paused")
if err != nil || !found {
return false, false
}

conditions, found, err := unstructured.NestedFieldNoCopy(pool.Object, "status", "conditions")
if err != nil || !found {
return false, false
}
original, ok := conditions.([]interface{})
if !ok {
return false, false
}
var updated, updating, degraded bool
for _, obj := range original {
o, ok := obj.(map[string]interface{})
if !ok {
return false, false
}
t, found, err := unstructured.NestedString(o, "type")
if err != nil || !found {
return false, false
}
s, found, err := unstructured.NestedString(o, "status")
if err != nil || !found {
return false, false
}
if t == "Updated" && s == "True" {
updated = true
}
if t == "Updating" && s == "True" {
updating = true
}
if t == "Degraded" && s == "True" {
degraded = true
}
}
if paused {
framework.Logf("Pool %s is paused, treating as up-to-date (Updated: %v, Updating: %v, Degraded: %v)", name, updated, updating, degraded)
return true, updating
}
if updated && !updating && !degraded {
return true, updating
}
framework.Logf("Pool %s is still reporting (Updated: %v, Updating: %v, Degraded: %v)", name, updated, updating, degraded)
return false, updating
}

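// findCondition returns the ClusterOperator status condition with the given type, or nil if no such condition is present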
func findCondition(conditions []configv1.ClusterOperatorStatusCondition, name configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition {
for i := range conditions {
if name == conditions[i].Type {
return &conditions[i]
}
}
return nil
}

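// assertCOStatus polls for up to 10 minutes until the named cluster operator reports Available=True,
// Degraded=False and Progressing=False, logging the intermediate status on every iteration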
func assertCOStatus(oc *exutil.CLI, coName string) {
errWait := wait.Poll(30*time.Second, 600*time.Second, func() (bool, error) {
// re-list the cluster operators on every iteration so that the conditions are refreshed
var desiredCO configv1.ClusterOperator
clusterOperatorsList, err := oc.AdminConfigClient().ConfigV1().ClusterOperators().List(context.Background(), metav1.ListOptions{})
if err != nil {
e2e.Logf("error listing cluster operators: %v, check again", err)
return false, nil
}
for _, clusterOperator := range clusterOperatorsList.Items {
if clusterOperator.Name == coName {
desiredCO = clusterOperator
break
}
}
available := findCondition(desiredCO.Status.Conditions, configv1.OperatorAvailable)
degraded := findCondition(desiredCO.Status.Conditions, configv1.OperatorDegraded)
progressing := findCondition(desiredCO.Status.Conditions, configv1.OperatorProgressing)
if available == nil || degraded == nil || progressing == nil {
e2e.Logf("cluster operator %v does not report all of the Available/Degraded/Progressing conditions yet, check again", coName)
return false, nil
}
if available.Status == configv1.ConditionTrue &&
degraded.Status == configv1.ConditionFalse &&
progressing.Status == configv1.ConditionFalse {
e2e.Logf("the status of cluster operator %v is back to a normal state", coName)
return true, nil
}
e2e.Logf("the status of cluster operator %v is: available [%v], degraded [%v], progressing [%v], check again", coName, available.Status, degraded.Status, progressing.Status)
return false, nil
})

if errWait != nil {
e2e.Logf("%v", errWait)
return
}
}

Some generated files are not rendered by default.
