add monitor test for kubelet selinux labels
Signed-off-by: Kevin Hannon <kehannon@redhat.com>
kannon92 committed Apr 1, 2024
1 parent 0d6231f commit d119620
Showing 4 changed files with 371 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/defaultmonitortests/types.go
@@ -32,6 +32,7 @@ import (
"github.com/openshift/origin/pkg/monitortests/network/disruptionserviceloadbalancer"
"github.com/openshift/origin/pkg/monitortests/network/legacynetworkmonitortests"
"github.com/openshift/origin/pkg/monitortests/node/kubeletlogcollector"
"github.com/openshift/origin/pkg/monitortests/node/kubeletselinuxlabels"
"github.com/openshift/origin/pkg/monitortests/node/legacynodemonitortests"
"github.com/openshift/origin/pkg/monitortests/node/nodestateanalyzer"
"github.com/openshift/origin/pkg/monitortests/node/watchnodes"
@@ -159,6 +160,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio
monitorTestRegistry.AddMonitorTestOrDie("node-state-analyzer", "Node / Kubelet", nodestateanalyzer.NewAnalyzer())
monitorTestRegistry.AddMonitorTestOrDie("pod-lifecycle", "Node / Kubelet", watchpods.NewPodWatcher())
monitorTestRegistry.AddMonitorTestOrDie("node-lifecycle", "Node / Kubelet", watchnodes.NewNodeWatcher())
monitorTestRegistry.AddMonitorTestOrDie("node-kubelet-selinux-labels", "Node / Kubelet", kubeletselinuxlabels.NewSelinuxLabelWatcher())

monitorTestRegistry.AddMonitorTestOrDie("legacy-storage-invariants", "Storage", legacystoragemonitortests.NewLegacyTests())

260 changes: 260 additions & 0 deletions pkg/monitortests/node/kubeletselinuxlabels/monitortest.go
@@ -0,0 +1,260 @@
package kubeletselinuxlabels

import (
"context"
"embed"
_ "embed"
"fmt"
"time"

"github.com/openshift/library-go/pkg/operator/resource/resourceread"
"github.com/openshift/origin/pkg/monitortestframework"
"github.com/openshift/origin/pkg/monitortestlibrary/statetracker"
exutil "github.com/openshift/origin/test/extended/util"

"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)

const (
msgPhaseDrain = "drained node"
msgPhaseOSUpdate = "updated operating system"
msgPhaseReboot = "rebooted and kubelet started"
testName = "[sig-node][kubelet] selinux labels on kubelet process should always be kubelet_t"
)

var (
//go:embed *.yaml
yamls embed.FS

namespace *corev1.Namespace
)

func yamlOrDie(name string) []byte {
ret, err := yamls.ReadFile(name)
if err != nil {
panic(err)
}

return ret
}

func init() {
namespace = resourceread.ReadNamespaceV1OrDie(yamlOrDie("namespace.yaml"))
}

type selinuxLabelWatcher struct {
kubeClient *kubernetes.Clientset
namespaceName string
}

// NewSelinuxLabelWatcher returns a monitor test that verifies the selinux label
// on the kubelet process is always kubelet_t.
// We noticed that during node disruption (restarts/startups) the label sometimes
// regressed, so we monitor it throughout all tests.
func NewSelinuxLabelWatcher() monitortestframework.MonitorTest {
return &selinuxLabelWatcher{}
}
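
For orientation, the sketch below shows the order in which the monitortest framework is assumed to drive the methods implemented in this file (collect, construct intervals, evaluate, write, clean up). The driver function and its name are hypothetical and not part of this commit; the real invocation logic lives in origin's monitortestframework package.

// runSelinuxLabelMonitorSketch is an illustrative, hypothetical driver showing the
// assumed lifecycle of this monitor test. It is not part of this commit.
func runSelinuxLabelMonitorSketch(ctx context.Context, cfg *rest.Config, recorder monitorapi.RecorderWriter, storageDir string, begin, end time.Time) error {
	mt := &selinuxLabelWatcher{} // the framework would use NewSelinuxLabelWatcher()
	if err := mt.StartCollection(ctx, cfg, recorder); err != nil {
		return err
	}
	intervals, _, err := mt.CollectData(ctx, storageDir, begin, end)
	if err != nil {
		return err
	}
	computed, err := mt.ConstructComputedIntervals(ctx, intervals, monitorapi.ResourcesMap{}, begin, end)
	if err != nil {
		return err
	}
	// the returned JUnit test cases are what report pass/fail for this monitor test
	if _, err := mt.EvaluateTestsFromConstructedIntervals(ctx, append(intervals, computed...)); err != nil {
		return err
	}
	if err := mt.WriteContentToStorage(ctx, storageDir, "", append(intervals, computed...), monitorapi.ResourcesMap{}); err != nil {
		return err
	}
	return mt.Cleanup(ctx)
}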

func (lw *selinuxLabelWatcher) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
if err != nil {
return err
}
lw.kubeClient = kubeClient
nodes, err := lw.kubeClient.CoreV1().Nodes().List(ctx, v1.ListOptions{})
if err != nil {
return err
}
actualNamespace, err := lw.kubeClient.CoreV1().Namespaces().Create(ctx, namespace, v1.CreateOptions{})
if err != nil {
return err
}
lw.namespaceName = actualNamespace.Name

for i, val := range nodes.Items {
podWithNodeName := selinuxPodSpec(fmt.Sprintf("label-%d", i), actualNamespace.Name, val.Name)
_, err := lw.kubeClient.CoreV1().Pods(lw.namespaceName).Create(ctx, podWithNodeName, v1.CreateOptions{})
if err != nil {
return err
}
}

// we need to have the pods ready
err = wait.PollUntilContextTimeout(ctx, 1*time.Second, 120*time.Second, true, lw.allPodsStarted)
if err != nil {
return err
}
return nil
}

func (lw *selinuxLabelWatcher) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
return nil, nil, nil
}

func (*selinuxLabelWatcher) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
var intervals monitorapi.Intervals
nodeStateTracker := statetracker.NewStateTracker(monitorapi.ConstructionOwnerNodeLifecycle, monitorapi.SourceNodeState, beginning)
locatorToMessageAnnotations := map[string]map[string]string{}

for _, event := range startingIntervals {
// TODO: dangerous assumptions here without using interval source, we ended up picking up container
// ready events because they have a node in the locator, and a reason of "Ready".
// Once the reasons marked "not ported" in the comments below are ported, we could filter here on
// event.Source to ensure we only look at what we intend.
node, ok := monitorapi.NodeFromLocator(event.Locator)
if !ok {
continue
}
reason := monitorapi.ReasonFrom(event.Message)
if len(reason) == 0 {
continue
}

roles := monitorapi.GetNodeRoles(event)

nodeLocator := monitorapi.NewLocator().NodeFromName(node)
nodeLocatorKey := nodeLocator.OldLocator()
if _, ok := locatorToMessageAnnotations[nodeLocatorKey]; !ok {
locatorToMessageAnnotations[nodeLocatorKey] = map[string]string{}
}
locatorToMessageAnnotations[nodeLocatorKey][string(monitorapi.AnnotationRoles)] = roles

drainState := statetracker.State("Drain", "NodeUpdatePhases", monitorapi.NodeUpdateReason)
osUpdateState := statetracker.State("OperatingSystemUpdate", "NodeUpdatePhases", monitorapi.NodeUpdateReason)
rebootState := statetracker.State("Reboot", "NodeUpdatePhases", monitorapi.NodeUpdateReason)

switch reason {
case "Reboot":
// Not ported, so we don't have a Source to check
mb := monitorapi.NewMessage().Reason(monitorapi.NodeUpdateReason).
HumanMessage(msgPhaseDrain).
WithAnnotation(monitorapi.AnnotationConstructed, monitorapi.ConstructionOwnerNodeLifecycle).
WithAnnotation(monitorapi.AnnotationRoles, roles).
WithAnnotation(monitorapi.AnnotationPhase, "Drain")
intervals = append(intervals, nodeStateTracker.CloseIfOpenedInterval(nodeLocator, drainState,
statetracker.SimpleInterval(monitorapi.SourceNodeState, monitorapi.Info, mb),
event.From)...)

osUpdateMB := monitorapi.NewMessage().Reason(monitorapi.NodeUpdateReason).
HumanMessage(msgPhaseOSUpdate).
WithAnnotation(monitorapi.AnnotationConstructed, monitorapi.ConstructionOwnerNodeLifecycle).
WithAnnotation(monitorapi.AnnotationRoles, roles).
WithAnnotation(monitorapi.AnnotationPhase, "OperatingSystemUpdate")
intervals = append(intervals, nodeStateTracker.CloseIfOpenedInterval(nodeLocator, osUpdateState,
statetracker.SimpleInterval(monitorapi.SourceNodeState, monitorapi.Info, osUpdateMB),
event.From)...)
nodeStateTracker.OpenInterval(nodeLocator, rebootState, event.From)
case "Starting":
// Not ported, so we don't have a Source to check
mb := monitorapi.NewMessage().Reason(monitorapi.NodeUpdateReason).
HumanMessage(msgPhaseDrain).
WithAnnotation(monitorapi.AnnotationConstructed, monitorapi.ConstructionOwnerNodeLifecycle).
WithAnnotation(monitorapi.AnnotationRoles, roles).
WithAnnotation(monitorapi.AnnotationPhase, "Drain")
intervals = append(intervals, nodeStateTracker.CloseIfOpenedInterval(nodeLocator, drainState,
statetracker.SimpleInterval(monitorapi.SourceNodeState, monitorapi.Info, mb),
event.From)...)

osUpdateMB := monitorapi.NewMessage().Reason(monitorapi.NodeUpdateReason).
HumanMessage(msgPhaseOSUpdate).
WithAnnotation(monitorapi.AnnotationConstructed, monitorapi.ConstructionOwnerNodeLifecycle).
WithAnnotation(monitorapi.AnnotationRoles, roles).
WithAnnotation(monitorapi.AnnotationPhase, "OperatingSystemUpdate")
intervals = append(intervals, nodeStateTracker.CloseIfOpenedInterval(nodeLocator, osUpdateState,
statetracker.SimpleInterval(monitorapi.SourceNodeState, monitorapi.Info, osUpdateMB),
event.From)...)

rebootMB := monitorapi.NewMessage().Reason(monitorapi.NodeUpdateReason).
HumanMessage(msgPhaseReboot).
WithAnnotation(monitorapi.AnnotationConstructed, monitorapi.ConstructionOwnerNodeLifecycle).
WithAnnotation(monitorapi.AnnotationRoles, roles).
WithAnnotation(monitorapi.AnnotationPhase, "Reboot")
intervals = append(intervals, nodeStateTracker.CloseIfOpenedInterval(nodeLocator, rebootState,
statetracker.SimpleInterval(monitorapi.SourceNodeState, monitorapi.Info, rebootMB),
event.From)...)
}
}
// Close all node intervals left hanging open:
intervals = append(intervals, nodeStateTracker.CloseAllIntervals(locatorToMessageAnnotations, end)...)

return intervals, nil
}

func (lw *selinuxLabelWatcher) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {

podsList, err := lw.kubeClient.CoreV1().Pods(lw.namespaceName).List(ctx, v1.ListOptions{})
if err != nil {
return []*junitapi.JUnitTestCase{{Name: testName, SystemErr: err.Error()}}, err
}
for _, val := range podsList.Items {
if !exutil.CheckPodIsRunning(val) {
return []*junitapi.JUnitTestCase{{Name: testName, SystemErr: fmt.Sprintf("pod %s/%s is not running; the kubelet selinux label check failed", val.Namespace, val.Name)}}, fmt.Errorf("kubelet selinux label not matching kubelet_t")
}
}
return []*junitapi.JUnitTestCase{{Name: testName, SystemOut: "kubelet selinux labels match expected"}}, nil
}

func (*selinuxLabelWatcher) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
return nil
}

func (lw *selinuxLabelWatcher) Cleanup(ctx context.Context) error {

if len(lw.namespaceName) > 0 && lw.kubeClient != nil {
if err := lw.kubeClient.CoreV1().Namespaces().Delete(ctx, lw.namespaceName, v1.DeleteOptions{}); err != nil {
return err
}

startTime := time.Now()
err := wait.PollUntilContextTimeout(ctx, 15*time.Second, 20*time.Minute, true, lw.namespaceDeleted)
if err != nil {
return err
}

klog.Infof("Deleting namespace: %s took %.2f seconds", lw.namespaceName, time.Now().Sub(startTime).Seconds())

}
return nil
}

func (lw *selinuxLabelWatcher) namespaceDeleted(ctx context.Context) (bool, error) {
_, err := lw.kubeClient.CoreV1().Namespaces().Get(ctx, lw.namespaceName, v1.GetOptions{})
if apierrors.IsNotFound(err) {
return true, nil
}

if err != nil {
klog.Errorf("Error checking for deleted namespace: %s, %s", lw.namespaceName, err.Error())
return false, err
}

return false, nil
}

func (lw *selinuxLabelWatcher) allPodsStarted(ctx context.Context) (bool, error) {
pods, err := lw.kubeClient.CoreV1().Pods(lw.namespaceName).List(ctx, v1.ListOptions{})
if err != nil {
klog.Errorf("Error listing pods in namespace %s: %s", lw.namespaceName, err.Error())
return false, err
}
for _, val := range pods.Items {
if !exutil.CheckPodIsReady(val) {
// returning an error here would abort the poll; report not-ready and keep waiting
klog.Infof("pod %s/%s is not ready yet", val.Namespace, val.Name)
return false, nil
}
}

return true, nil
}
17 changes: 17 additions & 0 deletions pkg/monitortests/node/kubeletselinuxlabels/namespace.yaml
@@ -0,0 +1,17 @@
kind: Namespace
apiVersion: v1
metadata:
  generateName: e2e-selinux-label-pod-
  labels:
    pod-security.kubernetes.io/enforce: privileged
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/warn: privileged
    # we must update our namespace to bypass SCC so that we can avoid default mutation of our pod and SCC evaluation.
    # technically we could also choose to bind an SCC, but I don't see a lot of value in doing that and we would have to wait
    # for a secondary cache to fill to reflect that. If we miss that cache filling, we'll get assigned a restricted SCC
    # and fail.
    security.openshift.io/disable-securitycontextconstraints: "true"
    # don't let the PSA labeller mess with our namespace.
    security.openshift.io/scc.podSecurityLabelSync: "false"
  annotations:
    workload.openshift.io/allowed: management
92 changes: 92 additions & 0 deletions pkg/monitortests/node/kubeletselinuxlabels/util.go
@@ -0,0 +1,92 @@
package kubeletselinuxlabels

import (
images "github.com/openshift/origin/test/extended/util/image"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
serviceAccountName = "selinuxlabel"
)

const (
busyScript = `
#!/bin/bash
while true
do
echo "Busy working, cycling through the ones and zeros"
kubelet_label=$(ps -x -Z)
SUB='kubelet_t'
if [[ "$kubelet_label" != *"$SUB"* ]]; then
echo "kubelet label does not match kubelet_t"
echo "failing this pod and therefore the test"
exit 1
fi
echo "kubelet_t found in ps"
sleep 5
done
`
)

// selinuxPodSpec generates a privileged, host-PID pod pinned to the given node.
// The pod runs busyScript, which repeatedly checks that the kubelet process
// carries the kubelet_t selinux label and fails the pod (and therefore the test)
// if it does not.
func selinuxPodSpec(name, namespace, nodeName string) *corev1.Pod {
isZero := int64(0)
isDirectory := corev1.HostPathDirectory
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
},
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyNever,
PriorityClassName: "system-cluster-critical",
NodeName: nodeName,
HostPID: true,
Containers: []corev1.Container{
{

Image: images.ShellImage(),
ImagePullPolicy: corev1.PullIfNotPresent,
Name: name,
Command: []string{
"/bin/bash",
"-c",
busyScript,
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10m"),
corev1.ResourceMemory: resource.MustParse("50Mi"),
},
},
SecurityContext: &corev1.SecurityContext{
RunAsUser: &isZero,
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "host",
MountPath: "/host",
},
},
},
},
Volumes: []corev1.Volume{
{
Name: "host",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/",
Type: &isDirectory,
},
},
},
},
},
}
}
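
As a side note, the label check that busyScript performs in bash could be written in Go roughly as below. This helper is an illustrative sketch only, not part of this commit; it assumes it runs inside a host-PID pod with procps available and that "os/exec" and "strings" are imported.

// kubeletLabelIsKubeletT mirrors busyScript: list host processes with their selinux
// context via `ps -x -Z` and report whether the kubelet_t label shows up.
// Hypothetical helper, not part of this commit.
func kubeletLabelIsKubeletT() (bool, error) {
	out, err := exec.Command("ps", "-x", "-Z").CombinedOutput()
	if err != nil {
		return false, err
	}
	return strings.Contains(string(out), "kubelet_t"), nil
}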
