fix: ensure that Talos runs in a pod (container)
Drop the cleanup of Kubernetes static pod manifests written as files (this
was only needed for upgrades from 1.2.x).

Fix Talos handling of the cgroup hierarchy: if Talos is started in a container
within a non-root cgroup hierarchy, use that hierarchy to build the proper
cgroup paths.

Add a test for a simple TinK mode (Talos-in-Kubernetes).

Update the docs.

Fixes #8274

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
smira committed Feb 20, 2024
1 parent 9dbc339 commit 66f3ffd
Showing 19 changed files with 895 additions and 51 deletions.
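
The commit message describes detecting a non-root cgroup hierarchy when Talos is started inside a container. The hunks below call into a new internal/pkg/cgroup package (InitRoot, Root, Path) whose source is not among the files shown on this page; the following is a minimal sketch of what such helpers could look like, assuming cgroups v2, with every name and detail here an illustration rather than the actual Talos implementation.

```go
// Sketch only: the real helpers live in internal/pkg/cgroup, which is not
// part of the hunks shown here, so this is an assumption of their behavior.
package cgroupsketch

import (
	"bufio"
	"os"
	"path/filepath"
	"strings"
)

var root = "/" // default: Talos owns the whole cgroup hierarchy (bare metal / VM)

// InitRoot reads /proc/self/cgroup (cgroups v2 format "0::<path>") and, when
// the process was started inside a container or pod, records the non-root
// cgroup it was placed into.
func InitRoot() error {
	f, err := os.Open("/proc/self/cgroup")
	if err != nil {
		return err
	}
	defer f.Close() //nolint:errcheck

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		if path, ok := strings.CutPrefix(scanner.Text(), "0::"); ok && path != "" {
			root = path

			break
		}
	}

	return scanner.Err()
}

// Root returns the detected cgroup root ("/" outside of a container).
func Root() string { return root }

// Path prefixes a Talos-relative cgroup name with the detected root, so that
// e.g. "/system/runtime" nests under the pod's cgroup when running in Kubernetes.
func Path(name string) string {
	if root == "/" {
		return name
	}

	return filepath.Join(root, name)
}
```

In the rest of this commit, the kubelet configuration, the system cgroup setup, and the containerd/process runners all switch from raw cgroup names to these prefixed paths.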
1 change: 1 addition & 0 deletions go.mod
@@ -24,6 +24,7 @@ require (
k8s.io/kube-scheduler v0.30.0-alpha.2
k8s.io/kubectl v0.30.0-alpha.2
k8s.io/kubelet v0.30.0-alpha.2
k8s.io/pod-security-admission v0.30.0-alpha.2
)

require (
2 changes: 2 additions & 0 deletions go.sum
@@ -1271,6 +1271,8 @@ k8s.io/kubectl v0.30.0-alpha.2 h1:fw+2Ijv4gqQdFgzYK1nJJR3MFopCdBAZEjnETcM+y4Y=
k8s.io/kubectl v0.30.0-alpha.2/go.mod h1:74X1grqoxhb93ZLxjQo8FurqpWdSAgnNYiUhyYYiWoA=
k8s.io/kubelet v0.30.0-alpha.2 h1:35RaAFKiBA3iBGg8fxFefDdKrBhcakTpbPxZdHMRml8=
k8s.io/kubelet v0.30.0-alpha.2/go.mod h1:wP9oBhAREIMKYXiV7xK92P9OfeMi7TFHGm+EbYtn5ZQ=
k8s.io/pod-security-admission v0.30.0-alpha.2 h1:q2gKZJxHk4Uf0SBxnFLu34ZbbwW7Peml903Tw8jC7tA=
k8s.io/pod-security-admission v0.30.0-alpha.2/go.mod h1:v+SIoDPBLpZRM4yG0+rZRvbNXHYfPmpN1PcdZaOzZe8=
k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI=
k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.69 h1:N0m3tKYbkRMmDobh/47ngz+AWeV7PcfXMDi8xu3Vrag=
4 changes: 2 additions & 2 deletions hack/test/e2e.sh
@@ -150,7 +150,7 @@ function run_talos_integration_test {
;;
esac

"${INTEGRATION_TEST}" -test.v -talos.failfast -talos.talosctlpath "${TALOSCTL}" -talos.kubectlpath "${KUBECTL}" -talos.provisioner "${PROVISIONER}" -talos.name "${CLUSTER_NAME}" "${EXTRA_TEST_ARGS[@]}" "${TEST_RUN[@]}" "${TEST_SHORT[@]}"
"${INTEGRATION_TEST}" -test.v -talos.failfast -talos.talosctlpath "${TALOSCTL}" -talos.kubectlpath "${KUBECTL}" -talos.provisioner "${PROVISIONER}" -talos.name "${CLUSTER_NAME}" -talos.image "${REGISTRY}/siderolabs/talos" "${EXTRA_TEST_ARGS[@]}" "${TEST_RUN[@]}" "${TEST_SHORT[@]}"
}

function run_talos_integration_test_docker {
@@ -170,7 +170,7 @@ function run_talos_integration_test_docker {
;;
esac

"${INTEGRATION_TEST}" -test.v -talos.talosctlpath "${TALOSCTL}" -talos.kubectlpath "${KUBECTL}" -talos.k8sendpoint 127.0.0.1:6443 -talos.provisioner "${PROVISIONER}" -talos.name "${CLUSTER_NAME}" "${EXTRA_TEST_ARGS[@]}" "${TEST_RUN[@]}" "${TEST_SHORT[@]}"
"${INTEGRATION_TEST}" -test.v -talos.talosctlpath "${TALOSCTL}" -talos.kubectlpath "${KUBECTL}" -talos.k8sendpoint 127.0.0.1:6443 -talos.provisioner "${PROVISIONER}" -talos.name "${CLUSTER_NAME}" -talos.image "${REGISTRY}/siderolabs/talos" "${EXTRA_TEST_ARGS[@]}" "${TEST_RUN[@]}" "${TEST_SHORT[@]}"
}

function run_kubernetes_conformance_test {
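
Both integration-test invocations in e2e.sh now pass a -talos.image flag pointing at the registry-local Talos image, matching the new TalosSuite.TalosImage field added in internal/integration/base/base.go further down. The flag registration itself is not part of the hunks shown here; a hedged sketch of how it might be wired into the test binary:

```go
// Sketch only: the actual flag registration in the Talos integration test
// binary is not shown on this page, so the names here are assumptions.
package example

import (
	"flag"

	"github.com/siderolabs/talos/internal/integration/base"
)

var talosImage = flag.String("talos.image", "ghcr.io/siderolabs/talos", "image name for the 'talos' container (TinK mode)")

func newTalosSuite() base.TalosSuite {
	return base.TalosSuite{
		TalosImage: *talosImage,
		// ... the remaining fields come from the other -talos.* flags
	}
}
```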
7 changes: 4 additions & 3 deletions internal/app/machined/pkg/controllers/k8s/kubelet_spec.go
@@ -25,6 +25,7 @@ import (
kubeletconfig "k8s.io/kubelet/config/v1beta1"

v1alpha1runtime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/pkg/cgroup"
"github.com/siderolabs/talos/pkg/argsbuilder"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/kubelet"
@@ -275,9 +276,9 @@ func NewKubeletConfiguration(cfgSpec *k8s.KubeletConfigSpec, kubeletVersion comp
config.Authorization = kubeletconfig.KubeletAuthorization{
Mode: kubeletconfig.KubeletAuthorizationModeWebhook,
}
config.CgroupRoot = "/"
config.SystemCgroups = constants.CgroupSystem
config.KubeletCgroups = constants.CgroupKubelet
config.CgroupRoot = cgroup.Root()
config.SystemCgroups = cgroup.Path(constants.CgroupSystem)
config.KubeletCgroups = cgroup.Path(constants.CgroupKubelet)
config.RotateCertificates = true
config.ProtectKernelDefaults = true

@@ -245,9 +245,6 @@ func (*Sequencer) Boot(r runtime.Runtime) []runtime.Phase {
r.State().Platform().Mode() != runtime.ModeContainer,
"overlay",
MountOverlayFilesystems,
).Append(
"legacyCleanup",
CleanupLegacyStaticPodFiles,
).AppendWhen(
r.State().Platform().Mode() != runtime.ModeContainer,
"udevSetup",
@@ -53,6 +53,7 @@ import (
"github.com/siderolabs/talos/internal/app/machined/pkg/system"
"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
"github.com/siderolabs/talos/internal/app/machined/pkg/system/services"
"github.com/siderolabs/talos/internal/pkg/cgroup"
"github.com/siderolabs/talos/internal/pkg/cri"
"github.com/siderolabs/talos/internal/pkg/environment"
"github.com/siderolabs/talos/internal/pkg/etcd"
@@ -164,6 +165,11 @@ func CreateSystemCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, stri
}
}

// Initialize cgroups root path.
if err = cgroup.InitRoot(); err != nil {
return fmt.Errorf("error initializing cgroups root path: %w", err)
}

groups := []struct {
name string
resources *cgroup2.Resources
@@ -190,6 +196,10 @@ func CreateSystemCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, stri
name: constants.CgroupSystemRuntime,
resources: &cgroup2.Resources{},
},
{
name: constants.CgroupUdevd,
resources: &cgroup2.Resources{},
},
{
name: constants.CgroupPodRuntime,
resources: &cgroup2.Resources{
@@ -228,7 +238,7 @@ func CreateSystemCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, stri
resources = &cgroup2.Resources{}
}

cg, err := cgroup2.NewManager(constants.CgroupMountPath, c.name, resources)
cg, err := cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(c.name), resources)
if err != nil {
return fmt.Errorf("failed to create cgroup: %w", err)
}
@@ -2200,42 +2210,6 @@ func ForceCleanup(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
}, "forceCleanup"
}

// CleanupLegacyStaticPodFiles removes legacy static pod files in the manifests directory.
//
// This part of transition to Talos 1.3.0, as Talos 1.3.0 serves static pods from internal web server.
func CleanupLegacyStaticPodFiles(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
manifestDir, err := os.Open(constants.ManifestsDirectory)
if err != nil {
return fmt.Errorf("error opening manifests directory: %w", err)
}

defer manifestDir.Close() //nolint:errcheck

manifests, err := manifestDir.Readdirnames(0)
if err != nil {
return fmt.Errorf("error listing manifests: %w", err)
}

for _, manifest := range manifests {
// skip manifests not owned by Talos
if !strings.HasPrefix(manifest, constants.TalosManifestPrefix) {
continue
}

podPath := filepath.Join(constants.ManifestsDirectory, manifest)

logger.Printf("cleaning up legacy static pod file %q", podPath)

if err = os.Remove(podPath); err != nil {
return fmt.Errorf("error cleaning up legacy static pod file: %w", err)
}
}

return nil
}, "cleanupLegacyStaticPodFiles"
}

// ReloadMeta reloads META partition after disk mount, installer run, etc.
//
//nolint:gocyclo
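
With the changes above, CreateSystemCgroups first calls cgroup.InitRoot and then creates each system cgroup under cgroup.Path(c.name) instead of the raw name, so the entire Talos cgroup tree nests under whatever root was detected. A compressed sketch of that pattern, assuming the containerd/cgroups v3 cgroup2 API used in the hunk and a hypothetical pod cgroup path:

```go
// Compressed sketch of the pattern in CreateSystemCgroups above; the pod
// cgroup path and the list of names are hypothetical examples.
package main

import (
	"fmt"

	"github.com/containerd/cgroups/v3/cgroup2"
)

func main() {
	// Outside of a container this prefix would be "/"; inside a pod it is the
	// cgroup the kubelet placed the Talos container into.
	podRoot := "/kubepods/burstable/pod1234" // hypothetical

	for _, name := range []string{"/system", "/system/runtime", "/podruntime"} {
		// Equivalent of cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(name), resources).
		cg, err := cgroup2.NewManager("/sys/fs/cgroup", podRoot+name, &cgroup2.Resources{})
		if err != nil {
			fmt.Printf("failed to create cgroup %q: %v\n", name, err)

			continue
		}

		fmt.Printf("created cgroup %q (manager: %v)\n", name, cg != nil)
	}
}
```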
@@ -22,6 +22,7 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
"github.com/siderolabs/talos/internal/app/machined/pkg/system/runner"
"github.com/siderolabs/talos/internal/pkg/cgroup"
)

// containerdRunner is a runner.Runner that runs container in containerd.
@@ -319,7 +320,7 @@ func (c *containerdRunner) newOCISpecOpts(image oci.Image) []oci.SpecOpts {
if c.opts.CgroupPath != "" {
specOpts = append(
specOpts,
oci.WithCgroup(c.opts.CgroupPath),
oci.WithCgroup(cgroup.Path(c.opts.CgroupPath)),
)
}

3 changes: 2 additions & 1 deletion internal/app/machined/pkg/system/runner/process/process.go
@@ -17,6 +17,7 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
"github.com/siderolabs/talos/internal/app/machined/pkg/system/runner"
"github.com/siderolabs/talos/internal/pkg/cgroup"
"github.com/siderolabs/talos/pkg/machinery/constants"
)

@@ -87,7 +88,7 @@ func (p *processRunner) build() (commandWrapper, error) {
args := []string{
fmt.Sprintf("-name=%s", p.args.ID),
fmt.Sprintf("-dropped-caps=%s", strings.Join(p.opts.DroppedCapabilities, ",")),
fmt.Sprintf("-cgroup-path=%s", p.opts.CgroupPath),
fmt.Sprintf("-cgroup-path=%s", cgroup.Path(p.opts.CgroupPath)),
fmt.Sprintf("-oom-score=%d", p.opts.OOMScoreAdj),
fmt.Sprintf("-uid=%d", p.opts.UID),
}
2 changes: 1 addition & 1 deletion internal/app/machined/pkg/system/services/udevd.go
@@ -83,7 +83,7 @@ func (c *Udevd) Runner(r runtime.Runtime) (runner.Runner, error) {
debug,
args,
runner.WithLoggingManager(r.Logging()),
runner.WithCgroupPath(constants.CgroupSystemRuntime),
runner.WithCgroupPath(constants.CgroupUdevd),
runner.WithDroppedCapabilities(constants.UdevdDroppedCapabilities),
),
restart.WithType(restart.Forever),
2 changes: 2 additions & 0 deletions internal/integration/base/base.go
@@ -39,6 +39,8 @@ type TalosSuite struct {
ExtensionsNvidia bool
// TrustedBoot tells if the cluster is secure booted and disks are encrypted
TrustedBoot bool
// TalosImage is the image name for 'talos' container.
TalosImage string

discoveredNodes cluster.Info
}
152 changes: 152 additions & 0 deletions internal/integration/base/k8s.go
@@ -7,9 +7,13 @@
package base

import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"slices"
"time"

"github.com/siderolabs/gen/xslices"
@@ -18,14 +22,23 @@ import (
eventsv1 "k8s.io/api/events/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/discovery"
"k8s.io/client-go/discovery/cached/memory"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/restmapper"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/clientcmd"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
"k8s.io/client-go/tools/remotecommand"
watchtools "k8s.io/client-go/tools/watch"
"k8s.io/kubectl/pkg/scheme"

taloskubernetes "github.com/siderolabs/talos/pkg/kubernetes"
@@ -39,6 +52,7 @@ type K8sSuite struct {
DynamicClient dynamic.Interface
DiscoveryClient *discovery.DiscoveryClient
RestConfig *rest.Config
Mapper *restmapper.DeferredDiscoveryRESTMapper
}

// SetupSuite initializes Kubernetes client.
@@ -68,6 +82,8 @@ func (k8sSuite *K8sSuite) SetupSuite() {

k8sSuite.DiscoveryClient, err = discovery.NewDiscoveryClientForConfig(config)
k8sSuite.Require().NoError(err)

k8sSuite.Mapper = restmapper.NewDeferredDiscoveryRESTMapper(memory.NewMemCacheClient(k8sSuite.DiscoveryClient))
}

// GetK8sNodeByInternalIP returns the kubernetes node by its internal ip or error if it is not found.
@@ -246,3 +262,139 @@ func (k8sSuite *K8sSuite) GetPodsWithLabel(ctx context.Context, namespace, label

return podList, nil
}

// ParseManifests parses YAML manifest bytes into unstructured objects.
func (k8sSuite *K8sSuite) ParseManifests(manifests []byte) []unstructured.Unstructured {
reader := yaml.NewYAMLReader(bufio.NewReader(bytes.NewReader(manifests)))

var parsedManifests []unstructured.Unstructured

for {
yamlManifest, err := reader.Read()
if err != nil {
if err == io.EOF {
break
}

k8sSuite.Require().NoError(err)
}

yamlManifest = bytes.TrimSpace(yamlManifest)

if len(yamlManifest) == 0 {
continue
}

jsonManifest, err := yaml.ToJSON(yamlManifest)
if err != nil {
k8sSuite.Require().NoError(err, "error converting manifest to JSON")
}

if bytes.Equal(jsonManifest, []byte("null")) || bytes.Equal(jsonManifest, []byte("{}")) {
// skip YAML docs which contain only comments
continue
}

var obj unstructured.Unstructured

if err = json.Unmarshal(jsonManifest, &obj); err != nil {
k8sSuite.Require().NoError(err, "error loading JSON manifest into unstructured")
}

parsedManifests = append(parsedManifests, obj)
}

return parsedManifests
}

// ApplyManifests applies the given manifests to the Kubernetes cluster.
func (k8sSuite *K8sSuite) ApplyManifests(ctx context.Context, manifests []unstructured.Unstructured) {
for _, obj := range manifests {
mapping, err := k8sSuite.Mapper.RESTMapping(obj.GetObjectKind().GroupVersionKind().GroupKind(), obj.GetObjectKind().GroupVersionKind().Version)
if err != nil {
k8sSuite.Require().NoError(err, "error creating mapping for object %s", obj.GetName())
}

dr := k8sSuite.DynamicClient.Resource(mapping.Resource).Namespace(obj.GetNamespace())

_, err = dr.Create(ctx, &obj, metav1.CreateOptions{})
k8sSuite.Require().NoError(err, "error creating object %s", obj.GetName())

k8sSuite.T().Logf("created object %s/%s/%s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName())
}
}

// DeleteManifests deletes the given manifests from the Kubernetes cluster.
func (k8sSuite *K8sSuite) DeleteManifests(ctx context.Context, manifests []unstructured.Unstructured) {
// process in reverse order
manifests = slices.Clone(manifests)
slices.Reverse(manifests)

for _, obj := range manifests {
mapping, err := k8sSuite.Mapper.RESTMapping(obj.GetObjectKind().GroupVersionKind().GroupKind(), obj.GetObjectKind().GroupVersionKind().Version)
if err != nil {
k8sSuite.Require().NoError(err, "error creating mapping for object %s", obj.GetName())
}

dr := k8sSuite.DynamicClient.Resource(mapping.Resource).Namespace(obj.GetNamespace())

err = dr.Delete(ctx, obj.GetName(), metav1.DeleteOptions{})
if errors.IsNotFound(err) {
continue
}

k8sSuite.Require().NoError(err, "error deleting object %s", obj.GetName())

// wait for the object to be deleted
fieldSelector := fields.OneTermEqualSelector("metadata.name", obj.GetName()).String()
lw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = fieldSelector

return dr.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = fieldSelector

return dr.Watch(ctx, options)
},
}

preconditionFunc := func(store cache.Store) (bool, error) {
var exists bool

_, exists, err = store.Get(&metav1.ObjectMeta{Namespace: obj.GetNamespace(), Name: obj.GetName()})
if err != nil {
return true, err
}

if !exists {
// since we're looking for it to disappear we just return here if it no longer exists
return true, nil
}

return false, nil
}

_, err = watchtools.UntilWithSync(ctx, lw, &unstructured.Unstructured{}, preconditionFunc, func(event watch.Event) (bool, error) {
return event.Type == watch.Deleted, nil
})

k8sSuite.Require().NoError(err, "error waiting for the object to be deleted %s", obj.GetName())

k8sSuite.T().Logf("deleted object %s/%s/%s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName())
}
}

// ToUnstructured converts the given runtime.Object to unstructured.Unstructured.
func (k8sSuite *K8sSuite) ToUnstructured(obj runtime.Object) unstructured.Unstructured {
unstructuredObj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
if err != nil {
k8sSuite.Require().NoError(err, "error converting object to unstructured")
}

u := unstructured.Unstructured{Object: unstructuredObj}
u.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind())

return u
}
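
The new ParseManifests, ApplyManifests, and DeleteManifests helpers give integration tests a small apply-and-teardown workflow over arbitrary YAML, used by the TinK (Talos-in-Kubernetes) test this commit adds. A hedged usage sketch follows; the suite type, the manifest, and the test wiring are invented for illustration and are not the actual TinK test:

```go
// Usage sketch for the helpers above; the suite type, the manifest, and the
// assertions are invented for illustration.
package example

import (
	"context"
	"testing"
	"time"

	"github.com/stretchr/testify/suite"

	"github.com/siderolabs/talos/internal/integration/base"
)

// ManifestSuite embeds K8sSuite to pick up ParseManifests/ApplyManifests/DeleteManifests.
type ManifestSuite struct {
	base.K8sSuite
}

func (s *ManifestSuite) TestApplyAndCleanup() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	objects := s.ParseManifests([]byte(`---
apiVersion: v1
kind: ConfigMap
metadata:
  name: tink-example
  namespace: default
data:
  hello: world
`))

	s.ApplyManifests(ctx, objects)

	// DeleteManifests removes the objects in reverse order and waits for each
	// deletion to be observed before returning.
	defer s.DeleteManifests(ctx, objects)

	// ... assertions against the running cluster would go here
}

func TestManifestSuite(t *testing.T) {
	suite.Run(t, new(ManifestSuite))
}
```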
