cmd/k8s-operator,k8s-operator: allow the operator to deploy exit nodes via Connector custom resource (tailscale#10724)

cmd/k8s-operator/deploy/crds,k8s-operator/apis/v1alpha1: allow defining an exit node via the Connector CR.

Make it possible to deploy an exit node to a Kubernetes cluster
via the Connector custom resource.

Also change the Connector API so that one Connector corresponds
to one tailnet node, which can be a subnet router, an exit node,
or both.

The Kubernetes operator parses the Connector custom resource and,
if .spec.exitNode is set, configures the Tailscale node deployed
for that Connector as an exit node.
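
For example, a Connector that acts as both a subnet router and an
exit node could be defined as below (an illustrative manifest, not
part of this commit; field names follow the v1alpha1 API, route and
hostname values are made up):

    apiVersion: tailscale.com/v1alpha1
    kind: Connector
    metadata:
      name: prod
    spec:
      hostname: prod-connector
      subnetRouter:
        advertiseRoutes:
          - "10.40.0.0/14"
      exitNode: true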

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
Co-authored-by: Anton Tolchanov <anton@tailscale.com>
irbekrm and knyar committed Jan 9, 2024
1 parent 953fa80 commit 05093ea
Showing 10 changed files with 1,055 additions and 795 deletions.
241 changes: 121 additions & 120 deletions cmd/k8s-operator/connector.go
@@ -10,7 +10,6 @@ import (
     "fmt"
     "net/netip"
     "slices"
-    "strings"
     "sync"
     "time"
 
@@ -33,17 +32,15 @@ import (
 )
 
 const (
-    reasonSubnetRouterCreationFailed    = "SubnetRouterCreationFailed"
-    reasonSubnetRouterCreated           = "SubnetRouterCreated"
-    reasonSubnetRouterCleanupFailed     = "SubnetRouterCleanupFailed"
-    reasonSubnetRouterCleanupInProgress = "SubnetRouterCleanupInProgress"
-    reasonSubnetRouterInvalid           = "SubnetRouterInvalid"
-
-    messageSubnetRouterCreationFailed = "Failed creating subnet router for routes %s: %v"
-    messageSubnetRouterInvalid        = "Subnet router is invalid: %v"
-    messageSubnetRouterCreated        = "Created subnet router for routes %s"
-    messageSubnetRouterCleanupFailed  = "Failed cleaning up subnet router resources: %v"
-    msgSubnetRouterCleanupInProgress  = "SubnetRouterCleanupInProgress"
+    reasonConnectorCreationFailed = "ConnectorCreationFailed"
+
+    reasonConnectorCreated           = "ConnectorCreated"
+    reasonConnectorCleanupFailed     = "ConnectorCleanupFailed"
+    reasonConnectorCleanupInProgress = "ConnectorCleanupInProgress"
+    reasonConnectorInvalid           = "ConnectorInvalid"
+
+    messageConnectorCreationFailed = "Failed creating Connector: %v"
+    messageConnectorInvalid        = "Connector is invalid: %v"
 
     shortRequeue = time.Second * 5
 )
@@ -61,181 +58,195 @@ type ConnectorReconciler struct
 
     mu sync.Mutex // protects following
 
-    // subnetRouters tracks the subnet routers managed by this Tailscale
-    // Operator instance.
-    subnetRouters set.Slice[types.UID]
+    subnetRouters set.Slice[types.UID] // for subnet routers gauge
+    exitNodes     set.Slice[types.UID] // for exit nodes gauge
 }
 
 var (
-    // gaugeIngressResources tracks the number of subnet routers that we're
-    // currently managing.
-    gaugeSubnetRouterResources = clientmetric.NewGauge("k8s_subnet_router_resources")
+    // gaugeConnectorResources tracks the overall number of Connectors currently managed by this operator instance.
+    gaugeConnectorResources = clientmetric.NewGauge("k8s_connector_resources")
+    // gaugeConnectorSubnetRouterResources tracks the number of Connectors managed by this operator instance that are subnet routers.
+    gaugeConnectorSubnetRouterResources = clientmetric.NewGauge("k8s_connector_subnetrouter_resources")
+    // gaugeConnectorExitNodeResources tracks the number of Connectors currently managed by this operator instance that are exit nodes.
+    gaugeConnectorExitNodeResources = clientmetric.NewGauge("k8s_connector_exitnode_resources")
 )
 
-func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
-    logger := a.logger.With("connector", req.Name)
+func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
+    logger := a.logger.With("Connector", req.Name)
     logger.Debugf("starting reconcile")
     defer logger.Debugf("reconcile finished")
 
     cn := new(tsapi.Connector)
     err = a.Get(ctx, req.NamespacedName, cn)
     if apierrors.IsNotFound(err) {
-        logger.Debugf("connector not found, assuming it was deleted")
+        logger.Debugf("Connector not found, assuming it was deleted")
         return reconcile.Result{}, nil
     } else if err != nil {
         return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Connector: %w", err)
     }
     if !cn.DeletionTimestamp.IsZero() {
-        logger.Debugf("connector is being deleted or should not be exposed, cleaning up components")
+        logger.Debugf("Connector is being deleted or should not be exposed, cleaning up resources")
         ix := xslices.Index(cn.Finalizers, FinalizerName)
         if ix < 0 {
             logger.Debugf("no finalizer, nothing to do")
             return reconcile.Result{}, nil
         }
 
-        if done, err := a.maybeCleanupSubnetRouter(ctx, logger, cn); err != nil {
+        if done, err := a.maybeCleanupConnector(ctx, logger, cn); err != nil {
             return reconcile.Result{}, err
         } else if !done {
-            logger.Debugf("cleanup not finished, will retry...")
+            logger.Debugf("Connector resource cleanup not yet finished, will retry...")
             return reconcile.Result{RequeueAfter: shortRequeue}, nil
         }
 
        cn.Finalizers = append(cn.Finalizers[:ix], cn.Finalizers[ix+1:]...)
         if err := a.Update(ctx, cn); err != nil {
             return reconcile.Result{}, err
         }
-        logger.Infof("connector resources cleaned up")
+        logger.Infof("Connector resources cleaned up")
         return reconcile.Result{}, nil
     }
 
     oldCnStatus := cn.Status.DeepCopy()
-    defer func() {
-        if cn.Status.SubnetRouter == nil {
-            tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionUnknown, "", "", cn.Generation, a.clock, logger)
-        } else if cn.Status.SubnetRouter.Ready == metav1.ConditionTrue {
-            tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonSubnetRouterCreated, reasonSubnetRouterCreated, cn.Generation, a.clock, logger)
-        } else {
-            tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionFalse, cn.Status.SubnetRouter.Reason, cn.Status.SubnetRouter.Reason, cn.Generation, a.clock, logger)
-        }
+    setStatus := func(cn *tsapi.Connector, conditionType tsapi.ConnectorConditionType, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
+        tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, status, reason, message, cn.Generation, a.clock, logger)
         if !apiequality.Semantic.DeepEqual(oldCnStatus, cn.Status) {
-            // an error encountered here should get returned by the Reconcile function
+            // An error encountered here should get returned by the Reconcile function.
             if updateErr := a.Client.Status().Update(ctx, cn); updateErr != nil {
-                err = updateErr
+                err = errors.Wrap(err, updateErr.Error())
             }
         }
-    }()
+        return res, err
+    }
 
     if !slices.Contains(cn.Finalizers, FinalizerName) {
         // This log line is printed exactly once during initial provisioning,
         // because once the finalizer is in place this block gets skipped. So,
         // this is a nice place to tell the operator that the high level,
         // multi-reconcile operation is underway.
-        logger.Infof("ensuring connector is set up")
+        logger.Infof("ensuring Connector is set up")
         cn.Finalizers = append(cn.Finalizers, FinalizerName)
         if err := a.Update(ctx, cn); err != nil {
             err = fmt.Errorf("failed to add finalizer: %w", err)
-            logger.Errorf("error adding finalizer: %v", err)
-            return reconcile.Result{}, err
+            logger.Errorf("error adding finalizer: %w", err)
+            return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, reasonConnectorCreationFailed)
         }
     }
 
-    // A Connector with unset .spec.subnetRouter and unset
-    // cn.spec.subnetRouter.Routes will be rejected at apply time (because
-    // these fields are set as required by our CRD validation). This check
-    // is here for if our CRD validation breaks unnoticed we don't crash the
-    // operator with nil pointer exception.
-    if cn.Spec.SubnetRouter == nil || len(cn.Spec.SubnetRouter.Routes) < 1 {
-        return reconcile.Result{}, nil
+    if err := a.validate(cn); err != nil {
+        logger.Errorf("error validating Connector spec: %w", err)
+        message := fmt.Sprintf(messageConnectorInvalid, err)
+        a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
+        return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
     }
 
-    if err := validateSubnetRouter(*cn.Spec.SubnetRouter); err != nil {
-        msg := fmt.Sprintf(messageSubnetRouterInvalid, err)
-        cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
-            Ready:   metav1.ConditionFalse,
-            Reason:  reasonSubnetRouterInvalid,
-            Message: msg,
-        }
-        a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonSubnetRouterInvalid, msg)
-        return reconcile.Result{}, nil
+    if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
+        logger.Errorf("error creating Connector resources: %w", err)
+        message := fmt.Sprintf(messageConnectorCreationFailed, err)
+        a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorCreationFailed, message)
+        return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, message)
     }
 
-    var sb strings.Builder
-    sb.WriteString(string(cn.Spec.SubnetRouter.Routes[0]))
-    for _, r := range cn.Spec.SubnetRouter.Routes[1:] {
-        sb.WriteString(fmt.Sprintf(",%s", r))
+    logger.Info("Connector resources synced")
+    cn.Status.IsExitNode = cn.Spec.ExitNode
+    if cn.Spec.SubnetRouter != nil {
+        cn.Status.SubnetRoutes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
+        return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
     }
-    cidrsS := sb.String()
-    logger.Debugf("ensuring a subnet router is deployed")
-    err = a.maybeProvisionSubnetRouter(ctx, logger, cn, cidrsS)
-    if err != nil {
-        msg := fmt.Sprintf(messageSubnetRouterCreationFailed, cidrsS, err)
-        cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
-            Ready:   metav1.ConditionFalse,
-            Reason:  reasonSubnetRouterCreationFailed,
-            Message: msg,
-        }
-        a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonSubnetRouterCreationFailed, msg)
-        return reconcile.Result{}, err
+    cn.Status.SubnetRoutes = ""
+    return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
 }
 
+// maybeProvisionConnector ensures that any new resources required for this
+// Connector instance are deployed to the cluster.
+func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) error {
+    hostname := cn.Name + "-connector"
+    if cn.Spec.Hostname != "" {
+        hostname = string(cn.Spec.Hostname)
+    }
+    crl := childResourceLabels(cn.Name, a.tsnamespace, "connector")
+    sts := &tailscaleSTSConfig{
+        ParentResourceName:  cn.Name,
+        ParentResourceUID:   string(cn.UID),
+        Hostname:            hostname,
+        ChildResourceLabels: crl,
+        Tags:                cn.Spec.Tags.Stringify(),
+        Connector: &connector{
+            isExitNode: cn.Spec.ExitNode,
+        },
+    }
+
+    if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
+        sts.Connector.routes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
+    }
+
+    a.mu.Lock()
+    if sts.Connector.isExitNode {
+        a.exitNodes.Add(cn.UID)
+    } else {
+        a.exitNodes.Remove(cn.UID)
+    }
-    cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
-        Routes:  cidrsS,
-        Ready:   metav1.ConditionTrue,
-        Reason:  reasonSubnetRouterCreated,
-        Message: fmt.Sprintf(messageSubnetRouterCreated, cidrsS),
+    if sts.Connector.routes != "" {
+        a.subnetRouters.Add(cn.GetUID())
+    } else {
+        a.subnetRouters.Remove(cn.GetUID())
     }
-    return reconcile.Result{}, nil
+    a.mu.Unlock()
+    gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
+    gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
+    var connectors set.Slice[types.UID]
+    connectors.AddSlice(a.exitNodes.Slice())
+    connectors.AddSlice(a.subnetRouters.Slice())
+    gaugeConnectorResources.Set(int64(connectors.Len()))
+
+    _, err := a.ssr.Provision(ctx, logger, sts)
+    return err
 }
 
-func (a *ConnectorReconciler) maybeCleanupSubnetRouter(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
-    if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "subnetrouter")); err != nil {
-        return false, fmt.Errorf("failed to cleanup: %w", err)
+func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
+    if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector")); err != nil {
+        return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
     } else if !done {
-        logger.Debugf("cleanup not done yet, waiting for next reconcile")
+        logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")
         return false, nil
     }
 
     // Unlike most log entries in the reconcile loop, this will get printed
     // exactly once at the very end of cleanup, because the final step of
     // cleanup removes the tailscale finalizer, which will make all future
     // reconciles exit early.
-    logger.Infof("cleaned up subnet router")
+    logger.Infof("cleaned up Connector resources")
     a.mu.Lock()
-    defer a.mu.Unlock()
     a.subnetRouters.Remove(cn.UID)
-    gaugeSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
+    a.exitNodes.Remove(cn.UID)
+    a.mu.Unlock()
+    gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
+    gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
+    var connectors set.Slice[types.UID]
+    connectors.AddSlice(a.exitNodes.Slice())
+    connectors.AddSlice(a.subnetRouters.Slice())
+    gaugeConnectorResources.Set(int64(connectors.Len()))
     return true, nil
 }
 
-// maybeProvisionSubnetRouter maybe deploys subnet router that exposes a subset of cluster cidrs to the tailnet
-func (a *ConnectorReconciler) maybeProvisionSubnetRouter(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector, cidrs string) error {
-    if cn.Spec.SubnetRouter == nil || len(cn.Spec.SubnetRouter.Routes) < 1 {
+func (a *ConnectorReconciler) validate(cn *tsapi.Connector) error {
+    // Connector fields are already validated at apply time with CEL validation
+    // on custom resource fields. The checks here are a backup in case the
+    // CEL validation breaks without us noticing.
+    if !(cn.Spec.SubnetRouter != nil || cn.Spec.ExitNode) {
+        return errors.New("invalid spec: a Connector must expose subnet routes or act as an exit node (or both)")
+    }
+    if cn.Spec.SubnetRouter == nil {
         return nil
     }
-    a.mu.Lock()
-    a.subnetRouters.Add(cn.UID)
-    gaugeSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
-    a.mu.Unlock()
+    return validateSubnetRouter(cn.Spec.SubnetRouter)
 }
 
-    crl := childResourceLabels(cn.Name, a.tsnamespace, "subnetrouter")
-    hostname := hostnameForSubnetRouter(cn)
-    sts := &tailscaleSTSConfig{
-        ParentResourceName:  cn.Name,
-        ParentResourceUID:   string(cn.UID),
-        Hostname:            hostname,
-        ChildResourceLabels: crl,
-        Routes:              cidrs,
-    }
-    for _, tag := range cn.Spec.SubnetRouter.Tags {
-        sts.Tags = append(sts.Tags, string(tag))
+func validateSubnetRouter(sb *tsapi.SubnetRouter) error {
+    if len(sb.AdvertiseRoutes) < 1 {
+        return errors.New("invalid subnet router spec: no routes defined")
     }
 
-    _, err := a.ssr.Provision(ctx, logger, sts)
-
-    return err
-}
-func validateSubnetRouter(sb tsapi.SubnetRouter) error {
     var err error
-    for _, route := range sb.Routes {
+    for _, route := range sb.AdvertiseRoutes {
         pfx, e := netip.ParsePrefix(string(route))
         if e != nil {
             err = errors.Wrap(err, fmt.Sprintf("route %s is invalid: %v", route, err))
@@ -247,13 +258,3 @@ func validateSubnetRouter(sb tsapi.SubnetRouter) error
     }
     return err
 }
-
-func hostnameForSubnetRouter(cn *tsapi.Connector) string {
-    if cn.Spec.SubnetRouter == nil {
-        return ""
-    }
-    if cn.Spec.SubnetRouter.Hostname != "" {
-        return string(cn.Spec.SubnetRouter.Hostname)
-    }
-    return cn.Name + "-" + "subnetrouter"
-}
