Skip to content

Commit

Permalink
fix: use proper types in the machine status and snapshot controllers
Browse files Browse the repository at this point in the history
The return type should be the input type, not the output type.
Also leverage the runner equality functions to avoid restarting the collectors if
nothing changed.

Signed-off-by: Artem Chernyshev <artem.chernyshev@talos-systems.com>
  • Loading branch information
Unix4ever committed Jun 14, 2024
1 parent a2b7b53 commit 22bb2cc
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 49 deletions.
6 changes: 5 additions & 1 deletion internal/backend/runtime/omni/controllers/helpers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ func HandleInput[T generic.ResourceWithRD, S generic.ResourceWithRD](ctx context
return zero, err
}

return zero, nil
if res.Metadata().Phase() == resource.PhaseTearingDown {
return zero, nil
}

return res, nil
}

if !res.Metadata().Finalizers().Has(finalizer) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ func (runner *Runner[T, S]) StartTask(ctx context.Context, logger *zap.Logger, i
runner.mu.Lock()
defer runner.mu.Unlock()

running, ok := runner.running[id]

if ok {
if runner.equalityFunc(spec, running.spec) {
return
}

logger.Debug("replacing task", zap.String("task", id))

runner.stopTask(id)
}

runner.running[id] = New(logger, spec, task)

logger.Debug("starting task", zap.String("task", id))
Expand All @@ -62,12 +74,16 @@ func (runner *Runner[T, S]) StopTask(logger *zap.Logger, id string) {
runner.mu.Lock()
defer runner.mu.Unlock()

logger.Debug("stopping task", zap.String("task", id))

runner.stopTask(id)
}

func (runner *Runner[T, S]) stopTask(id string) {
if _, ok := runner.running[id]; !ok {
return
}

logger.Debug("stopping task", zap.String("task", id))

runner.running[id].Stop()
delete(runner.running, id)
}
Expand All @@ -82,13 +98,11 @@ func (runner *Runner[T, S]) Reconcile(ctx context.Context, logger *zap.Logger, s
if _, exists := shouldRun[id]; !exists {
logger.Debug("stopping task", zap.String("task", id))

runner.running[id].Stop()
delete(runner.running, id)
runner.stopTask(id)
} else if !runner.equalityFunc(shouldRun[id], runner.running[id].Spec()) {
logger.Debug("replacing task", zap.String("task", id))

runner.running[id].Stop()
delete(runner.running, id)
runner.stopTask(id)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func (ctrl *MachineSetNodeController) reconcileMachineSet(

switch machineClassCfg.AllocationType {
case specs.MachineSetSpec_MachineClass_Unlimited:
return ctrl.createNodes(ctx, r, machineSet, machineClass, allMachineStatuses, math.MaxInt32)
return ctrl.createNodes(ctx, r, machineSet, machineClass, allMachineStatuses, math.MaxInt32, logger)
case specs.MachineSetSpec_MachineClass_Static:
diff := int(machineClassCfg.MachineCount) - existingMachineSetNodes.Len()

Expand All @@ -216,12 +216,12 @@ func (ctrl *MachineSetNodeController) reconcileMachineSet(
if diff < 0 {
logger.Info("scaling machine set down", zap.Int("pending", -diff), zap.String("machine_set", machineSet.Metadata().ID()))

return ctrl.deleteNodes(ctx, r, existingMachineSetNodes, machineStatusMap, -diff)
return ctrl.deleteNodes(ctx, r, existingMachineSetNodes, machineStatusMap, -diff, logger)
}

logger.Info("scaling machine set up", zap.Int("pending", diff), zap.String("machine_set", machineSet.Metadata().ID()))

return ctrl.createNodes(ctx, r, machineSet, machineClass, allMachineStatuses, diff)
return ctrl.createNodes(ctx, r, machineSet, machineClass, allMachineStatuses, diff, logger)
}

return nil
Expand All @@ -235,6 +235,7 @@ func (ctrl *MachineSetNodeController) createNodes(
machineClass *omni.MachineClass,
allMachineStatuses safe.List[*omni.MachineStatus],
count int,
logger *zap.Logger,
) error {
selectors, err := labels.ParseSelectors(machineClass.TypedSpec().Value.MatchLabels)
if err != nil {
Expand Down Expand Up @@ -303,6 +304,8 @@ func (ctrl *MachineSetNodeController) createNodes(
return err
}

logger.Info("created machine set node", zap.String("machine", id))

created++
if created == count {
return nil
Expand All @@ -319,6 +322,7 @@ func (ctrl *MachineSetNodeController) deleteNodes(
machineSetNodes safe.List[*omni.MachineSetNode],
machineStatuses map[string]*omni.MachineStatus,
machinesToDestroyCount int,
logger *zap.Logger,
) error {
usedMachineSetNodes, err := safe.Map(machineSetNodes, func(m *omni.MachineSetNode) (*omni.MachineSetNode, error) {
return m, nil
Expand Down Expand Up @@ -365,6 +369,8 @@ func (ctrl *MachineSetNodeController) deleteNodes(
return err
}

logger.Info("removed machine set node", zap.String("machine", usedMachineSetNodes[i].Metadata().ID()))

if !ready {
return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func (suite *MachineSetNodeSuite) TestReconcile() {
map[string]string{
omni.MachineStatusLabelCPU: "AMD",
omni.MachineStatusLabelAvailable: "",
"userlabel": "value",
},
)

Expand Down Expand Up @@ -127,7 +128,7 @@ func (suite *MachineSetNodeSuite) TestReconcile() {
machineSet.Metadata().Labels().Set(omni.LabelCluster, cluster.Metadata().ID())
machineSet.Metadata().Labels().Set(omni.LabelWorkerRole, "")

machineClass := newMachineClass(fmt.Sprintf("%s==amd64", omni.MachineStatusLabelArch))
machineClass := newMachineClass(fmt.Sprintf("%s==amd64", omni.MachineStatusLabelArch), "userlabel=value")

machineSet.TypedSpec().Value.MachineClass = &specs.MachineSetSpec_MachineClass{
Name: machineClass.Metadata().ID(),
Expand All @@ -138,6 +139,9 @@ func (suite *MachineSetNodeSuite) TestReconcile() {
suite.Require().NoError(suite.state.Create(ctx, machineSet))

assertMachineSetNode(machines[0])
assertNoMachineSetNode(machines[1])
assertNoMachineSetNode(machines[2])
assertNoMachineSetNode(machines[3])

machineClass = newMachineClass(fmt.Sprintf("%s==AMD", omni.MachineStatusLabelCPU))

Expand Down
48 changes: 26 additions & 22 deletions internal/backend/runtime/omni/controllers/omni/machine_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func (ctrl *MachineStatusController) MapInput(ctx context.Context, _ *zap.Logger
fallthrough
case omni.MachineStatusSnapshotType:
return []resource.Pointer{
omni.NewMachineStatus(resources.DefaultNamespace, ptr.ID()).Metadata(),
omni.NewMachine(resources.DefaultNamespace, ptr.ID()).Metadata(),
}, nil
case omni.TalosConfigType:
machines, err := safe.ReaderListAll[*omni.ClusterMachineStatus](ctx, r, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, ptr.ID())))
Expand All @@ -163,7 +163,7 @@ func (ctrl *MachineStatusController) MapInput(ctx context.Context, _ *zap.Logger
res := make([]resource.Pointer, 0, machines.Len())

machines.ForEach(func(r *omni.ClusterMachineStatus) {
res = append(res, omni.NewMachineStatus(resources.DefaultNamespace, r.Metadata().ID()).Metadata())
res = append(res, omni.NewMachine(resources.DefaultNamespace, r.Metadata().ID()).Metadata())
})

return res, nil
Expand All @@ -178,7 +178,7 @@ func (ctrl *MachineStatusController) MapInput(ctx context.Context, _ *zap.Logger
func (ctrl *MachineStatusController) Reconcile(ctx context.Context,
logger *zap.Logger, r controller.QRuntime, ptr resource.Pointer,
) error {
machine, err := safe.ReaderGetByID[*omni.Machine](ctx, r, ptr.ID())
machine, err := safe.ReaderGet[*omni.Machine](ctx, r, omni.NewMachine(ptr.Namespace(), ptr.ID()).Metadata())
if err != nil {
if state.IsNotFoundError(err) {
return nil
Expand Down Expand Up @@ -217,24 +217,28 @@ func (ctrl *MachineStatusController) reconcileRunning(ctx context.Context, r con
maintenanceMode = inputs.machineStatusSnapshot.TypedSpec().Value.MachineStatus.Stage == machineapi.MachineStatusEvent_MAINTENANCE
}

ctrl.runner.StopTask(logger, machine.Metadata().ID())
var params *siderolink.ConnectionParams

if machine.TypedSpec().Value.Connected {
var params *siderolink.ConnectionParams
params, err = safe.ReaderGetByID[*siderolink.ConnectionParams](ctx, r, siderolink.ConfigID)
if err != nil {
return fmt.Errorf("error reading connection params: %w", err)
}

params, err = safe.ReaderGetByID[*siderolink.ConnectionParams](ctx, r, siderolink.ConfigID)
if err != nil {
return fmt.Errorf("error reading connection params: %w", err)
}
spec := machinetask.CollectTaskSpec{
Endpoint: machine.TypedSpec().Value.ManagementAddress,
TalosConfig: inputs.talosConfig,
MaintenanceMode: inputs.talosConfig == nil || maintenanceMode,
MachineID: machine.Metadata().ID(),
MachineLabels: inputs.machineLabels,
DefaultSchematicKernelArgs: siderolink.KernelArgs(params),
}

ctrl.runner.StartTask(ctx, logger, machine.Metadata().ID(), machinetask.CollectTaskSpec{
Endpoint: machine.TypedSpec().Value.ManagementAddress,
TalosConfig: inputs.talosConfig,
MaintenanceMode: inputs.talosConfig == nil || maintenanceMode,
MachineID: machine.Metadata().ID(),
MachineLabels: inputs.machineLabels,
DefaultSchematicKernelArgs: siderolink.KernelArgs(params),
}, ctrl.notifyCh)
if !machine.TypedSpec().Value.Connected {
ctrl.runner.StopTask(logger, machine.Metadata().ID())
}

if machine.TypedSpec().Value.Connected {
ctrl.runner.StartTask(ctx, logger, machine.Metadata().ID(), spec, ctrl.notifyCh)
}

return safe.WriterModify(ctx, r, omni.NewMachineStatus(resources.DefaultNamespace, machine.Metadata().ID()), func(m *omni.MachineStatus) error {
Expand Down Expand Up @@ -465,13 +469,13 @@ func (ctrl *MachineStatusController) handleInputs(ctx context.Context, r control
return in, err
}

in.machineSetNode, err = safe.ReaderGetByID[*omni.MachineSetNode](ctx, r, machine.Metadata().ID())
if err != nil && !state.IsNotFoundError(err) {
in.machineSetNode, err = helpers.HandleInput[*omni.MachineSetNode](ctx, r, ctrl.Name(), machine)
if err != nil {
return in, err
}

if in.clusterMachineStatus != nil {
clusterName, ok := in.clusterMachineStatus.Metadata().Labels().Get(omni.LabelCluster)
if in.machineSetNode != nil {
clusterName, ok := in.machineSetNode.Metadata().Labels().Get(omni.LabelCluster)
if ok {
in.talosConfig, err = safe.ReaderGetByID[*omni.TalosConfig](ctx, r, clusterName)
if err != nil && !state.IsNotFoundError(err) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (ctrl *MachineStatusSnapshotController) MapInput(ctx context.Context, _ *za
fallthrough
case omni.MachineType:
return []resource.Pointer{
omni.NewMachineStatusSnapshot(resources.DefaultNamespace, ptr.ID()).Metadata(),
omni.NewMachine(resources.DefaultNamespace, ptr.ID()).Metadata(),
}, nil
case omni.TalosConfigType:
machines, err := safe.ReaderListAll[*omni.ClusterMachine](ctx, r, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, ptr.ID())))
Expand All @@ -123,7 +123,7 @@ func (ctrl *MachineStatusSnapshotController) MapInput(ctx context.Context, _ *za
res := make([]resource.Pointer, 0, machines.Len())

machines.ForEach(func(r *omni.ClusterMachine) {
res = append(res, omni.NewMachineStatusSnapshot(resources.DefaultNamespace, r.Metadata().ID()).Metadata())
res = append(res, omni.NewMachine(resources.DefaultNamespace, r.Metadata().ID()).Metadata())
})

return res, nil
Expand All @@ -136,7 +136,7 @@ func (ctrl *MachineStatusSnapshotController) MapInput(ctx context.Context, _ *za
func (ctrl *MachineStatusSnapshotController) Reconcile(ctx context.Context,
logger *zap.Logger, r controller.QRuntime, ptr resource.Pointer,
) error {
machine, err := safe.ReaderGetByID[*omni.Machine](ctx, r, ptr.ID())
machine, err := safe.ReaderGet[*omni.Machine](ctx, r, omni.NewMachine(ptr.Namespace(), ptr.ID()).Metadata())
if err != nil {
if state.IsNotFoundError(err) {
return nil
Expand All @@ -159,8 +159,6 @@ func (ctrl *MachineStatusSnapshotController) reconcileRunning(ctx context.Contex
}
}

ctrl.runner.StopTask(logger, machine.Metadata().ID())

clusterMachine, err := helpers.HandleInput[*omni.ClusterMachine](ctx, r, ctrl.Name(), machine)
if err != nil {
return err
Expand All @@ -178,6 +176,10 @@ func (ctrl *MachineStatusSnapshotController) reconcileRunning(ctx context.Contex
}
}

if !machine.TypedSpec().Value.Connected {
ctrl.runner.StopTask(logger, machine.Metadata().ID())
}

if machine.TypedSpec().Value.Connected {
ctrl.runner.StartTask(ctx, logger, machine.Metadata().ID(), snapshot.CollectTaskSpec{
Endpoint: machine.TypedSpec().Value.ManagementAddress,
Expand Down
Loading

0 comments on commit 22bb2cc

Please sign in to comment.