Skip to content

Commit

Permalink
feat: provide a way to configure IPMI PXE method
Browse files Browse the repository at this point in the history
Fixes #274

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
  • Loading branch information
smira committed Dec 9, 2021
1 parent 2ff14c4 commit 4cfdeda
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 61 deletions.
1 change: 1 addition & 0 deletions app/sidero-controller-manager/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ spec:
- --insecure-wipe=${SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE:=true}
- --auto-bmc-setup=${SIDERO_CONTROLLER_MANAGER_AUTO_BMC_SETUP:=true}
- --server-reboot-timeout=${SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT:=20m}
- --ipmi-pxe-method=${SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD:=uefi}
- --test-power-simulated-explicit-failure-prob=${SIDERO_CONTROLLER_MANAGER_TEST_POWER_EXPLICIT_FAILURE:=0}
- --test-power-simulated-silent-failure-prob=${SIDERO_CONTROLLER_MANAGER_TEST_POWER_SILENT_FAILURE:=0}
image: controller:latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (

infrav1 "github.com/talos-systems/sidero/app/caps-controller-manager/api/v1alpha3"
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
"github.com/talos-systems/sidero/app/sidero-controller-manager/pkg/constants"
)
Expand All @@ -49,6 +50,7 @@ type ServerReconciler struct {
Recorder record.EventRecorder

RebootTimeout time.Duration
PXEMode metal.PXEMode
}

// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -79,7 +81,7 @@ func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return ctrl.Result{}, err
}

mgmtClient, err := metal.NewManagementClient(ctx, r.Client, &s.Spec)
mgmtClient, err := power.NewManagementClient(ctx, r.Client, &s.Spec)
if err != nil {
log.Error(err, "failed to create management client")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to initialize management client: %s.", err))
Expand Down Expand Up @@ -196,7 +198,7 @@ func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr

if !poweredOn {
// it's safe to set server to PXE boot even if it's already installed, as PXE server makes sure server is PXE booted only once
err = mgmtClient.SetPXE()
err = mgmtClient.SetPXE(r.PXEMode)
if err != nil {
log.Error(err, "failed to set PXE")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to set to PXE boot once: %s.", err))
Expand Down Expand Up @@ -239,7 +241,7 @@ func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return f(false, ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter})
}

err = mgmtClient.SetPXE()
err = mgmtClient.SetPXE(r.PXEMode)
if err != nil {
log.Error(err, "failed to set PXE")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to set to PXE boot once: %s.", err))
Expand Down
4 changes: 3 additions & 1 deletion app/sidero-controller-manager/internal/power/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"

metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
)

// Client provides management over simple API.
Expand Down Expand Up @@ -88,7 +89,8 @@ func (c *Client) PowerCycle() error {
}

// SetPXE makes sure the node will PXE boot next time.
//
// The mode argument is not used by this client: the call only posts a
// request to "/pxeboot" and returns whatever error postRequest reports.
func (c *Client) SetPXE() error {
func (c *Client) SetPXE(mode metal.PXEMode) error {
// mode is ignored: there is no way to enforce the PXE mode via the QEMU API
return c.postRequest("/pxeboot")
}

Expand Down
56 changes: 56 additions & 0 deletions app/sidero-controller-manager/internal/power/factory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Package power provides common interface to manage power state.
package power

import (
"context"

"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/ipmi"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
"github.com/talos-systems/sidero/app/sidero-controller-manager/pkg/constants"
)

// NewManagementClient builds ManagementClient from the server spec.
//
// The backend is chosen in priority order:
//   - spec.BMC set: an IPMI client built from a defaulted copy of the BMC spec;
//   - spec.ManagementAPI set: a client for the simple management API;
//   - neither set: a fake client.
//
// Any error from resolving the BMC credentials is returned as-is together
// with a nil client.
func NewManagementClient(ctx context.Context, client client.Client, spec *v1alpha1.ServerSpec) (metal.ManagementClient, error) {
	// Without BMC information fall back to the management API, and then to the fake client.
	if spec.BMC == nil {
		if spec.ManagementAPI == nil {
			return fakeClient{}, nil
		}

		return api.NewClient(*spec.ManagementAPI)
	}

	// Dereference into a local copy: the credentials and defaults filled in
	// below are set on the copy, not on spec.BMC.
	bmc := *spec.BMC

	// An inline user name takes precedence; otherwise resolve it from UserFrom.
	if bmc.User == "" {
		user, err := bmc.UserFrom.Resolve(ctx, client)
		if err != nil {
			return nil, err
		}

		bmc.User = user
	}

	// Same rule for the password: inline value first, PassFrom otherwise.
	if bmc.Pass == "" {
		pass, err := bmc.PassFrom.Resolve(ctx, client)
		if err != nil {
			return nil, err
		}

		bmc.Pass = pass
	}

	// Apply defaults for the fields left unset in the spec.
	if bmc.Interface == "" {
		bmc.Interface = "lanplus"
	}

	if bmc.Port == 0 {
		bmc.Port = constants.DefaultBMCPort
	}

	return ipmi.NewClient(bmc)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package metal
package power

import "github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"

type fakeClient struct{}

Expand All @@ -18,7 +20,7 @@ func (fakeClient) PowerCycle() error {
return nil
}

// SetPXE is a no-op for the fake client: mode is ignored and nil is always returned.
func (fakeClient) SetPXE() error {
func (fakeClient) SetPXE(mode metal.PXEMode) error {
return nil
}

Expand Down
12 changes: 10 additions & 2 deletions app/sidero-controller-manager/internal/power/ipmi/ipmi.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
goipmi "github.com/pensando/goipmi"

metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
)

// Link to the IPMI spec: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf
Expand Down Expand Up @@ -92,8 +93,15 @@ func (c *Client) Status() (*goipmi.ChassisStatusResponse, error) {
}

// SetPXE makes sure the node will PXE boot next time.
//
// The mode argument selects which IPMI client call is used to set the PXE
// boot device:
//   - metal.PXEModeBIOS: SetBootDevice
//   - metal.PXEModeUEFI: SetBootDeviceEFI
//
// Any other mode is rejected with an "unsupported mode" error and no IPMI
// call is made.
func (c *Client) SetPXE() error {
return c.IPMIClient.SetBootDeviceEFI(goipmi.BootDevicePxe)
func (c *Client) SetPXE(mode metal.PXEMode) error {
switch mode {
case metal.PXEModeBIOS:
return c.IPMIClient.SetBootDevice(goipmi.BootDevicePxe)
case metal.PXEModeUEFI:
return c.IPMIClient.SetBootDeviceEFI(goipmi.BootDevicePxe)
default:
// %q quotes the value so that an empty or odd mode is visible in the message
return fmt.Errorf("unsupported mode %q", mode)
}
}

// GetLANConfig fetches a given param from the LAN Config. (see 23.2).
Expand Down
58 changes: 14 additions & 44 deletions app/sidero-controller-manager/internal/power/metal/metal.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,62 +5,32 @@
// Package metal provides interfaces to manage metal machines.
package metal

import (
"context"

"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/ipmi"
"github.com/talos-systems/sidero/app/sidero-controller-manager/pkg/constants"
)

// ManagementClient controls power and boot order of a metal machine.
type ManagementClient interface {
PowerOn() error
PowerOff() error
PowerCycle() error
IsPoweredOn() (bool, error)
SetPXE() error
SetPXE(mode PXEMode) error
IsFake() bool
Close() error
}

// NewManagementClient builds ManagementClient from the server spec.
func NewManagementClient(ctx context.Context, client client.Client, spec *v1alpha1.ServerSpec) (ManagementClient, error) {
switch {
case spec.BMC != nil:
var err error

bmcSpec := *spec.BMC
// PXEMode specifies PXE boot mode.
type PXEMode string

if bmcSpec.User == "" {
bmcSpec.User, err = bmcSpec.UserFrom.Resolve(ctx, client)
if err != nil {
return nil, err
}
}

if bmcSpec.Pass == "" {
bmcSpec.Pass, err = bmcSpec.PassFrom.Resolve(ctx, client)
if err != nil {
return nil, err
}
}

if bmcSpec.Interface == "" {
bmcSpec.Interface = "lanplus"
}

if bmcSpec.Port == 0 {
bmcSpec.Port = constants.DefaultBMCPort
}
// Supported PXE boot modes.
//
// The constants are untyped string constants, so they can be used both where
// a PXEMode is expected and where a plain string is expected.
const (
// PXEModeBIOS is the "bios" PXE boot mode.
PXEModeBIOS = "bios"
// PXEModeUEFI is the "uefi" PXE boot mode.
PXEModeUEFI = "uefi"
)

return ipmi.NewClient(bmcSpec)
case spec.ManagementAPI != nil:
return api.NewClient(*spec.ManagementAPI)
// IsValid reports whether mode is one of the supported PXE boot modes
// (PXEModeBIOS or PXEModeUEFI).
func (mode PXEMode) IsValid() bool {
switch mode {
case PXEModeBIOS:
return true
case PXEModeUEFI:
return true
default:
return fakeClient{}, nil
return false
}
}
9 changes: 9 additions & 0 deletions app/sidero-controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/ipxe"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/metadata"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/server"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/siderolink"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/tftp"
Expand Down Expand Up @@ -80,6 +81,7 @@ func main() {
insecureWipe bool
autoBMCSetup bool
serverRebootTimeout time.Duration
ipmiPXEMethod string

testPowerSimulatedExplicitFailureProb float64
testPowerSimulatedSilentFailureProb float64
Expand All @@ -96,6 +98,7 @@ func main() {
flag.BoolVar(&insecureWipe, "insecure-wipe", true, "Wipe head of the disk only (if false, wipe whole disk).")
flag.BoolVar(&autoBMCSetup, "auto-bmc-setup", true, "Attempt to setup BMC info automatically when agent boots.")
flag.DurationVar(&serverRebootTimeout, "server-reboot-timeout", constants.DefaultServerRebootTimeout, "Timeout to wait for the server to restart and start wipe.")
flag.StringVar(&ipmiPXEMethod, "ipmi-pxe-method", string(metal.PXEModeUEFI), fmt.Sprintf("Default method to use to set server to boot from PXE via IPMI: %s.", []string{metal.PXEModeUEFI, metal.PXEModeBIOS}))
flag.Float64Var(&testPowerSimulatedExplicitFailureProb, "test-power-simulated-explicit-failure-prob", 0, "Test failure simulation setting.")
flag.Float64Var(&testPowerSimulatedSilentFailureProb, "test-power-simulated-silent-failure-prob", 0, "Test failure simulation setting.")

Expand Down Expand Up @@ -124,6 +127,11 @@ func main() {
}
}

if !metal.PXEMode(ipmiPXEMethod).IsValid() {
setupLog.Error(fmt.Errorf("ipmi-pxe-method is invalid"), "")
os.Exit(1)
}

ctrl.SetLogger(zap.New(func(o *zap.Options) {
o.Development = true
}))
Expand Down Expand Up @@ -190,6 +198,7 @@ func main() {
APIReader: mgr.GetAPIReader(),
Recorder: recorder,
RebootTimeout: serverRebootTimeout,
PXEMode: metal.PXEMode(ipmiPXEMethod),
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: defaultMaxConcurrentReconciles}); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Server")
os.Exit(1)
Expand Down
12 changes: 3 additions & 9 deletions hack/release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,8 @@ preface = """\

[notes]

[notes.bmc-port]
title = "BMC Port"
[notes.ipmi-pxe-method]
title = "IPMI PXE Method"
description = """\
Sidero now supports the ability to specify the port in a server's BMC info. By default, this value will be determined by talking directly to the BMC if possible, with a fallback to port 623. The value can also simply be specified as part of editing the Server resource directly.
"""

[notes.v1alpha4]
title = "CAPI v1alpha4"
description = """\
This release of Sidero brings compatibility with CAPI v1alpha4.
IPMI PXE method (UEFI, BIOS) can now be configured with `SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD` while installing Sidero.
"""
1 change: 1 addition & 0 deletions website/content/docs/v0.5/Overview/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ variables or as variables in the `clusterctl` configuration:
- `SIDERO_CONTROLLER_MANAGER_AUTO_BMC_SETUP` (`true`): automatically attempt to configure the BMC with a `sidero` user that will be used for all IPMI tasks.
- `SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE` (`true`): wipe only the first megabyte of each disk on the server, otherwise wipe the full disk
- `SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT` (`20m`): timeout for the server reboot (how long it might take for the server to be rebooted before Sidero retries an IPMI reboot operation)
- `SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD` (`uefi`): IPMI boot from PXE method: `uefi` for UEFI boot or `bios` for BIOS boot
- `SIDERO_CONTROLLER_MANAGER_BOOT_FROM_DISK_METHOD` (`ipxe-exit`): configures the way Sidero forces server to boot from disk when server hits iPXE server after initial install: `ipxe-exit` returns iPXE script with `exit` command, `http-404` returns HTTP 404 Not Found error, `ipxe-sanboot` uses iPXE `sanboot` command to boot from the first hard disk

Sidero provides two endpoints which should be made available to the infrastructure:
Expand Down

0 comments on commit 4cfdeda

Please sign in to comment.