Skip to content

Commit

Permalink
fix: wait for network and retry in platform get config funcs
Browse files Browse the repository at this point in the history
Wait for the network before trying to access the metadata service.

Retry the calls where appropriate (most platforms use the `download.Download`
function, which already performs proper retries).

Co-authored-by: Noel Georgi <git@frezbo.dev>
Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
  • Loading branch information
smira and frezbo committed Feb 9, 2023
1 parent 3d7566e commit dcbcf5a
Show file tree
Hide file tree
Showing 21 changed files with 185 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ RUN gofumpt -w ./

FROM go-generate AS gen-proto-go
WORKDIR /src/
RUN structprotogen github.com/siderolabs/talos/pkg/machinery/... /api/resource/definitions/
RUN --mount=type=cache,target=/.cache structprotogen github.com/siderolabs/talos/pkg/machinery/... /api/resource/definitions/

# compile protobuf service definitions
FROM build AS generate-build
Expand Down
29 changes: 22 additions & 7 deletions internal/app/machined/pkg/runtime/v1alpha1/platform/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-procfs/procfs"
"github.com/siderolabs/go-retry/retry"

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)
Expand Down Expand Up @@ -101,24 +103,37 @@ func (a *AWS) Name() string {

// Configuration implements the runtime.Platform interface.
//
// It blocks on netutils.Wait, then fetches the EC2 userdata through
// netutils.RetryFetch using a.fetchConfiguration as the fetcher. Userdata that
// is empty after trimming whitespace is reported as errors.ErrNoConfigSource.
func (a *AWS) Configuration(ctx context.Context, r state.State) ([]byte, error) {
	if waitErr := netutils.Wait(ctx, r); waitErr != nil {
		return nil, waitErr
	}

	log.Printf("fetching machine config from AWS")

	rawUserData, fetchErr := netutils.RetryFetch(ctx, a.fetchConfiguration)

	switch {
	case fetchErr != nil:
		return nil, fetchErr
	case strings.TrimSpace(rawUserData) == "":
		// Whitespace-only userdata is treated the same as no userdata at all.
		return nil, errors.ErrNoConfigSource
	}

	return []byte(rawUserData), nil
}

func (a *AWS) fetchConfiguration(ctx context.Context) (string, error) {
userdata, err := a.metadataClient.GetUserDataWithContext(ctx)
if err != nil {
if awsErr, ok := err.(awserr.RequestFailure); ok {
if awsErr.StatusCode() == http.StatusNotFound {
return nil, errors.ErrNoConfigSource
return "", errors.ErrNoConfigSource
}
}

return nil, fmt.Errorf("failed to fetch EC2 userdata: %w", err)
return "", retry.ExpectedErrorf("failed to fetch EC2 userdata: %w", err)
}

if strings.TrimSpace(userdata) == "" {
return nil, errors.ErrNoConfigSource
}

return []byte(userdata), nil
return userdata, nil
}

// Mode implements the runtime.Platform interface.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
Expand Down Expand Up @@ -169,8 +170,12 @@ func (a *Azure) ParseLoadBalancerIP(lbConfig LoadBalancerMetadata, exIP []netip.
// Configuration implements the platform.Platform interface.
func (a *Azure) Configuration(ctx context.Context, r state.State) ([]byte, error) {
defer func() {
if err := netutils.Wait(ctx, r); err != nil {
log.Printf("failed to wait for network, err: %s", err)
}

if err := linuxAgent(ctx); err != nil {
log.Printf("failed to update instance status, err: %s", err.Error())
log.Printf("failed to update instance status, err: %s", err)
}
}()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/utils"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/address"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
Expand Down Expand Up @@ -75,7 +76,7 @@ func (d *DigitalOcean) ParseMetadata(metadata *MetadataConfig) (*runtime.Platfor

for _, iface := range metadata.Interfaces["public"] {
if iface.IPv4 != nil {
ifAddr, err := utils.IPPrefixFrom(iface.IPv4.IPAddress, iface.IPv4.Netmask)
ifAddr, err := address.IPPrefixFrom(iface.IPv4.IPAddress, iface.IPv4.Netmask)
if err != nil {
return nil, fmt.Errorf("failed to parse ip address: %w", err)
}
Expand Down Expand Up @@ -131,7 +132,7 @@ func (d *DigitalOcean) ParseMetadata(metadata *MetadataConfig) (*runtime.Platfor
}

if iface.IPv6 != nil {
ifAddr, err := utils.IPPrefixFrom(iface.IPv6.IPAddress, strconv.Itoa(iface.IPv6.CIDR))
ifAddr, err := address.IPPrefixFrom(iface.IPv6.IPAddress, strconv.Itoa(iface.IPv6.CIDR))
if err != nil {
return nil, fmt.Errorf("failed to parse ip address: %w", err)
}
Expand Down Expand Up @@ -172,7 +173,7 @@ func (d *DigitalOcean) ParseMetadata(metadata *MetadataConfig) (*runtime.Platfor
}

if iface.AnchorIPv4 != nil {
ifAddr, err := utils.IPPrefixFrom(iface.AnchorIPv4.IPAddress, iface.AnchorIPv4.Netmask)
ifAddr, err := address.IPPrefixFrom(iface.AnchorIPv4.IPAddress, iface.AnchorIPv4.Netmask)
if err != nil {
return nil, fmt.Errorf("failed to parse ip address: %w", err)
}
Expand Down Expand Up @@ -200,7 +201,7 @@ func (d *DigitalOcean) ParseMetadata(metadata *MetadataConfig) (*runtime.Platfor
})

if iface.IPv4 != nil {
ifAddr, err := utils.IPPrefixFrom(iface.IPv4.IPAddress, iface.IPv4.Netmask)
ifAddr, err := address.IPPrefixFrom(iface.IPv4.IPAddress, iface.IPv4.Netmask)
if err != nil {
return nil, fmt.Errorf("failed to parse ip address: %w", err)
}
Expand Down Expand Up @@ -237,6 +238,10 @@ func (d *DigitalOcean) ParseMetadata(metadata *MetadataConfig) (*runtime.Platfor

// Configuration implements the platform.Platform interface.
func (d *DigitalOcean) Configuration(ctx context.Context, r state.State) ([]byte, error) {
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
}

log.Printf("fetching machine config from: %q", DigitalOceanUserDataEndpoint)

return download.Download(ctx, DigitalOceanUserDataEndpoint,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
networkctrl "github.com/siderolabs/talos/internal/app/machined/pkg/controllers/network"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
Expand Down Expand Up @@ -88,6 +89,10 @@ func (p *EquinixMetal) Name() string {

// Configuration implements the platform.Platform interface.
func (p *EquinixMetal) Configuration(ctx context.Context, r state.State) ([]byte, error) {
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
}

log.Printf("fetching machine config from: %q", EquinixMetalUserDataEndpoint)

return download.Download(ctx, EquinixMetalUserDataEndpoint,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
Expand Down Expand Up @@ -67,6 +68,10 @@ func (e *Exoscale) Name() string {

// Configuration implements the runtime.Platform interface.
func (e *Exoscale) Configuration(ctx context.Context, r state.State) ([]byte, error) {
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
}

log.Printf("fetching machine config from %q", ExoscaleUserDataEndpoint)

return download.Download(ctx, ExoscaleUserDataEndpoint,
Expand Down
27 changes: 22 additions & 5 deletions internal/app/machined/pkg/runtime/v1alpha1/platform/gcp/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ import (
"cloud.google.com/go/compute/metadata"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-procfs/procfs"
"github.com/siderolabs/go-retry/retry"

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
Expand Down Expand Up @@ -158,12 +160,14 @@ func (g *GCP) ParseMetadata(metadata *MetadataConfig, interfaces []NetworkInterf

// Configuration implements the platform.Platform interface.
func (g *GCP) Configuration(ctx context.Context, r state.State) ([]byte, error) {
userdata, err := metadata.InstanceAttributeValue("user-data")
if err != nil {
if _, ok := err.(metadata.NotDefinedError); ok {
return nil, errors.ErrNoConfigSource
}
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
}

log.Printf("fetching machine config from AWS")

userdata, err := netutils.RetryFetch(ctx, g.fetchConfiguration)
if err != nil {
return nil, err
}

Expand All @@ -174,6 +178,19 @@ func (g *GCP) Configuration(ctx context.Context, r state.State) ([]byte, error)
return []byte(userdata), nil
}

// fetchConfiguration reads the "user-data" instance attribute via the GCP
// metadata client.
//
// A metadata.NotDefinedError is translated to errors.ErrNoConfigSource; any
// other failure is wrapped with retry.ExpectedError (presumably so that the
// retrying caller tries again — confirm against go-retry semantics). The
// context argument is unused.
func (g *GCP) fetchConfiguration(_ context.Context) (string, error) {
	value, fetchErr := metadata.InstanceAttributeValue("user-data")

	switch fetchErr.(type) {
	case nil:
		return value, nil
	case metadata.NotDefinedError:
		return "", errors.ErrNoConfigSource
	default:
		return "", retry.ExpectedError(fetchErr)
	}
}

// Mode implements the platform.Platform interface.
func (g *GCP) Mode() runtime.Mode {
return runtime.ModeCloud
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
Expand Down Expand Up @@ -147,6 +148,10 @@ func (h *Hcloud) ParseMetadata(unmarshalledNetworkConfig *NetworkConfig, metadat

// Configuration implements the runtime.Platform interface.
func (h *Hcloud) Configuration(ctx context.Context, r state.State) ([]byte, error) {
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
}

log.Printf("fetching machine config from: %q", HCloudUserDataEndpoint)

return download.Download(ctx, HCloudUserDataEndpoint,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Package utils provides utility functions for the platform package.
package utils
// Package address provides utility functions for address parsing.
package address

import (
"fmt"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Package netutils provides network-related helpers for platform implementation.
package netutils

import (
"context"
"log"
"time"

"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-retry/retry"

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/network"
)

// Wait blocks until the network reports its addresses as ready, so that
// platform metadata services can be reached.
//
// It logs a message and then waits on a network ready condition built for
// network.AddressReady, returning whatever error that wait produces (for
// example when ctx is done first).
func Wait(ctx context.Context, r state.State) error {
	log.Printf("waiting for network to be ready")

	addressReady := network.NewReadyCondition(r, network.AddressReady)

	return addressReady.Wait(ctx)
}

// RetryFetch calls f repeatedly through an exponential retrier until it
// succeeds or the retrier gives up.
//
// The retrier is configured with constants.ConfigLoadTimeout, one-second
// units, one second of jitter, and error logging enabled. On success the
// string produced by the last call to f is returned; on failure the result is
// the empty string together with the retrier's error.
func RetryFetch(ctx context.Context, f func(ctx context.Context) (string, error)) (string, error) {
	var fetched string

	retrier := retry.Exponential(
		constants.ConfigLoadTimeout,
		retry.WithUnits(time.Second),
		retry.WithJitter(time.Second),
		retry.WithErrorLogging(true),
	)

	retryErr := retrier.RetryWithContext(ctx, func(ctx context.Context) error {
		var attemptErr error

		// Keep the latest value; it is only handed back if the retrier succeeds.
		fetched, attemptErr = f(ctx)

		return attemptErr
	})
	if retryErr != nil {
		return "", retryErr
	}

	return fetched, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/constants"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
Expand Down Expand Up @@ -63,6 +64,10 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error
case constants.MetalConfigISOLabel:
return readConfigFromISO()
default:
if err := netutils.Wait(ctx, r); err != nil {
return nil, err
}

return download.Download(ctx, *option, download.WithEndpointFunc(getURL))
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ func setup(ctx context.Context, t *testing.T, st state.State, mockUUID, mockSeri
linkStatusSpec.TypedSpec().HardwareAddr = nethelpers.HardwareAddr(parsedMockMAC)
linkStatusSpec.TypedSpec().LinkState = true
assert.NoError(t, createOrUpdate(ctx, st, linkStatusSpec))

netStatus := network.NewStatus(network.NamespaceName, network.StatusID)
netStatus.TypedSpec().AddressReady = true
assert.NoError(t, createOrUpdate(ctx, st, netStatus))
}

func TestPopulateURLParameters(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ import (
"path/filepath"
"strings"

"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-blockdevice/blockdevice/filesystem"
"github.com/siderolabs/go-blockdevice/blockdevice/probe"
"golang.org/x/sys/unix"
yaml "gopkg.in/yaml.v3"

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/internal/pkg/smbios"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
Expand Down Expand Up @@ -100,9 +102,13 @@ type MetadataConfig struct {
Zone string `yaml:"zone,omitempty"`
}

func (n *Nocloud) configFromNetwork(ctx context.Context, metaBaseURL string) (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
func (n *Nocloud) configFromNetwork(ctx context.Context, metaBaseURL string, r state.State) (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
log.Printf("fetching meta config from: %q", metaBaseURL+configMetaDataPath)

if err = netutils.Wait(ctx, r); err != nil {
return nil, nil, nil, err
}

metaConfig, err = download.Download(ctx, metaBaseURL+configMetaDataPath)
if err != nil {
metaConfig = nil
Expand Down Expand Up @@ -184,7 +190,7 @@ func (n *Nocloud) configFromCD() (metaConfig []byte, networkConfig []byte, machi
}

//nolint:gocyclo
func (n *Nocloud) acquireConfig(ctx context.Context) (metadataConfigDl, metadataNetworkConfigDl, machineConfigDl []byte, metadata *MetadataConfig, err error) {
func (n *Nocloud) acquireConfig(ctx context.Context, r state.State) (metadataConfigDl, metadataNetworkConfigDl, machineConfigDl []byte, metadata *MetadataConfig, err error) {
s, err := smbios.GetSMBIOSInfo()
if err != nil {
return nil, nil, nil, nil, err
Expand Down Expand Up @@ -222,7 +228,7 @@ func (n *Nocloud) acquireConfig(ctx context.Context) (metadataConfigDl, metadata
}

if networkSource && metaBaseURL != "" {
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromNetwork(ctx, metaBaseURL)
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromNetwork(ctx, metaBaseURL, r)
} else {
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromCD()
}
Expand Down

0 comments on commit dcbcf5a

Please sign in to comment.