Skip to content

Commit

Permalink
feat: add an option to continue booting on NTP timeout
Browse files Browse the repository at this point in the history
Fixes #4224

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
  • Loading branch information
smira committed Sep 16, 2021
1 parent ef36849 commit fdd80a1
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 5 deletions.
46 changes: 46 additions & 0 deletions internal/app/machined/pkg/controllers/time/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"context"
"fmt"
"sync"
stdtime "time"

"github.com/AlekSi/pointer"
"github.com/cosi-project/runtime/pkg/controller"
Expand All @@ -26,6 +27,8 @@ import (
type SyncController struct {
// V1Alpha1Mode is the runtime mode. Run compares it against
// v1alpha1runtime.ModeContainer right after initializing syncDisabled,
// presumably to turn sync off in container mode (branch body not visible here — confirm).
V1Alpha1Mode v1alpha1runtime.Mode
// NewNTPSyncer builds the NTP syncer. When nil, Run replaces it with a
// factory that calls ntp.NewSyncer.
NewNTPSyncer NewNTPSyncerFunc

// bootTime is set to the current time on the first Run call (while it is
// still zero) and is the reference point for measuring how much of the
// configured boot timeout has already elapsed.
bootTime stdtime.Time
}

// Name implements controller.Controller interface.
Expand Down Expand Up @@ -75,6 +78,10 @@ type NewNTPSyncerFunc func(*zap.Logger, []string) NTPSyncer
//
//nolint:gocyclo,cyclop
func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
if ctrl.bootTime.IsZero() {
ctrl.bootTime = stdtime.Now()
}

if ctrl.NewNTPSyncer == nil {
ctrl.NewNTPSyncer = func(logger *zap.Logger, timeServers []string) NTPSyncer {
return ntp.NewSyncer(logger, timeServers)
Expand All @@ -92,6 +99,9 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge

timeSynced bool
epoch int

timeSyncTimeoutTimer *stdtime.Timer
timeSyncTimeoutCh <-chan stdtime.Time
)

defer func() {
Expand All @@ -100,6 +110,10 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge

syncWg.Wait()
}

if timeSyncTimeoutTimer != nil {
timeSyncTimeoutTimer.Stop()
}
}()

for {
Expand All @@ -112,6 +126,9 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge
timeSynced = true
case <-epochCh:
epoch++
case <-timeSyncTimeoutCh:
timeSynced = true
timeSyncTimeoutTimer = nil
}

timeServersStatus, err := r.Get(ctx, resource.NewMetadata(network.NamespaceName, network.TimeServerStatusType, network.TimeServerID, resource.VersionUndefined))
Expand All @@ -133,6 +150,8 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge
}
}

var syncTimeout stdtime.Duration

syncDisabled := false

if ctrl.V1Alpha1Mode == v1alpha1runtime.ModeContainer {
Expand All @@ -143,6 +162,33 @@ func (ctrl *SyncController) Run(ctx context.Context, r controller.Runtime, logge
syncDisabled = true
}

if cfg != nil {
syncTimeout = cfg.(*config.MachineConfig).Config().Machine().Time().BootTimeout()
}

if !timeSynced {
sinceBoot := stdtime.Since(ctrl.bootTime)

switch {
case syncTimeout == 0:
// disable sync timeout
if timeSyncTimeoutTimer != nil {
timeSyncTimeoutTimer.Stop()
}

timeSyncTimeoutCh = nil
case sinceBoot > syncTimeout:
// over sync timeout already, so in sync
timeSynced = true
default:
// make sure timer fires in whatever time is left till the timeout
if timeSyncTimeoutTimer == nil || !timeSyncTimeoutTimer.Reset(syncTimeout-sinceBoot) {
timeSyncTimeoutTimer = stdtime.NewTimer(syncTimeout - sinceBoot)
timeSyncTimeoutCh = timeSyncTimeoutTimer.C
}
}
}

switch {
case syncDisabled && syncer != nil:
// stop syncing
Expand Down
49 changes: 49 additions & 0 deletions internal/app/machined/pkg/controllers/time/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,55 @@ func (suite *SyncSuite) TestReconcileSyncChangeConfig() {
))
}

// TestReconcileSyncBootTimeout checks that the time status goes from not synced
// to synced after a machine config carrying a 5s boot timeout is created.
func (suite *SyncSuite) TestReconcileSyncBootTimeout() {
	ctrl := &timectrl.SyncController{
		V1Alpha1Mode: v1alpha1runtime.ModeMetal,
		NewNTPSyncer: suite.newMockSyncer,
	}
	suite.Require().NoError(suite.runtime.RegisterController(ctrl))

	suite.startRuntime()

	// waitForStatus polls (every 100ms, up to 10s) until the time status
	// matches the expected spec, recording an assertion failure otherwise.
	waitForStatus := func(expected timeresource.StatusSpec) {
		poller := retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond))

		suite.Assert().NoError(poller.Retry(
			func() error {
				return suite.assertTimeStatus(expected)
			},
		))
	}

	ntpServers := network.NewTimeServerStatus(network.NamespaceName, network.TimeServerID)
	ntpServers.TypedSpec().NTPServers = []string{constants.DefaultNTPServer}
	suite.Require().NoError(suite.state.Create(suite.ctx, ntpServers))

	// before any machine config exists the status must report "not synced"
	waitForStatus(timeresource.StatusSpec{
		Synced:       false,
		Epoch:        0,
		SyncDisabled: false,
	})

	timeCfg := &v1alpha1.TimeConfig{
		TimeBootTimeout: 5 * time.Second,
	}

	machineCfg := config.NewMachineConfig(&v1alpha1.Config{
		ConfigVersion: "v1alpha1",
		MachineConfig: &v1alpha1.MachineConfig{
			MachineTime: timeCfg,
		},
		ClusterConfig: &v1alpha1.ClusterConfig{},
	})

	suite.Require().NoError(suite.state.Create(suite.ctx, machineCfg))

	// once the boot timeout is configured the status must become "synced"
	waitForStatus(timeresource.StatusSpec{
		Synced:       true,
		Epoch:        0,
		SyncDisabled: false,
	})
}

func (suite *SyncSuite) TearDownTest() {
suite.T().Log("tear down")

Expand Down
1 change: 1 addition & 0 deletions pkg/machinery/config/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ type KubeSpan interface {
type Time interface {
// Disabled reports whether the time service is disabled for the machine.
Disabled() bool
// Servers returns the configured time (NTP) servers.
Servers() []string
// BootTimeout returns the duration after which the node time is treated as
// being in sync so that the boot sequence can proceed; the sync controller
// treats a zero value as "no timeout".
BootTimeout() time.Duration
}

// Kubelet defines the requirements for a config that pertains to kubelet
Expand Down
5 changes: 5 additions & 0 deletions pkg/machinery/config/types/v1alpha1/v1alpha1_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,11 @@ func (t *TimeConfig) Servers() []string {
return t.TimeServers
}

// BootTimeout implements the config.Provider interface.
//
// It returns the TimeBootTimeout field unchanged; a zero duration means no
// boot timeout was configured.
func (t *TimeConfig) BootTimeout() time.Duration {
return t.TimeBootTimeout
}

// Image implements the config.Provider interface.
func (i *InstallConfig) Image() string {
return i.InstallImage
Expand Down
10 changes: 8 additions & 2 deletions pkg/machinery/config/types/v1alpha1/v1alpha1_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ var (
}

// machineTimeExample is a sample TimeConfig: one NTP server and a two-minute
// boot timeout. Each field is set exactly once, since a struct literal may not
// repeat a field name.
machineTimeExample = &TimeConfig{
	TimeServers:     []string{"time.cloudflare.com"},
	TimeBootTimeout: 2 * time.Minute,
}

machineSysctlsExample = map[string]string{
Expand Down Expand Up @@ -1065,7 +1066,12 @@ type TimeConfig struct {
// description: |
// Specifies time (NTP) servers to use for setting the system time.
// Defaults to `pool.ntp.org`
TimeServers []string `yaml:"servers,omitempty"` // This parameter only supports a single time server.
TimeServers []string `yaml:"servers,omitempty"`
// description: |
// Specifies the timeout after which the node time is considered to be in sync, unlocking the boot sequence.
// NTP sync will still be running in the background.
// Defaults to "infinity" (waiting forever for time sync).
TimeBootTimeout time.Duration `yaml:"bootTimeout,omitempty"`
}

// RegistriesConfig represents the image pull options.
Expand Down
9 changes: 7 additions & 2 deletions pkg/machinery/config/types/v1alpha1/v1alpha1_types_doc.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 14 additions & 1 deletion website/content/docs/v0.13/Reference/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ time:
# Specifies time (NTP) servers to use for setting the system time.
servers:
- time.cloudflare.com
bootTimeout: 2m0s # Specifies the timeout after which the node time is considered to be in sync, unlocking the boot sequence.
```


Expand Down Expand Up @@ -2103,6 +2104,7 @@ disabled: false # Indicates if the time service is disabled for the machine.
# Specifies time (NTP) servers to use for setting the system time.
servers:
- time.cloudflare.com
bootTimeout: 2m0s # Specifies the timeout after which the node time is considered to be in sync, unlocking the boot sequence.
```

<hr />
Expand Down Expand Up @@ -2130,8 +2132,19 @@ Defaults to `false`.
Specifies time (NTP) servers to use for setting the system time.
Defaults to `pool.ntp.org`

</div>

<hr />
<div class="dd">

<code>bootTimeout</code> <i>Duration</i>

</div>
<div class="dt">

> This parameter only supports a single time server.
Specifies the timeout after which the node time is considered to be in sync, unlocking the boot sequence.
NTP sync will still be running in the background.
Defaults to "infinity" (waiting forever for time sync).

</div>

Expand Down

0 comments on commit fdd80a1

Please sign in to comment.