diff --git a/hack/release.toml b/hack/release.toml index 28921c3fdf..5ea40fe813 100644 --- a/hack/release.toml +++ b/hack/release.toml @@ -141,10 +141,20 @@ This is mostly implemented for extension services that log to syslog. """ [notes.ntp] - title = "NTP" + title = "Time Sync" description = """\ Default NTP server was updated to be `time.cloudflare.com` instead of `pool.ntp.org`. Default server is only used if the user does not specify any NTP servers in the configuration. + +Talos Linux can now sync to PTP devices (e.g. provided by the hypervisor) skipping the network time servers. +In order to activate PTP sync, set `machine.time.servers` to the PTP device name (e.g. `/dev/ptp0`): + +```yaml +machine: + time: + servers: + - /dev/ptp0 +``` """ [make_deps] diff --git a/internal/pkg/ntp/ntp.go b/internal/pkg/ntp/ntp.go index 11b58c7371..719bb07e68 100644 --- a/internal/pkg/ntp/ntp.go +++ b/internal/pkg/ntp/ntp.go @@ -11,8 +11,10 @@ import ( "fmt" "math/bits" "net" + "os" "reflect" "slices" + "strings" "sync" "time" @@ -52,6 +54,13 @@ type Syncer struct { AdjustTime AdjustTimeFunc } +// Measurement is a struct containing correction data based on a time request. +type Measurement struct { + ClockOffset time.Duration + Leap ntp.LeapIndicator + Spike bool +} + // NewSyncer creates new Syncer with default configuration. 
func NewSyncer(logger *zap.Logger, timeServers []string) *Syncer { syncer := &Syncer{ @@ -169,9 +178,8 @@ func (syncer *Syncer) Run(ctx context.Context) { } spike := false - - if resp != nil && resp.Validate() == nil { - spike = syncer.isSpike(resp) + if resp != nil { + spike = resp.Spike } switch { @@ -181,17 +189,15 @@ func (syncer *Syncer) Run(ctx context.Context) { case pollInterval == 0: // first sync pollInterval = syncer.MinPoll - case err != nil: - // error encountered, don't change the poll interval case !spike && absDuration(resp.ClockOffset) > ExpectedAccuracy: // huge offset, retry sync with minimum interval pollInterval = syncer.MinPoll - case absDuration(resp.ClockOffset) < ExpectedAccuracy*100/25: // *0.25 + case absDuration(resp.ClockOffset) < ExpectedAccuracy*25/100: // *0.25 // clock offset is within 25% of expected accuracy, increase poll interval if pollInterval < syncer.MaxPoll { pollInterval *= 2 } - case spike || absDuration(resp.ClockOffset) > ExpectedAccuracy*100/75: // *0.75 + case spike || absDuration(resp.ClockOffset) > ExpectedAccuracy*75/100: // *0.75 // spike was detected or clock offset is too large, decrease poll interval if pollInterval > syncer.MinPoll { pollInterval /= 2 @@ -209,7 +215,7 @@ func (syncer *Syncer) Run(ctx context.Context) { zap.Bool("spike", spike), ) - if resp != nil && resp.Validate() == nil && !spike { + if resp != nil && !spike { err = syncer.adjustTime(resp.ClockOffset, resp.Leap, lastSyncServer, pollInterval) if err == nil { @@ -234,14 +240,14 @@ func (syncer *Syncer) Run(ctx context.Context) { } } -func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *ntp.Response, err error) { +func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, measurement *Measurement, err error) { lastSyncServer = syncer.getLastSyncServer() failedServer := "" if lastSyncServer != "" { - resp, err = syncer.queryServer(lastSyncServer) + measurement, err = syncer.queryServer(lastSyncServer) if 
err != nil { - syncer.logger.Error(fmt.Sprintf("ntp query error with server %q", lastSyncServer), zap.Error(err)) + syncer.logger.Error(fmt.Sprintf("time query error with server %q", lastSyncServer), zap.Error(err)) failedServer = lastSyncServer lastSyncServer = "" @@ -254,7 +260,7 @@ func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *n serverList, err = syncer.resolveServers(ctx) if err != nil { - return lastSyncServer, resp, err + return lastSyncServer, measurement, err } for _, server := range serverList { @@ -265,15 +271,15 @@ func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *n select { case <-ctx.Done(): - return lastSyncServer, resp, ctx.Err() + return lastSyncServer, measurement, ctx.Err() case <-syncer.restartSyncCh: - return lastSyncServer, resp, nil + return lastSyncServer, measurement, nil default: } - resp, err = syncer.queryServer(server) + measurement, err = syncer.queryServer(server) if err != nil { - syncer.logger.Error(fmt.Sprintf("ntp query error with server %q", server), zap.Error(err)) + syncer.logger.Error(fmt.Sprintf("time query error with server %q", server), zap.Error(err)) err = nil } else { syncer.setLastSyncServer(server) @@ -284,20 +290,28 @@ func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *n } } - return lastSyncServer, resp, err + return lastSyncServer, measurement, err +} + +func (syncer *Syncer) isPTPDevice(server string) bool { + return strings.HasPrefix(server, "/dev/") } func (syncer *Syncer) resolveServers(ctx context.Context) ([]string, error) { var serverList []string for _, server := range syncer.getTimeServers() { - ips, err := net.LookupIP(server) - if err != nil { - syncer.logger.Warn(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err)) - } + if syncer.isPTPDevice(server) { + serverList = append(serverList, server) + } else { + ips, err := net.LookupIP(server) + if err != nil { + syncer.logger.Warn(fmt.Sprintf("failed 
looking up %q, ignored", server), zap.Error(err)) + } - for _, ip := range ips { - serverList = append(serverList, ip.String()) + for _, ip := range ips { + serverList = append(serverList, ip.String()) + } } select { @@ -310,7 +324,57 @@ func (syncer *Syncer) resolveServers(ctx context.Context) ([]string, error) { return serverList, nil } -func (syncer *Syncer) queryServer(server string) (*ntp.Response, error) { +func (syncer *Syncer) queryServer(server string) (*Measurement, error) { + if syncer.isPTPDevice(server) { + return syncer.queryPTP(server) + } + + return syncer.queryNTP(server) +} + +func (syncer *Syncer) queryPTP(server string) (*Measurement, error) { + phc, err := os.Open(server) + if err != nil { + return nil, err + } + + defer phc.Close() //nolint:errcheck + + // From clock_gettime(2): + // + // Using the appropriate macros, open file descriptors may be converted into clock IDs and passed to clock_gettime(), clock_settime(), and clock_adjtime(2). The + // following example shows how to convert a file descriptor into a dynamic clock ID. 
+ // + // #define CLOCKFD 3 + // #define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) + + clockid := int32(3 | (^phc.Fd() << 3)) + + var ts unix.Timespec + + err = unix.ClockGettime(clockid, &ts) + if err != nil { + return nil, err + } + + offset := time.Until(time.Unix(ts.Sec, ts.Nsec)) + syncer.logger.Debug("PTP clock", + zap.Duration("clock_offset", offset), + zap.Int64("sec", ts.Sec), + zap.Int64("nsec", ts.Nsec), + zap.String("device", server), + ) + + meas := &Measurement{ + ClockOffset: offset, + Leap: 0, + Spike: false, + } + + return meas, err +} + +func (syncer *Syncer) queryNTP(server string) (*Measurement, error) { resp, err := syncer.NTPQuery(server) if err != nil { return nil, err @@ -327,11 +391,19 @@ func (syncer *Syncer) queryServer(server string) (*ntp.Response, error) { zap.Duration("root_distance", resp.RootDistance), ) - if err = resp.Validate(); err != nil { - return resp, err + validationError := resp.Validate() + + measurement := &Measurement{ + ClockOffset: resp.ClockOffset, + Leap: resp.Leap, + Spike: false, + } + + if validationError == nil { + measurement.Spike = syncer.isSpike(resp) } - return resp, err + return measurement, validationError } // log2i returns 0 for v == 0 and v == 1. 
diff --git a/pkg/machinery/config/schemas/config.schema.json b/pkg/machinery/config/schemas/config.schema.json index e06730c40b..58d785b0c2 100644 --- a/pkg/machinery/config/schemas/config.schema.json +++ b/pkg/machinery/config/schemas/config.schema.json @@ -3132,9 +3132,9 @@ }, "type": "array", "title": "servers", - "description": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to time.cloudflare.com.\n", - "markdownDescription": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.", - "x-intellij-html-description": "\u003cp\u003eSpecifies time (NTP) servers to use for setting the system time.\nDefaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n" + "description": "description: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to time.cloudflare.com.\n\nTalos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as “/dev/ptp0” or “/dev/ptp_kvm”.\n", + "markdownDescription": "description: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to `time.cloudflare.com`.\n\n Talos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as \"/dev/ptp0\" or \"/dev/ptp_kvm\".", + "x-intellij-html-description": "\u003cp\u003edescription: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n\n\u003cp\u003eTalos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as \u0026ldquo;/dev/ptp0\u0026rdquo; or \u0026ldquo;/dev/ptp_kvm\u0026rdquo;.\u003c/p\u003e\n" }, "bootTimeout": { "type": "string", diff --git a/pkg/machinery/config/types/v1alpha1/v1alpha1_types.go b/pkg/machinery/config/types/v1alpha1/v1alpha1_types.go index 9aa36aee71..27c30f9cf2 100644 --- 
a/pkg/machinery/config/types/v1alpha1/v1alpha1_types.go +++ b/pkg/machinery/config/types/v1alpha1/v1alpha1_types.go @@ -969,6 +969,9 @@ type TimeConfig struct { // description: | // Specifies time (NTP) servers to use for setting the system time. // Defaults to `time.cloudflare.com`. + // + // Talos can also sync to the PTP time source (e.g provided by the hypervisor), + // provide the path to the PTP device as "/dev/ptp0" or "/dev/ptp_kvm". TimeServers []string `yaml:"servers,omitempty"` // description: | // Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence. diff --git a/pkg/machinery/config/types/v1alpha1/v1alpha1_types_doc.go b/pkg/machinery/config/types/v1alpha1/v1alpha1_types_doc.go index fa3a6c4dfa..419e1bf515 100644 --- a/pkg/machinery/config/types/v1alpha1/v1alpha1_types_doc.go +++ b/pkg/machinery/config/types/v1alpha1/v1alpha1_types_doc.go @@ -1195,8 +1195,8 @@ func (TimeConfig) Doc() *encoder.Doc { Name: "servers", Type: "[]string", Note: "", - Description: "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.", - Comments: [3]string{"" /* encoder.HeadComment */, "Specifies time (NTP) servers to use for setting the system time." 
/* encoder.LineComment */, "" /* encoder.FootComment */}, + Description: "description: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to `time.cloudflare.com`.\n\n Talos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as \"/dev/ptp0\" or \"/dev/ptp_kvm\".\n", + Comments: [3]string{"" /* encoder.HeadComment */, "description: |" /* encoder.LineComment */, "" /* encoder.FootComment */}, }, { Name: "bootTimeout", diff --git a/website/content/v1.7/reference/configuration/v1alpha1/config.md b/website/content/v1.7/reference/configuration/v1alpha1/config.md index dda5cdbdb2..8ff411b2c4 100644 --- a/website/content/v1.7/reference/configuration/v1alpha1/config.md +++ b/website/content/v1.7/reference/configuration/v1alpha1/config.md @@ -314,7 +314,7 @@ env: |`time` |TimeConfig |Used to configure the machine's time settings.
Show example(s){{< highlight yaml >}} time: disabled: false # Indicates if the time service is disabled for the machine. - # Specifies time (NTP) servers to use for setting the system time. + # description: | servers: - time.cloudflare.com bootTimeout: 2m0s # Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence. @@ -1984,7 +1984,7 @@ TimeConfig represents the options for configuring time on a machine. machine: time: disabled: false # Indicates if the time service is disabled for the machine. - # Specifies time (NTP) servers to use for setting the system time. + # description: | servers: - time.cloudflare.com bootTimeout: 2m0s # Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence. @@ -1994,7 +1994,7 @@ machine: | Field | Type | Description | Value(s) | |-------|------|-------------|----------| |`disabled` |bool |
Indicates if the time service is disabled for the machine.Defaults to `false`.
| | -|`servers` |[]string |
Specifies time (NTP) servers to use for setting the system time.Defaults to `time.cloudflare.com`.
| | +|`servers` |[]string |
Specifies time (NTP) servers to use for setting the system time.
Defaults to `time.cloudflare.com`.

Talos can also sync to a PTP time source (e.g. one provided by the hypervisor);
provide the path to the PTP device as "/dev/ptp0" or "/dev/ptp_kvm".
| | |`bootTimeout` |Duration |
Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence.NTP sync will be still running in the background.
Defaults to "infinity" (waiting forever for time sync)
| | diff --git a/website/content/v1.7/schemas/config.schema.json b/website/content/v1.7/schemas/config.schema.json index e06730c40b..58d785b0c2 100644 --- a/website/content/v1.7/schemas/config.schema.json +++ b/website/content/v1.7/schemas/config.schema.json @@ -3132,9 +3132,9 @@ }, "type": "array", "title": "servers", - "description": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to time.cloudflare.com.\n", - "markdownDescription": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.", - "x-intellij-html-description": "\u003cp\u003eSpecifies time (NTP) servers to use for setting the system time.\nDefaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n" + "description": "description: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to time.cloudflare.com.\n\nTalos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as “/dev/ptp0” or “/dev/ptp_kvm”.\n", + "markdownDescription": "description: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to `time.cloudflare.com`.\n\n Talos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as \"/dev/ptp0\" or \"/dev/ptp_kvm\".", + "x-intellij-html-description": "\u003cp\u003edescription: |\n Specifies time (NTP) servers to use for setting the system time.\n Defaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n\n\u003cp\u003eTalos can also sync to the PTP time source (e.g provided by the hypervisor),\n provide the path to the PTP device as \u0026ldquo;/dev/ptp0\u0026rdquo; or \u0026ldquo;/dev/ptp_kvm\u0026rdquo;.\u003c/p\u003e\n" }, "bootTimeout": { "type": "string",