Skip to content

Commit 876f836

Browse files
shanduurpranav767
andcommitted
feat: add support for HTTP Probes
- Add HTTPProbeSpec to ProbeSpecSpec (URL + timeout) - Implement probeHTTP() to send GET requests, treat 2xx/3xx as success - Support machine proxy config via httpdefaults.PatchTransport - Add HTTPProbeConfig v1alpha1 document and controller integration - Add unit and integration tests for HTTP probe lifecycle Signed-off-by: Pranav Patil <pranavppatil767@gmail.com> Co-authored-by: Pranav Patil <pranavppatil767@gmail.com> Signed-off-by: Mateusz Urbanek <mateusz.urbanek@siderolabs.com>
1 parent 9b776d5 commit 876f836

24 files changed

Lines changed: 1746 additions & 389 deletions

File tree

api/resource/definitions/network/network.proto

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,14 @@ message EthernetStatusSpec {
237237
repeated talos.resource.definitions.enums.NethelpersWOLMode wake_on_lan = 10;
238238
}
239239

240+
// HTTPProbeSpec describes the HTTP probe: the URL to request and the time limit for a single probe request.
241+
message HTTPProbeSpec {
242+
// URL is the http:// or https:// URL to probe.
243+
common.URL url = 1;
244+
// Timeout is the time limit for a single probe request.
245+
google.protobuf.Duration timeout = 2;
246+
}
247+
240248
// HardwareAddrSpec describes spec for the link.
241249
message HardwareAddrSpec {
242250
// Name defines link name
@@ -502,10 +510,12 @@ message ProbeSpecSpec {
502510
google.protobuf.Duration interval = 1;
503511
// FailureThreshold is the number of consecutive failures for the probe to be considered failed after having succeeded.
504512
int64 failure_threshold = 2;
505-
// One of the probe types should be specified, for now it's only TCP.
513+
// TCP is the TCP probe spec. One of TCP or HTTP must be specified.
506514
TCPProbeSpec tcp = 3;
507515
// Configuration layer.
508516
talos.resource.definitions.enums.NetworkConfigLayer config_layer = 4;
517+
// HTTP is the HTTP probe spec. One of TCP or HTTP must be specified.
518+
HTTPProbeSpec http = 5;
509519
}
510520

511521
// ProbeStatusSpec describes the Probe.

hack/release.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ The default installer image has been updated to use the Image Factory.
5151
title = "Host DNS Configuration"
5252
description = """\
5353
HostDNS configuration was moved from the v1alpha1 config `.machine.features.hostDNS` field to the new `hostDNS` in the `ResolverConfig` document.
54+
"""
55+
56+
[notes.httpProbe]
57+
title = "HTTP Probe Support"
58+
description = """\
59+
Talos now supports HTTP network probes, allowing for monitoring of HTTP endpoints.
60+
HTTP responses with status 200-399 are considered successful, while responses with status 400 and above, as well as connection and transport errors, are treated as failures.
5461
"""
5562

5663
[make_deps]

internal/app/machined/pkg/controllers/network/internal/probe/probe.go

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@ import (
99
"context"
1010
"errors"
1111
"net"
12+
"net/http"
1213
"sync"
1314
"syscall"
1415
"time"
1516

17+
"github.com/hashicorp/go-cleanhttp"
1618
"github.com/siderolabs/gen/channel"
1719
"go.uber.org/zap"
1820

21+
"github.com/siderolabs/talos/pkg/httpdefaults"
1922
"github.com/siderolabs/talos/pkg/machinery/resources/network"
2023
)
2124

@@ -121,11 +124,11 @@ func (runner *Runner) run(ctx context.Context, notifyCh chan<- Notification, log
121124

122125
// probe runs a probe.
123126
func (runner *Runner) probe(ctx context.Context) error {
124-
var zeroTCP network.TCPProbeSpec
125-
126127
switch {
127-
case runner.Spec.TCP != zeroTCP:
128+
case runner.Spec.TCP != (network.TCPProbeSpec{}):
128129
return runner.probeTCP(ctx)
130+
case runner.Spec.HTTP != (network.HTTPProbeSpec{}):
131+
return runner.probeHTTP(ctx)
129132
default:
130133
return errors.New("no probe type specified")
131134
}
@@ -152,3 +155,36 @@ func (runner *Runner) probeTCP(ctx context.Context) error {
152155

153156
return conn.Close()
154157
}
158+
159+
// probeHTTP runs an HTTP probe.
160+
//
161+
// HTTP responses with status 200-399 are considered success.
162+
// Status 400+ and connection/transport errors are treated as failures.
163+
// The client honors the machine's proxy configuration via httpdefaults.PatchTransport.
164+
func (runner *Runner) probeHTTP(ctx context.Context) error {
165+
client := &http.Client{
166+
Transport: httpdefaults.PatchTransport(cleanhttp.DefaultTransport()),
167+
CheckRedirect: func(*http.Request, []*http.Request) error {
168+
return http.ErrUseLastResponse
169+
},
170+
}
171+
172+
ctx, cancel := context.WithTimeout(ctx, runner.Spec.HTTP.Timeout)
173+
defer cancel()
174+
175+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, runner.Spec.HTTP.URL.String(), nil)
176+
if err != nil {
177+
return err
178+
}
179+
180+
resp, err := client.Do(req)
181+
if err != nil {
182+
return err
183+
}
184+
185+
if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest {
186+
return errors.New("received non-success status code: " + resp.Status)
187+
}
188+
189+
return resp.Body.Close()
190+
}

internal/app/machined/pkg/controllers/network/internal/probe/probe_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/http"
1010
"net/http/httptest"
1111
"net/url"
12+
"sync/atomic"
1213
"testing"
1314
"testing/synctest"
1415
"time"
@@ -21,6 +22,18 @@ import (
2122
"github.com/siderolabs/talos/pkg/machinery/resources/network"
2223
)
2324

25+
// swapHandler is an http.Handler whose underlying handler can be replaced
// atomically while a server is using it.
//
// The handler is kept behind an atomic.Pointer rather than an atomic.Value:
// atomic.Value panics when successive stores use different concrete types,
// which would restrict Swap to handlers sharing one concrete type.
type swapHandler struct {
	v atomic.Pointer[http.Handler]
}

// ServeHTTP implements http.Handler by delegating to the current handler.
//
// Swap must have been called at least once before the first request is served.
func (h *swapHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	(*h.v.Load()).ServeHTTP(w, r)
}

// Swap replaces the handler used for subsequent requests.
func (h *swapHandler) Swap(newHandler http.Handler) {
	h.v.Store(&newHandler)
}
36+
2437
func TestProbeHTTP(t *testing.T) {
2538
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
2639
w.WriteHeader(http.StatusOK)
@@ -151,3 +164,75 @@ func TestProbeConsecutiveFailures(t *testing.T) {
151164
assert.False(t, notify.Status.Success)
152165
})
153166
}
167+
168+
func TestProbeHTTPProbe(t *testing.T) {
169+
// Server returns 200 OK.
170+
handler := &swapHandler{}
171+
handler.Swap(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
172+
w.WriteHeader(http.StatusOK)
173+
}))
174+
175+
server := httptest.NewServer(handler)
176+
t.Cleanup(server.Close)
177+
178+
probeURL, err := url.Parse(server.URL)
179+
require.NoError(t, err)
180+
181+
p := probe.Runner{
182+
ID: "http-test",
183+
Spec: network.ProbeSpecSpec{
184+
Interval: 10 * time.Millisecond,
185+
HTTP: network.HTTPProbeSpec{
186+
URL: probeURL,
187+
Timeout: time.Second,
188+
},
189+
},
190+
}
191+
192+
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
193+
t.Cleanup(cancel)
194+
195+
notifyCh := make(chan probe.Notification)
196+
197+
p.Start(ctx, notifyCh, zaptest.NewLogger(t))
198+
t.Cleanup(p.Stop)
199+
200+
// probe should succeed — 2xx/3xx responses count as success
201+
for range 3 {
202+
assert.Equal(t, probe.Notification{
203+
ID: "http-test",
204+
Status: network.ProbeStatusSpec{
205+
Success: true,
206+
},
207+
}, <-notifyCh)
208+
}
209+
210+
// 4xx/5xx responses count as failure
211+
handler.Swap(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
212+
w.WriteHeader(http.StatusServiceUnavailable)
213+
}))
214+
215+
for range 3 {
216+
notification := <-notifyCh
217+
assert.Equal(t, "http-test", notification.ID)
218+
assert.False(t, notification.Status.Success)
219+
assert.NotEmpty(t, notification.Status.LastError)
220+
}
221+
222+
// stop the server — now the probe should fail
223+
server.Close()
224+
225+
for {
226+
notification := <-notifyCh
227+
228+
if notification.Status.Success {
229+
continue
230+
}
231+
232+
assert.Equal(t, "http-test", notification.ID)
233+
assert.False(t, notification.Status.Success)
234+
assert.NotEmpty(t, notification.Status.LastError)
235+
236+
break
237+
}
238+
}

internal/app/machined/pkg/controllers/network/probe_config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,11 @@ func (ctrl *ProbeConfigController) parseMachineConfiguration(cfg *config.Machine
131131
Endpoint: probeConfig.Endpoint(),
132132
Timeout: probeConfig.Timeout(),
133133
}
134+
case configconfig.NetworkHTTPProbeConfig:
135+
spec.HTTP = network.HTTPProbeSpec{
136+
URL: probeConfig.URL().URL,
137+
Timeout: probeConfig.Timeout(),
138+
}
134139
default:
135140
panic(fmt.Sprintf("unsupported probe config type: %T", probeConfig))
136141
}

internal/app/machined/pkg/controllers/network/probe_config_test.go

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,19 @@
55
package network_test
66

77
import (
8+
"net/url"
89
"testing"
910
"time"
1011

1112
"github.com/cosi-project/runtime/pkg/resource/rtestutils"
13+
"github.com/siderolabs/gen/ensure"
1214
"github.com/stretchr/testify/assert"
1315
"github.com/stretchr/testify/suite"
1416

1517
"github.com/siderolabs/talos/internal/app/machined/pkg/controllers/ctest"
1618
netctrl "github.com/siderolabs/talos/internal/app/machined/pkg/controllers/network"
1719
"github.com/siderolabs/talos/pkg/machinery/config/container"
20+
"github.com/siderolabs/talos/pkg/machinery/config/types/meta"
1821
networkcfg "github.com/siderolabs/talos/pkg/machinery/config/types/network"
1922
"github.com/siderolabs/talos/pkg/machinery/resources/config"
2023
"github.com/siderolabs/talos/pkg/machinery/resources/network"
@@ -29,7 +32,7 @@ func (suite *ProbeConfigSuite) TestNoConfig() {
2932
ctest.AssertNoResource[*network.ProbeSpec](suite, "tcp:proxy.example.com:3128", rtestutils.WithNamespace(network.NamespaceName))
3033
}
3134

32-
func (suite *ProbeConfigSuite) TestSingleProbe() {
35+
func (suite *ProbeConfigSuite) TestSingleProbe() { //nolint:dupl
3336
probeConfig := networkcfg.NewTCPProbeConfigV1Alpha1("proxy-check")
3437
probeConfig.ProbeInterval = time.Second
3538
probeConfig.ProbeFailureThreshold = 3
@@ -132,6 +135,58 @@ func (suite *ProbeConfigSuite) TestMultipleProbes() {
132135
ctest.AssertNoResource[*network.ProbeSpec](suite, "configuration/tcp:8.8.8.8:53", rtestutils.WithNamespace(network.ConfigNamespaceName))
133136
}
134137

138+
func (suite *ProbeConfigSuite) TestHTTPProbe() { //nolint:dupl
139+
probeConfig := networkcfg.NewHTTPProbeConfigV1Alpha1("http-check")
140+
probeConfig.ProbeInterval = time.Second
141+
probeConfig.ProbeFailureThreshold = 3
142+
probeConfig.HTTPEndpoint = meta.URL{URL: ensure.Value(url.Parse("https://example.com"))}
143+
probeConfig.HTTPTimeout = 10 * time.Second
144+
145+
ctr, err := container.New(probeConfig)
146+
suite.Require().NoError(err)
147+
148+
cfg := config.NewMachineConfig(ctr)
149+
suite.Create(cfg)
150+
151+
ctest.AssertResources(
152+
suite,
153+
[]string{
154+
"configuration/http:https://example.com",
155+
}, func(r *network.ProbeSpec, asrt *assert.Assertions) {
156+
asrt.Equal(time.Second, r.TypedSpec().Interval)
157+
asrt.Equal(3, r.TypedSpec().FailureThreshold)
158+
asrt.Equal("https://example.com", r.TypedSpec().HTTP.URL.String())
159+
asrt.Equal(10*time.Second, r.TypedSpec().HTTP.Timeout)
160+
asrt.Equal(network.ConfigMachineConfiguration, r.TypedSpec().ConfigLayer)
161+
},
162+
rtestutils.WithNamespace(network.ConfigNamespaceName),
163+
)
164+
165+
// Update the probe config
166+
ctest.UpdateWithConflicts(suite, cfg, func(r *config.MachineConfig) error {
167+
docs := r.Container().Documents()
168+
probeDoc := docs[0].(*networkcfg.HTTPProbeConfigV1Alpha1)
169+
probeDoc.ProbeFailureThreshold = 5
170+
171+
return nil
172+
})
173+
174+
ctest.AssertResources(
175+
suite,
176+
[]string{
177+
"configuration/http:https://example.com",
178+
}, func(r *network.ProbeSpec, asrt *assert.Assertions) {
179+
asrt.Equal(5, r.TypedSpec().FailureThreshold)
180+
},
181+
rtestutils.WithNamespace(network.ConfigNamespaceName),
182+
)
183+
184+
// Remove the config
185+
suite.Destroy(cfg)
186+
187+
ctest.AssertNoResource[*network.ProbeSpec](suite, "configuration/http:https://example.com", rtestutils.WithNamespace(network.ConfigNamespaceName))
188+
}
189+
135190
func TestProbeConfigSuite(t *testing.T) {
136191
t.Parallel()
137192

0 commit comments

Comments
 (0)