Skip to content

Commit 9b819ee

Browse files
committed
fix: watch buffer overrun for RouteStatus
Fixes #8157. This PR contains two fixes, both related to the same problem. Several routes for different links but the same IPv6 destination might exist at the same time, so the route resource ID should handle that. The problem was that these routes were misreported, causing internal updates for the same resources multiple times (equal to the number of links). Don't trigger controllers more often than 10 times per second (with a burst of 5) for kernel notifications. This ensures Talos doesn't try to reflect the current state of the network subsystem as resources too often, which causes excessive CPU usage and might potentially lead to a buffer overrun under a high rate of changes. Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com> (cherry picked from commit 474eccd)
1 parent 730913f commit 9b819ee

File tree

16 files changed

+152
-19
lines changed

16 files changed

+152
-19
lines changed

internal/app/machined/pkg/controllers/kubespan/manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
455455
if err = r.Modify(ctx,
456456
network.NewRouteSpec(
457457
network.ConfigNamespaceName,
458-
network.LayeredID(network.ConfigOperator, network.RouteID(spec.Table, spec.Family, spec.Destination, spec.Gateway, spec.Priority)),
458+
network.LayeredID(network.ConfigOperator, network.RouteID(spec.Table, spec.Family, spec.Destination, spec.Gateway, spec.Priority, spec.OutLinkName)),
459459
),
460460
func(r resource.Resource) error {
461461
*r.(*network.RouteSpec).TypedSpec() = spec

internal/app/machined/pkg/controllers/kubespan/manager_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ func (suite *ManagerSuite) TestReconcile() {
233233
netip.Prefix{},
234234
netip.Addr{},
235235
1,
236+
"kubespan",
236237
),
237238
),
238239
network.LayeredID(
@@ -243,6 +244,7 @@ func (suite *ManagerSuite) TestReconcile() {
243244
netip.Prefix{},
244245
netip.Addr{},
245246
1,
247+
"kubespan",
246248
),
247249
),
248250
},

internal/app/machined/pkg/controllers/network/address_spec.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func (ctrl *AddressSpecController) Outputs() []controller.Output {
5454
//nolint:gocyclo
5555
func (ctrl *AddressSpecController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
5656
// watch link changes as some address might need to be re-applied if the link appears
57-
watcher, err := watch.NewRtNetlink(r, unix.RTMGRP_LINK)
57+
watcher, err := watch.NewRtNetlink(watch.NewDefaultRateLimitedTrigger(ctx, r), unix.RTMGRP_LINK)
5858
if err != nil {
5959
return err
6060
}

internal/app/machined/pkg/controllers/network/address_status.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func (ctrl *AddressStatusController) Outputs() []controller.Output {
4747
//
4848
//nolint:gocyclo
4949
func (ctrl *AddressStatusController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
50-
watcher, err := watch.NewRtNetlink(r, unix.RTMGRP_LINK|unix.RTMGRP_IPV4_IFADDR|unix.RTMGRP_IPV6_IFADDR)
50+
watcher, err := watch.NewRtNetlink(watch.NewDefaultRateLimitedTrigger(ctx, r), unix.RTMGRP_LINK|unix.RTMGRP_IPV4_IFADDR|unix.RTMGRP_IPV6_IFADDR)
5151
if err != nil {
5252
return err
5353
}

internal/app/machined/pkg/controllers/network/link_spec.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ func (ctrl *LinkSpecController) Run(ctx context.Context, r controller.Runtime, l
6767
}
6868

6969
// watch link changes as some routes might need to be re-applied if the link appears
70-
watcher, err := watch.NewRtNetlink(r, unix.RTMGRP_LINK)
70+
watcher, err := watch.NewRtNetlink(watch.NewDefaultRateLimitedTrigger(ctx, r), unix.RTMGRP_LINK)
7171
if err != nil {
7272
return err
7373
}

internal/app/machined/pkg/controllers/network/link_status.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,14 @@ func (ctrl *LinkStatusController) Run(ctx context.Context, r controller.Runtime,
7272
// create watch connections to rtnetlink and ethtool via genetlink
7373
// these connections are used only to join multicast groups and receive notifications on changes
7474
// other connections are used to send requests and receive responses, as we can't mix the notifications and request/responses
75-
rtnetlinkWatcher, err := watch.NewRtNetlink(r, unix.RTMGRP_LINK)
75+
rtnetlinkWatcher, err := watch.NewRtNetlink(watch.NewDefaultRateLimitedTrigger(ctx, r), unix.RTMGRP_LINK)
7676
if err != nil {
7777
return err
7878
}
7979

8080
defer rtnetlinkWatcher.Done()
8181

82-
ethtoolWatcher, err := watch.NewEthtool(r)
82+
ethtoolWatcher, err := watch.NewEthtool(watch.NewDefaultRateLimitedTrigger(ctx, r))
8383
if err != nil {
8484
logger.Warn("ethtool watcher failed to start", zap.Error(err))
8585
} else {

internal/app/machined/pkg/controllers/network/operator_spec.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,10 @@ func (ctrl *OperatorSpecController) reconcileOperatorOutputs(ctx context.Context
302302
if err := apply(
303303
network.NewRouteSpec(
304304
network.ConfigNamespaceName,
305-
fmt.Sprintf("%s/%s", op.Operator.Prefix(), network.RouteID(routeSpec.Table, routeSpec.Family, routeSpec.Destination, routeSpec.Gateway, routeSpec.Priority)),
305+
fmt.Sprintf("%s/%s",
306+
op.Operator.Prefix(),
307+
network.RouteID(routeSpec.Table, routeSpec.Family, routeSpec.Destination, routeSpec.Gateway, routeSpec.Priority, routeSpec.OutLinkName),
308+
),
306309
),
307310
func(r resource.Resource) {
308311
*r.(*network.RouteSpec).TypedSpec() = routeSpec

internal/app/machined/pkg/controllers/network/platform_config.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,10 @@ func (ctrl *PlatformConfigController) apply(ctx context.Context, r controller.Ru
281281
idBuilder: func(spec interface{}) (resource.ID, error) {
282282
routeSpec := spec.(network.RouteSpecSpec) //nolint:errcheck,forcetypeassert
283283

284-
return network.LayeredID(network.ConfigPlatform, network.RouteID(routeSpec.Table, routeSpec.Family, routeSpec.Destination, routeSpec.Gateway, routeSpec.Priority)), nil
284+
return network.LayeredID(
285+
network.ConfigPlatform,
286+
network.RouteID(routeSpec.Table, routeSpec.Family, routeSpec.Destination, routeSpec.Gateway, routeSpec.Priority, routeSpec.OutLinkName),
287+
), nil
285288
},
286289
resourceBuilder: func(id string) resource.Resource {
287290
return network.NewRouteSpec(network.ConfigNamespaceName, id)

internal/app/machined/pkg/controllers/network/route_config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ func (ctrl *RouteConfigController) apply(ctx context.Context, r controller.Runti
155155

156156
for _, route := range routes {
157157
route := route
158-
id := network.LayeredID(route.ConfigLayer, network.RouteID(route.Table, route.Family, route.Destination, route.Gateway, route.Priority))
158+
id := network.LayeredID(route.ConfigLayer, network.RouteID(route.Table, route.Family, route.Destination, route.Gateway, route.Priority, route.OutLinkName))
159159

160160
if err := r.Modify(
161161
ctx,

internal/app/machined/pkg/controllers/network/route_config_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ func (suite *RouteConfigSuite) TestMachineConfiguration() {
231231

232232
suite.assertRoutes(
233233
[]string{
234-
"configuration/inet6/2001:470:6d:30e:8ed2:b60c:9d2f:803b//1024",
234+
"configuration/eth2/inet6/2001:470:6d:30e:8ed2:b60c:9d2f:803b//1024",
235235
"configuration/inet4/10.0.3.1/10.0.3.0/24/1024",
236236
"configuration/inet4/192.168.0.25/192.168.0.0/18/25",
237237
"configuration/inet4/192.244.0.1/192.244.0.0/24/1024",

0 commit comments

Comments
 (0)