Skip to content

Commit

Permalink
test: clear connection refused errors after reset
Browse files Browse the repository at this point in the history
After a node reboot (and the resulting gRPC API unavailability), the gRPC
stack might cache connection refused errors for up to the backoff timeout.
Explicitly clear such errors in reset tests before trying to read data from
the node to verify that the reset succeeded.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
  • Loading branch information
smira authored and talos-bot committed Feb 1, 2021
1 parent c36e4a9 commit 87ccf0e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 19 deletions.
2 changes: 2 additions & 0 deletions internal/integration/api/apply-config.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ func (suite *ApplyConfigSuite) TestApplyNoReboot() {
suite.WaitForBootDone(suite.ctx)

node := suite.RandomDiscoveredNode()
suite.ClearConnectionRefused(suite.ctx, node)

nodeCtx := client.WithNodes(suite.ctx, node)

provider, err := suite.readConfigFromNode(nodeCtx)
Expand Down
8 changes: 8 additions & 0 deletions internal/integration/api/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ func (suite *ResetSuite) TestResetNodeByNode() {
return base.IgnoreGRPCUnavailable(suite.Client.Reset(nodeCtx, true, true))
}, 10*time.Minute)

suite.ClearConnectionRefused(suite.ctx, node)

postReset, err := suite.hashKubeletCert(suite.ctx, node)
suite.Require().NoError(err)

Expand Down Expand Up @@ -156,6 +158,8 @@ func (suite *ResetSuite) TestResetNoGraceful() {
return base.IgnoreGRPCUnavailable(suite.Client.Reset(nodeCtx, false, true))
}, 5*time.Minute)

suite.ClearConnectionRefused(suite.ctx, node)

postReset, err := suite.hashKubeletCert(suite.ctx, node)
suite.Require().NoError(err)

Expand Down Expand Up @@ -195,6 +199,8 @@ func (suite *ResetSuite) TestResetWithSpecEphemeral() {
}))
}, 5*time.Minute)

suite.ClearConnectionRefused(suite.ctx, node)

postReset, err := suite.hashKubeletCert(suite.ctx, node)
suite.Require().NoError(err)

Expand Down Expand Up @@ -236,6 +242,8 @@ func (suite *ResetSuite) TestResetWithSpecState() {
}))
}, 5*time.Minute)

suite.ClearConnectionRefused(suite.ctx, node)

postReset, err := suite.hashKubeletCert(suite.ctx, node)
suite.Require().NoError(err)

Expand Down
48 changes: 29 additions & 19 deletions internal/integration/base/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,7 @@ func (apiSuite *APISuite) SetupSuite() {

if len(nodes) > 0 {
// grpc might trigger backoff on reconnect attempts, so make sure we clear them
ctx, cancel := context.WithTimeout(context.Background(), backoff.DefaultConfig.MaxDelay)
defer cancel()

apiSuite.Require().NoError(retry.Constant(backoff.DefaultConfig.MaxDelay, retry.WithUnits(time.Second)).Retry(func() error {
for i := 0; i < len(nodes); i++ {
_, err = apiSuite.Client.Version(client.WithNodes(ctx, nodes...))
if err == nil {
continue
}

if strings.Contains(err.Error(), "connection refused") {
return retry.ExpectedError(err)
}

return retry.UnexpectedError(err)
}

return nil
}))
apiSuite.ClearConnectionRefused(context.Background(), nodes...)
}
}

Expand Down Expand Up @@ -278,6 +260,34 @@ func (apiSuite *APISuite) WaitForBootDone(ctx context.Context) {
apiSuite.Require().Empty(nodesNotDoneBooting)
}

// ClearConnectionRefused probes the given nodes with Version API calls until they
// stop failing with "connection refused" errors, which may linger after a node reboot.
//
// The whole operation is bounded by a context derived from ctx with a timeout of
// backoff.DefaultConfig.MaxDelay. Any error other than "connection refused" is
// reported to the retry loop as unexpected, and the suite requires the retry to
// finish without error.
func (apiSuite *APISuite) ClearConnectionRefused(ctx context.Context, nodes ...string) {
	ctx, cancel := context.WithTimeout(ctx, backoff.DefaultConfig.MaxDelay)
	defer cancel()

	controlPlaneCount := len(apiSuite.DiscoverNodes().NodesByType(machine.TypeControlPlane))
	initCount := len(apiSuite.DiscoverNodes().NodesByType(machine.TypeInit))

	// Number of consecutive Version calls made per retry round.
	// NOTE(review): presumably one call per control plane endpoint the client may
	// be balanced across — confirm against the client implementation.
	probesPerRound := controlPlaneCount + initCount
	if probesPerRound == 0 {
		// No control plane/init nodes discovered: fall back to three probes.
		probesPerRound = 3
	}

	probe := func() error {
		for remaining := probesPerRound; remaining > 0; remaining-- {
			_, err := apiSuite.Client.Version(client.WithNodes(ctx, nodes...))

			switch {
			case err == nil:
				// This probe went through; move on to the next one.
			case strings.Contains(err.Error(), "connection refused"):
				// Still seeing the refused error: ask the retry loop for another round.
				return retry.ExpectedError(err)
			default:
				return retry.UnexpectedError(err)
			}
		}

		return nil
	}

	apiSuite.Require().NoError(retry.Constant(backoff.DefaultConfig.MaxDelay, retry.WithUnits(time.Second)).Retry(probe))
}

// TearDownSuite closes Talos API client.
func (apiSuite *APISuite) TearDownSuite() {
if apiSuite.Client != nil {
Expand Down

0 comments on commit 87ccf0e

Please sign in to comment.