From d6186b854c6502b767dcee0cffe8900741e5e6f4 Mon Sep 17 00:00:00 2001 From: Donal Hurley Date: Thu, 4 Sep 2025 09:40:29 +0100 Subject: [PATCH 1/3] Ensure that a file overview update is performed even if the initial create connection request takes a long time --- internal/file/file_service_operator.go | 27 ++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/internal/file/file_service_operator.go b/internal/file/file_service_operator.go index 92931c430..429a19abf 100644 --- a/internal/file/file_service_operator.go +++ b/internal/file/file_service_operator.go @@ -131,9 +131,25 @@ func (fso *FileServiceOperator) UpdateOverview( ConfigPath: configPath, }, } + backoffSettings := fso.agentConfig.Client.Backoff - backOffCtx, backoffCancel := context.WithTimeout(newCtx, fso.agentConfig.Client.Backoff.MaxElapsedTime) - defer backoffCancel() + // If the create connection takes a long time that we wait indefinitely to do + // the initial file overview update to ensure that the management plane has a file overview + // on agent startup. + if !fso.isConnected.Load() { + slog.DebugContext( + newCtx, + "Not connected to management plane yet, "+ + "retrying indefinitely to update file overview until connection is created", + ) + backoffSettings = &config.BackOff{ + InitialInterval: fso.agentConfig.Client.Backoff.InitialInterval, + MaxInterval: fso.agentConfig.Client.Backoff.MaxInterval, + MaxElapsedTime: 0, + RandomizationFactor: fso.agentConfig.Client.Backoff.RandomizationFactor, + Multiplier: fso.agentConfig.Client.Backoff.Multiplier, + } + } sendUpdateOverview := func() (*mpi.UpdateOverviewResponse, error) { if fso.fileServiceClient == nil { @@ -162,10 +178,9 @@ func (fso *FileServiceOperator) UpdateOverview( return response, nil } - backoffSettings := fso.agentConfig.Client.Backoff response, err := backoff.RetryWithData( sendUpdateOverview, - backoffHelpers.Context(backOffCtx, backoffSettings), + backoffHelpers.Context(newCtx, backoffSettings), ) if err != nil { return err @@ -174,13 +189,13 @@ func (fso *FileServiceOperator) UpdateOverview( slog.DebugContext(newCtx, "UpdateOverview response", "response", response) if response.GetOverview() == nil { - slog.DebugContext(ctx, "UpdateOverview response is empty") + slog.DebugContext(newCtx, "UpdateOverview response is empty") return nil } delta := files.ConvertToMapOfFiles(response.GetOverview().GetFiles()) if len(delta) != 0 { - return fso.updateFiles(ctx, delta, instanceID, configPath, iteration) + return fso.updateFiles(newCtx, delta, instanceID, configPath, iteration) } return err From fbb076cce464792e42fe56764d5c8969820f1c73 Mon Sep 17 00:00:00 2001 From: Donal Hurley Date: Fri, 5 Sep 2025 11:29:17 +0100 Subject: [PATCH 2/3] Update unit tests --- internal/file/file_service_operator_test.go | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/internal/file/file_service_operator_test.go b/internal/file/file_service_operator_test.go index ba092a9f9..f8206e145 100644 --- a/internal/file/file_service_operator_test.go +++ b/internal/file/file_service_operator_test.go @@ -12,6 +12,7 @@ import ( "sync" "sync/atomic" "testing" + "time" mpi "github.com/nginx/agent/v3/api/grpc/mpi/v1" "github.com/nginx/agent/v3/api/grpc/mpi/v1/v1fakes" @@ -97,6 +98,30 @@ func TestFileServiceOperator_UpdateOverview_MaxIterations(t *testing.T) { assert.Equal(t, "too many UpdateOverview attempts", err.Error()) } +func TestFileServiceOperator_UpdateOverview_NoConnection(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond) + defer cancel() + + filePath := filepath.Join(t.TempDir(), "nginx.conf") + fileMeta := protos.FileMeta(filePath, "") + + fakeFileServiceClient := &v1fakes.FakeFileServiceClient{} + + agentConfig := types.AgentConfig() + agentConfig.Client.Backoff.MaxElapsedTime = 200 * time.Millisecond + + fileServiceOperator := NewFileServiceOperator(types.AgentConfig(), fakeFileServiceClient, &sync.RWMutex{}) + fileServiceOperator.SetIsConnected(false) + + err := fileServiceOperator.UpdateOverview(ctx, "123", []*mpi.File{ + { + FileMeta: fileMeta, + }, + }, filePath, 0) + + assert.ErrorIs(t, err, context.DeadlineExceeded) +} + func TestFileManagerService_UpdateFile(t *testing.T) { tests := []struct { name string From 141d893abbf166e881d93583c0c009059859d68c Mon Sep 17 00:00:00 2001 From: Donal Hurley Date: Tue, 9 Sep 2025 10:12:50 +0100 Subject: [PATCH 3/3] Fix context --- internal/file/file_manager_service.go | 1 + internal/file/file_service_operator.go | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/internal/file/file_manager_service.go b/internal/file/file_manager_service.go index 869284492..162800f64 100644 --- a/internal/file/file_manager_service.go +++ b/internal/file/file_manager_service.go @@ -244,6 +244,7 @@ func (fms *FileManagerService) ConfigUpdate(ctx context.Context, "error", err, ) } + slog.InfoContext(ctx, "Finished updating file overview") } func (fms *FileManagerService) ConfigUpload(ctx context.Context, configUploadRequest *mpi.ConfigUploadRequest) error { diff --git a/internal/file/file_service_operator.go b/internal/file/file_service_operator.go index 429a19abf..4d902bcd1 100644 --- a/internal/file/file_service_operator.go +++ b/internal/file/file_service_operator.go @@ -131,7 +131,14 @@ func (fso *FileServiceOperator) UpdateOverview( ConfigPath: configPath, }, } - backoffSettings := fso.agentConfig.Client.Backoff + + backoffSettings := &config.BackOff{ + InitialInterval: fso.agentConfig.Client.Backoff.InitialInterval, + MaxInterval: fso.agentConfig.Client.Backoff.MaxInterval, + MaxElapsedTime: fso.agentConfig.Client.Backoff.MaxElapsedTime, + RandomizationFactor: fso.agentConfig.Client.Backoff.RandomizationFactor, + Multiplier: fso.agentConfig.Client.Backoff.Multiplier, + } // If the create connection takes a long time that we wait indefinitely to do // the initial file overview update to ensure that the management plane has a file overview @@ -194,8 +201,10 @@ func (fso *FileServiceOperator) UpdateOverview( } delta := files.ConvertToMapOfFiles(response.GetOverview().GetFiles()) + // Make sure that the original context is used if a file upload is required so that original correlation ID + // can be used again for update file overview request if len(delta) != 0 { - return fso.updateFiles(newCtx, delta, instanceID, configPath, iteration) + return fso.updateFiles(ctx, delta, instanceID, configPath, iteration) } return err