diff --git a/agent/client/connector.go b/agent/client/connector.go index 84a71e5078..8e1f7cd6e3 100644 --- a/agent/client/connector.go +++ b/agent/client/connector.go @@ -26,6 +26,22 @@ func Connect(ctx context.Context, endpoint string, opts ...Option) (*Client, err return client, nil } +// See https://pkg.go.dev/google.golang.org/grpc/examples/features/retry#section-readme +// these values were calculated to get a max retry of aprox 120s. This script can be used to play with values: +// curl -SL https://gist.githubusercontent.com/schoren/5dd4dcadf133e4c56fa20c0b6a8d67ef/raw/1992d34d27368578d22e87b061ff00de6f7023be/calculate_max_time.sh | bash -s -- 0.1 2.0 5.0 120 +var retryPolicy = `{ + "methodConfig": [{ + "name": [{}], + "retryPolicy": { + "maxAttempts": 29, + "initialBackoff": "0.1s", + "maxBackoff": "5s", + "backoffMultiplier": 1.5, + "retryableStatusCodes": ["UNAVAILABLE"] + } + }] +}` + func connect(ctx context.Context, endpoint string) (*grpc.ClientConn, error) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() @@ -38,6 +54,7 @@ func connect(ctx context.Context, endpoint string) (*grpc.ClientConn, error) { conn, err := grpc.DialContext( ctx, endpoint, grpc.WithTransportCredentials(transportCredentials), + grpc.WithDefaultServiceConfig(retryPolicy), ) if err != nil { return nil, fmt.Errorf("could not connect to server: %w", err) diff --git a/agent/initialization/start.go b/agent/initialization/start.go index 44dbe3810d..eb24aa8236 100644 --- a/agent/initialization/start.go +++ b/agent/initialization/start.go @@ -17,7 +17,7 @@ import ( var ErrOtlpServerStart = errors.New("OTLP server start error") func NewClient(ctx context.Context, config config.Config, traceCache collector.TraceCache) (*client.Client, error) { - client, err := client.Connect(ctx, config.ServerURL, + controlPlaneClient, err := client.Connect(ctx, config.ServerURL, client.WithAPIKey(config.APIKey), client.WithAgentName(config.Name), ) @@ -25,44 +25,44 @@ func NewClient(ctx context.Context, config config.Config, traceCache collector.T return nil, err } - triggerWorker := workers.NewTriggerWorker(client, workers.WithTraceCache(traceCache)) - pollingWorker := workers.NewPollerWorker(client, workers.WithInMemoryDatastore( + triggerWorker := workers.NewTriggerWorker(controlPlaneClient, workers.WithTraceCache(traceCache)) + pollingWorker := workers.NewPollerWorker(controlPlaneClient, workers.WithInMemoryDatastore( poller.NewInMemoryDatastore(traceCache), )) - dataStoreTestConnectionWorker := workers.NewTestConnectionWorker(client) + dataStoreTestConnectionWorker := workers.NewTestConnectionWorker(controlPlaneClient) - client.OnDataStoreTestConnectionRequest(dataStoreTestConnectionWorker.Test) - client.OnTriggerRequest(triggerWorker.Trigger) - client.OnPollingRequest(pollingWorker.Poll) - client.OnConnectionClosed(func(ctx context.Context, sr *proto.ShutdownRequest) error { + controlPlaneClient.OnDataStoreTestConnectionRequest(dataStoreTestConnectionWorker.Test) + controlPlaneClient.OnTriggerRequest(triggerWorker.Trigger) + controlPlaneClient.OnPollingRequest(pollingWorker.Poll) + controlPlaneClient.OnConnectionClosed(func(ctx context.Context, sr *proto.ShutdownRequest) error { fmt.Printf("Server terminated the connection with the agent. Reason: %s\n", sr.Reason) - return client.Close() + return controlPlaneClient.Close() }) - return client, nil + return controlPlaneClient, nil } // Start the agent with given configuration -func Start(ctx context.Context, config config.Config) (*Session, error) { +func Start(ctx context.Context, cfg config.Config) (*Session, error) { traceCache := collector.NewTraceCache() - client, err := NewClient(ctx, config, traceCache) + controlPlaneClient, err := NewClient(ctx, cfg, traceCache) if err != nil { return nil, err } - err = client.Start(ctx) + err = controlPlaneClient.Start(ctx) if err != nil { return nil, err } - err = StartCollector(ctx, config, traceCache) + err = StartCollector(ctx, cfg, traceCache) if err != nil { return nil, err } return &Session{ - client: client, - Token: client.SessionConfiguration().AgentIdentification.Token, + client: controlPlaneClient, + Token: controlPlaneClient.SessionConfiguration().AgentIdentification.Token, }, nil }