From 97921915a801dd82b1f5a70e0a69353539c1e3ae Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Fri, 3 Feb 2023 13:43:22 -0500 Subject: [PATCH] distribution/xfer: fix download fencepost bug maxDownloadAttempts maps to the daemon configuration flag --max-download-attempts int Set the max download attempts for each pull (default 5) and the daemon configuration machinery interprets a value of 0 as "apply the default value" and not a valid user value (config validation/ normalization bugs notwithstanding). The intention is clearly that this configuration value should be an upper limit on the number of times the daemon should try to download a particular layer before giving up. So it is surprising to have the configuration value interpreted as a _retry_ limit. The daemon will make up to N+1 attempts to download a layer! This also means users cannot disable retries even if they wanted to. Fix the fencepost bug so that max attempts really means max attempts, not max retries. And fix the fencepost bug with the retry-backoff delay so that the first backoff is 5s, not 10s. Signed-off-by: Cory Snider --- distribution/xfer/download.go | 12 ++++++------ distribution/xfer/download_test.go | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/distribution/xfer/download.go b/distribution/xfer/download.go index 32f2338e968ac..e77c564513cb8 100644 --- a/distribution/xfer/download.go +++ b/distribution/xfer/download.go @@ -267,7 +267,7 @@ func (ldm *LayerDownloadManager) makeDownloadFunc(descriptor DownloadDescriptor, downloadReader io.ReadCloser size int64 err error - retries int + attempt int = 1 ) defer descriptor.Close() @@ -287,16 +287,16 @@ func (ldm *LayerDownloadManager) makeDownloadFunc(descriptor DownloadDescriptor, default: } - retries++ - if _, isDNR := err.(DoNotRetry); isDNR || retries > ldm.maxDownloadAttempts { - logrus.Errorf("Download failed after %d attempts: %v", retries, err) + if _, isDNR := err.(DoNotRetry); isDNR || attempt >= ldm.maxDownloadAttempts { + logrus.Errorf("Download failed after %d attempts: %v", attempt, err) d.err = err return } - logrus.Infof("Download failed, retrying (%d/%d): %v", retries, ldm.maxDownloadAttempts, err) - delay := retries * 5 + logrus.Infof("Download failed, retrying (%d/%d): %v", attempt, ldm.maxDownloadAttempts, err) + delay := attempt * 5 ticker := time.NewTicker(ldm.waitDuration) + attempt++ selectLoop: for { diff --git a/distribution/xfer/download_test.go b/distribution/xfer/download_test.go index ced9575fb003a..f45598517db7b 100644 --- a/distribution/xfer/download_test.go +++ b/distribution/xfer/download_test.go @@ -216,7 +216,7 @@ func (d *mockDownloadDescriptor) Download(ctx context.Context, progressOutput pr if d.retries < d.simulateRetries { d.retries++ - return nil, 0, fmt.Errorf("simulating download attempt %d/%d", d.retries, d.simulateRetries) + return nil, 0, fmt.Errorf("simulating download attempt failure %d/%d", d.retries, d.simulateRetries) } return d.mockTarStream(), 0, nil @@ -367,25 +367,25 @@ func TestMaxDownloadAttempts(t *testing.T) { }{ { name: "max-attempts=5, succeed at 2nd attempt", - simulateRetries: 2, + simulateRetries: 1, maxDownloadAttempts: 5, }, { name: "max-attempts=5, succeed at 5th attempt", - simulateRetries: 5, + simulateRetries: 4, maxDownloadAttempts: 5, }, { - name: "max-attempts=5, fail at 6th attempt", - simulateRetries: 6, + name: "max-attempts=5, fail at 5th attempt", + simulateRetries: 5, maxDownloadAttempts: 5, - expectedErr: "simulating download attempt 5/6", + expectedErr: "simulating download attempt failure 5/5", }, { - name: "max-attempts=0, fail after 1 attempt", + name: "max-attempts=1, fail after 1 attempt", simulateRetries: 1, - maxDownloadAttempts: 0, - expectedErr: "simulating download attempt 1/1", + maxDownloadAttempts: 1, + expectedErr: "simulating download attempt failure 1/1", }, } for _, tc := range tests {