Skip to content

Commit

Permalink
In GetRemoteSignedCertificate, if the certificate hasn't been issued …
Browse files Browse the repository at this point in the history
…in 5 seconds it errors

with a context deadline exceeded error and does not retry. Update it so that, if the node has not
been updated within 5 seconds, it attempts to get the node status again after an exponential
backoff.

If NodeCertificateStatus fails with any other error (not context deadline exceeded),
GetRemoteSignedCertificate will try again with a different connection.

Signed-off-by: cyli <ying.li@docker.com>
  • Loading branch information
cyli committed Mar 28, 2017
1 parent 970b45a commit fa5f8be
Show file tree
Hide file tree
Showing 3 changed files with 343 additions and 18 deletions.
45 changes: 35 additions & 10 deletions ca/certificates.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,6 @@ func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, kw KeyWrit
// the local connection will not be returned by the connection
// broker anymore.
config.ForceRemote = true

}
if err != nil {
return nil, err
Expand Down Expand Up @@ -773,7 +772,6 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, rootCAPool *x50
if rootCAPool == nil {
return nil, errors.New("valid root CA pool required")
}

creds := config.Credentials

if creds == nil {
Expand Down Expand Up @@ -810,17 +808,39 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, rootCAPool *x50

// Exponential backoff with Max of 30 seconds to wait for a new retry
for {
timeout := 5 * time.Second
if config.NodeCertificateStatusRequestTimeout > 0 {
timeout = config.NodeCertificateStatusRequestTimeout
}
// Send the Request and retrieve the certificate
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
stateCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
statusResponse, err := caClient.NodeCertificateStatus(ctx, statusRequest)
if err != nil {
statusResponse, err := caClient.NodeCertificateStatus(stateCtx, statusRequest)
switch {
case err != nil && grpc.Code(err) != codes.DeadlineExceeded:
conn.Close(false)
return nil, err
}
// Because IssueNodeCertificate succeeded, if this call failed likely it is due to an issue with this
// particular connection, so we need to get another. We should try a remote connection - the local node
// may be a manager that was demoted, so the local connection (which is preferred) may not work.
config.ForceRemote = true
conn, err = getGRPCConnection(creds, config.ConnBroker, config.ForceRemote)
if err != nil {
return nil, err
}
caClient = api.NewNodeCAClient(conn.ClientConn)

case err != nil: // this was a deadline exceeded error - we need to figure out which context
select { // the entire `GetRemoteSignedCertificate` call context was cancelled - return the error
case <-ctx.Done():
conn.Close(true)
return nil, err
default:
// the RPC call to `NodeCertificateStatus` was cancelled - retry the next loop through after an
// exponential backoff
}

// If the certificate was issued, return
if statusResponse.Status.State == api.IssuanceStateIssued {
case statusResponse.Status.State == api.IssuanceStateIssued:
if statusResponse.Certificate == nil {
conn.Close(false)
return nil, errors.New("no certificate in CertificateStatus response")
Expand All @@ -838,9 +858,14 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, rootCAPool *x50
}

// If we're still pending, the issuance failed, or the state is unknown
// let's continue trying.
// let's continue trying after an exponential backoff
expBackoff.Failure(nil, nil)
time.Sleep(expBackoff.Proceed(nil))
select {
case <-ctx.Done():
conn.Close(true)
return nil, err
case <-time.After(expBackoff.Proceed(nil)):
}
}
}

Expand Down
Loading

0 comments on commit fa5f8be

Please sign in to comment.