-
Notifications
You must be signed in to change notification settings - Fork 38.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
improve e2e retry logic with standard wait.Poll() #8399
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ import ( | |
"github.com/GoogleCloudPlatform/kubernetes/pkg/client" | ||
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields" | ||
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels" | ||
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait" | ||
|
||
. "github.com/onsi/ginkgo" | ||
. "github.com/onsi/gomega" | ||
|
@@ -72,12 +73,14 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
// being run as the first e2e test just after the e2e cluster has been created. | ||
var err error | ||
const graceTime = 10 * time.Minute | ||
for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) { | ||
start := time.Now() | ||
expectNoError(wait.Poll(5*time.Second, graceTime, func() (bool, error) { | ||
if _, err = s.Get("elasticsearch-logging"); err == nil { | ||
break | ||
return true, nil | ||
} | ||
Logf("Attempt to check for the existence of the Elasticsearch service failed after %v", time.Since(start)) | ||
} | ||
return false, nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally you should return (true, err) here in cases of errors which are not worth retrying. That way you fail earlier, and get a more useful error message as output (e.g. "malformed request x" rather then "timed out"). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some test cases are worth retrying, right? |
||
})) | ||
Expect(err).NotTo(HaveOccurred()) | ||
|
||
// Wait for the Elasticsearch pods to enter the running state. | ||
|
@@ -95,7 +98,8 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
var statusCode float64 | ||
var esResponse map[string]interface{} | ||
err = nil | ||
for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) { | ||
start = time.Now() | ||
expectNoError(wait.Poll(5*time.Second, graceTime, func() (bool, error) { | ||
// Query against the root URL for Elasticsearch. | ||
body, err := c.Get(). | ||
Namespace(api.NamespaceDefault). | ||
|
@@ -105,26 +109,26 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
DoRaw() | ||
if err != nil { | ||
Logf("After %v proxy call to elasticsearch-loigging failed: %v", time.Since(start), err) | ||
continue | ||
return false, nil | ||
} | ||
esResponse, err = bodyToJSON(body) | ||
if err != nil { | ||
Logf("After %v failed to convert Elasticsearch JSON response %v to map[string]interface{}: %v", time.Since(start), string(body), err) | ||
continue | ||
return false, nil | ||
} | ||
statusIntf, ok := esResponse["status"] | ||
if !ok { | ||
Logf("After %v Elasticsearch response has no status field: %v", time.Since(start), esResponse) | ||
continue | ||
return false, nil | ||
} | ||
statusCode, ok = statusIntf.(float64) | ||
if !ok { | ||
// Assume this is a string returning Failure. Retry. | ||
Logf("After %v expected status to be a float64 but got %v of type %T", time.Since(start), statusIntf, statusIntf) | ||
continue | ||
return false, nil | ||
} | ||
break | ||
} | ||
return true, nil | ||
})) | ||
Expect(err).NotTo(HaveOccurred()) | ||
if int(statusCode) != 200 { | ||
Failf("Elasticsearch cluster has a bad status: %v", statusCode) | ||
|
@@ -233,7 +237,8 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
By("Checking all the log lines were ingested into Elasticsearch") | ||
missing := 0 | ||
expected := nodeCount * countTo | ||
for start := time.Now(); time.Since(start) < graceTime; time.Sleep(10 * time.Second) { | ||
start = time.Now() | ||
expectNoError(wait.Poll(10*time.Second, graceTime, func() (bool, error) { | ||
// Ask Elasticsearch to return all the log lines that were tagged with the underscore | ||
// verison of the name. Ask for twice as many log lines as we expect to check for | ||
// duplication bugs. | ||
|
@@ -248,13 +253,13 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
DoRaw() | ||
if err != nil { | ||
Logf("After %v failed to make proxy call to elasticsearch-logging: %v", time.Since(start), err) | ||
continue | ||
return false, nil | ||
} | ||
|
||
response, err := bodyToJSON(body) | ||
if err != nil { | ||
Logf("After %v failed to unmarshal response: %v", time.Since(start), err) | ||
continue | ||
return false, nil | ||
} | ||
hits, ok := response["hits"].(map[string]interface{}) | ||
if !ok { | ||
|
@@ -263,17 +268,17 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
totalF, ok := hits["total"].(float64) | ||
if !ok { | ||
Logf("After %v hits[total] not of the expected type: %T", time.Since(start), hits["total"]) | ||
continue | ||
return false, nil | ||
} | ||
total := int(totalF) | ||
if total < expected { | ||
Logf("After %v expecting to find %d log lines but saw only %d", time.Since(start), expected, total) | ||
continue | ||
return false, nil | ||
} | ||
h, ok := hits["hits"].([]interface{}) | ||
if !ok { | ||
Logf("After %v hits not of the expected type: %T", time.Since(start), hits["hits"]) | ||
continue | ||
return false, nil | ||
} | ||
// Initialize data-structure for observing counts. | ||
observed := make([][]int, nodeCount) | ||
|
@@ -329,10 +334,10 @@ func ClusterLevelLoggingWithElasticsearch(c *client.Client) { | |
} | ||
if missing != 0 { | ||
Logf("After %v still missing %d log lines", time.Since(start), missing) | ||
continue | ||
return false, nil | ||
} | ||
Logf("After %s found all %d log lines", time.Since(start), expected) | ||
return | ||
} | ||
return true, nil | ||
})) | ||
Failf("Failed to find all %d log lines", expected) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can remove this and use graceTime instead in logging.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry - I'm confused. Ignore the above comment.