Move rest of performance data gathered by tests to Summaries #45590

Merged: 1 commit, May 11, 2017
46 changes: 8 additions & 38 deletions test/e2e/framework/framework.go
@@ -21,7 +21,6 @@ import (
     "bytes"
     "encoding/json"
     "fmt"
-    "io/ioutil"
     "os"
     "path"
     "reflect"
@@ -91,6 +90,10 @@ type Framework struct {
 
     // configuration for framework's client
     Options FrameworkOptions
+
+    // Place where various additional data is stored during test run to be printed to ReportDir,
+    // or stdout if ReportDir is not set once test ends.
+    TestSummaries []TestDataSummary
 }
 
 type TestDataSummary interface {
@@ -307,19 +310,18 @@ func (f *Framework) AfterEach() {
         LogContainersInPodsWithLabels(f.ClientSet, metav1.NamespaceSystem, ImagePullerLabels, "image-puller", logFunc)
     }
 
-    summaries := make([]TestDataSummary, 0)
     if TestContext.GatherKubeSystemResourceUsageData != "false" && TestContext.GatherKubeSystemResourceUsageData != "none" && f.gatherer != nil {
         By("Collecting resource usage data")
         summary, resourceViolationError := f.gatherer.stopAndSummarize([]int{90, 99, 100}, f.AddonResourceConstraints)
         defer ExpectNoError(resourceViolationError)
-        summaries = append(summaries, summary)
+        f.TestSummaries = append(f.TestSummaries, summary)
     }
 
     if TestContext.GatherLogsSizes {
         By("Gathering log sizes data")
         close(f.logsSizeCloseChannel)
         f.logsSizeWaitGroup.Wait()
-        summaries = append(summaries, f.logsSizeVerifier.GetSummary())
+        f.TestSummaries = append(f.TestSummaries, f.logsSizeVerifier.GetSummary())
     }
 
     if TestContext.GatherMetricsAfterTest {
@@ -334,44 +336,12 @@ func (f *Framework) AfterEach() {
             if err != nil {
                 Logf("MetricsGrabber failed to grab metrics (skipping metrics gathering): %v", err)
             } else {
-                summaries = append(summaries, (*MetricsForE2E)(&received))
+                f.TestSummaries = append(f.TestSummaries, (*MetricsForE2E)(&received))
             }
         }
     }
 
-    outputTypes := strings.Split(TestContext.OutputPrintType, ",")
-    now := time.Now()
-    for _, printType := range outputTypes {
-        switch printType {
-        case "hr":
-            for i := range summaries {
-                if TestContext.ReportDir == "" {
-                    Logf(summaries[i].PrintHumanReadable())
-                } else {
-                    // TODO: learn to extract test name and append it to the kind instead of timestamp.
-                    filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+f.BaseName+"_"+now.Format(time.RFC3339)+".txt")
-                    if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintHumanReadable()), 0644); err != nil {
-                        Logf("Failed to write file %v with test performance data: %v", filePath, err)
-                    }
-                }
-            }
-        case "json":
-            for i := range summaries {
-                if TestContext.ReportDir == "" {
-                    Logf("%v JSON\n%v", summaries[i].SummaryKind(), summaries[i].PrintJSON())
-                    Logf("Finished")
-                } else {
-                    // TODO: learn to extract test name and append it to the kind instead of timestamp.
-                    filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+f.BaseName+"_"+now.Format(time.RFC3339)+".json")
-                    if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintJSON()), 0644); err != nil {
-                        Logf("Failed to write file %v with test performance data: %v", filePath, err)
-                    }
-                }
-            }
-        default:
-            Logf("Unknown output type: %v. Skipping.", printType)
-        }
-    }
+    PrintSummaries(f.TestSummaries, f.BaseName)
 
     // Check whether all nodes are ready after the test.
     // This is explicitly done at the very end of the test, to avoid
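The TestDataSummary interface itself sits in a collapsed portion of this diff, but judging from the implementations added to metrics_util.go below, it consists of SummaryKind() string, PrintHumanReadable() string, and PrintJSON() string. A minimal sketch of a custom summary under that assumption (mySummary is hypothetical, not part of this PR):

    // mySummary is an illustrative implementation of the inferred TestDataSummary interface.
    type mySummary struct {
        Data string `json:"data"`
    }

    // SummaryKind names the summary; PrintSummaries also uses it in the output file name.
    func (s *mySummary) SummaryKind() string        { return "MySummary" }
    func (s *mySummary) PrintHumanReadable() string { return PrettyPrintJSON(*s) }
    func (s *mySummary) PrintJSON() string          { return PrettyPrintJSON(*s) }

A test would queue it with f.TestSummaries = append(f.TestSummaries, &mySummary{Data: "..."}) and let AfterEach print it through PrintSummaries.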
97 changes: 62 additions & 35 deletions test/e2e/framework/metrics_util.go
@@ -150,12 +150,36 @@ type PodStartupLatency struct {
     Latency LatencyMetric `json:"latency"`
 }
 
+func (l *PodStartupLatency) SummaryKind() string {
+    return "PodStartupLatency"
+}
+
+func (l *PodStartupLatency) PrintHumanReadable() string {
+    return PrettyPrintJSON(*l)
+}
+
+func (l *PodStartupLatency) PrintJSON() string {
+    return PrettyPrintJSON(*l)
+}
+
 type SchedulingLatency struct {
     Scheduling LatencyMetric `json:"scheduling"`
     Binding    LatencyMetric `json:"binding"`
     Total      LatencyMetric `json:"total"`
 }
 
+func (l *SchedulingLatency) SummaryKind() string {
+    return "SchedulingLatency"
+}
+
+func (l *SchedulingLatency) PrintHumanReadable() string {
+    return PrettyPrintJSON(*l)
+}
+
+func (l *SchedulingLatency) PrintJSON() string {
+    return PrettyPrintJSON(*l)
+}
+
 type SaturationTime struct {
     TimeToSaturate time.Duration `json:"timeToStaturate"`
     NumberOfNodes  int           `json:"numberOfNodes"`
@@ -174,9 +198,23 @@ type APIResponsiveness struct {
     APICalls []APICall `json:"apicalls"`
 }
 
-func (a APIResponsiveness) Len() int { return len(a.APICalls) }
-func (a APIResponsiveness) Swap(i, j int) { a.APICalls[i], a.APICalls[j] = a.APICalls[j], a.APICalls[i] }
-func (a APIResponsiveness) Less(i, j int) bool {
+func (a *APIResponsiveness) SummaryKind() string {
+    return "APIResponsiveness"
+}
+
+func (a *APIResponsiveness) PrintHumanReadable() string {
+    return PrettyPrintJSON(*a)
+}
+
+func (a *APIResponsiveness) PrintJSON() string {
+    return PrettyPrintJSON(*a)
+}
+
+func (a *APIResponsiveness) Len() int { return len(a.APICalls) }
+func (a *APIResponsiveness) Swap(i, j int) {
+    a.APICalls[i], a.APICalls[j] = a.APICalls[j], a.APICalls[i]
+}
+func (a *APIResponsiveness) Less(i, j int) bool {
     return a.APICalls[i].Latency.Perc99 < a.APICalls[j].Latency.Perc99
 }

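Note that the sort.Interface methods (Len, Swap, Less) move to pointer receivers along with the new summary methods: readLatencyMetrics below now returns a *APIResponsiveness, and sort.Sort(sort.Reverse(metrics)) in HighLatencyRequests operates on that pointer directly.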
@@ -225,17 +263,17 @@ func (a *APIResponsiveness) addMetricRequestCount(resource, verb string, count i
     a.APICalls = append(a.APICalls, apicall)
 }
 
-func readLatencyMetrics(c clientset.Interface) (APIResponsiveness, error) {
+func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
     var a APIResponsiveness
 
     body, err := getMetrics(c)
     if err != nil {
-        return a, err
+        return nil, err
     }
 
     samples, err := extractMetricSamples(body)
     if err != nil {
-        return a, err
+        return nil, err
     }
 
     ignoredResources := sets.NewString("events")
@@ -262,7 +300,7 @@ func readLatencyMetrics(c clientset.Interface) (APIResponsiveness, error) {
             latency := sample.Value
             quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
             if err != nil {
-                return a, err
+                return nil, err
             }
             a.addMetricRequestLatency(resource, verb, quantile, time.Duration(int64(latency))*time.Microsecond)
         case "apiserver_request_count":
@@ -272,22 +310,22 @@ func readLatencyMetrics(c clientset.Interface) (APIResponsiveness, error) {
         }
     }
 
-    return a, err
+    return &a, err
 }
 
 // Prints top five summary metrics for request types with latency and returns
 // number of such request types above threshold.
-func HighLatencyRequests(c clientset.Interface) (int, error) {
+func HighLatencyRequests(c clientset.Interface) (int, *APIResponsiveness, error) {
     metrics, err := readLatencyMetrics(c)
     if err != nil {
-        return 0, err
+        return 0, metrics, err
     }
     sort.Sort(sort.Reverse(metrics))
     badMetrics := 0
     top := 5
-    for _, metric := range metrics.APICalls {
+    for i := range metrics.APICalls {
         isBad := false
-        if metric.Latency.Perc99 > apiCallLatencyThreshold {
+        if metrics.APICalls[i].Latency.Perc99 > apiCallLatencyThreshold {
             badMetrics++
             isBad = true
         }
@@ -297,23 +335,15 @@ func HighLatencyRequests(c clientset.Interface) (int, error) {
             if isBad {
                 prefix = "WARNING "
             }
-            Logf("%vTop latency metric: %+v", prefix, metric)
+            Logf("%vTop latency metric: %+v", prefix, metrics.APICalls[i])
         }
     }
-
-    // TODO(random-liu): Remove the log when we migrate to new perfdash
-    Logf("API calls latencies: %s", PrettyPrintJSON(metrics))
-    // Log perf data
-    PrintPerfData(ApiCallToPerfData(metrics))
-
-    return badMetrics, nil
+    return badMetrics, metrics, nil
 }
 
 // Verifies whether 50, 90 and 99th percentiles of PodStartupLatency are
 // within the threshold.
-func VerifyPodStartupLatency(latency PodStartupLatency) error {
-    Logf("Pod startup latency: %s", PrettyPrintJSON(latency))
-
+func VerifyPodStartupLatency(latency *PodStartupLatency) error {
     if latency.Latency.Perc50 > podStartupThreshold {
         return fmt.Errorf("too high pod startup latency 50th percentile: %v", latency.Latency.Perc50)
     }
@@ -349,7 +379,7 @@ func getMetrics(c clientset.Interface) (string, error) {
 }
 
 // Retrieves scheduler metrics information.
-func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
+func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
     result := SchedulingLatency{}
 
     // Check if master Node is registered
@@ -358,7 +388,7 @@ func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
 
     subResourceProxyAvailable, err := ServerVersionGTE(SubResourcePodProxyVersion, c.Discovery())
     if err != nil {
-        return result, err
+        return nil, err
     }
 
     var data string
@@ -400,13 +430,13 @@ func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
         cmd := "curl http://localhost:10251/metrics"
         sshResult, err := SSH(cmd, GetMasterHost()+":22", TestContext.Provider)
         if err != nil || sshResult.Code != 0 {
-            return result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
+            return &result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
         }
         data = sshResult.Stdout
     }
     samples, err := extractMetricSamples(data)
     if err != nil {
-        return result, err
+        return nil, err
     }
 
     for _, sample := range samples {
@@ -426,23 +456,20 @@ func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
         latency := sample.Value
         quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
         if err != nil {
-            return result, err
+            return nil, err
         }
         setQuantile(metric, quantile, time.Duration(int64(latency))*time.Microsecond)
     }
-    return result, nil
+    return &result, nil
 }
 
 // Verifies (currently just by logging them) the scheduling latencies.
-func VerifySchedulerLatency(c clientset.Interface) error {
+func VerifySchedulerLatency(c clientset.Interface) (*SchedulingLatency, error) {
     latency, err := getSchedulingLatency(c)
     if err != nil {
-        return err
+        return nil, err
     }
-    Logf("Scheduling latency: %s", PrettyPrintJSON(latency))
-
-    // TODO: Add some reasonable checks once we know more about the values.
-    return nil
+    return latency, nil
 }
 
 func PrettyPrintJSON(metrics interface{}) string {
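With these signature changes, the metrics helpers hand the gathered data back to the caller rather than logging perf data themselves. A sketch of what a consuming measurement test might now do (illustrative fragment, not part of this diff):

    // Gather API responsiveness, queue it as a summary, and fail on threshold violations.
    badRequests, metricsSummary, err := HighLatencyRequests(c)
    ExpectNoError(err)
    if metricsSummary != nil {
        f.TestSummaries = append(f.TestSummaries, metricsSummary) // printed by PrintSummaries in AfterEach
    }
    if badRequests > 0 {
        Failf("%d request types exceeded the latency threshold", badRequests)
    }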
2 changes: 1 addition & 1 deletion test/e2e/framework/test_context.go
@@ -162,7 +162,7 @@ func RegisterCommonFlags() {
     flag.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all' framework will be monitoring resource usage of system all add-ons in (some) e2e tests, if set to 'master' framework will be monitoring master node only, if set to 'none' of 'false' monitoring will be turned off.")
     flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
     flag.BoolVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after each test.")
-    flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "hr", "Comma separated list: 'hr' for human readable summaries 'json' for JSON ones.")
+    flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
     flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")
     flag.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after test run.")
     flag.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
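Two things change in this flag: the default flips from 'hr' to 'json', and the comma-separated multi-format support goes away; AfterEach no longer splits OutputPrintType, and PrintSummaries (below) switches on the single value, routing anything other than 'hr' through the JSON branch. Human-readable output is now opt-in via --output-print-type=hr.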
35 changes: 35 additions & 0 deletions test/e2e/framework/util.go
@@ -5526,3 +5526,38 @@ func GetAzureCloud() (*azure.Cloud, error) {
     }
     return cloud, nil
 }
+
+func PrintSummaries(summaries []TestDataSummary, testBaseName string) {
+    now := time.Now()
+    for i := range summaries {
+        Logf("Printing summary: %v", summaries[i].SummaryKind())
+        switch TestContext.OutputPrintType {
+        case "hr":
+            if TestContext.ReportDir == "" {
+                Logf(summaries[i].PrintHumanReadable())
+            } else {
+                // TODO: learn to extract test name and append it to the kind instead of timestamp.
+                filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+testBaseName+"_"+now.Format(time.RFC3339)+".txt")
+                if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintHumanReadable()), 0644); err != nil {
+                    Logf("Failed to write file %v with test performance data: %v", filePath, err)
+                }
+            }
+        case "json":
+            fallthrough
+        default:
+            if TestContext.OutputPrintType != "json" {
+                Logf("Unknown output type: %v. Printing JSON", TestContext.OutputPrintType)
+            }
+            if TestContext.ReportDir == "" {
+                Logf("%v JSON\n%v", summaries[i].SummaryKind(), summaries[i].PrintJSON())
+                Logf("Finished")
+            } else {
+                // TODO: learn to extract test name and append it to the kind instead of timestamp.
+                filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+testBaseName+"_"+now.Format(time.RFC3339)+".json")
+                if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintJSON()), 0644); err != nil {
+                    Logf("Failed to write file %v with test performance data: %v", filePath, err)
+                }
+            }
+        }
+    }
+}
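PrintSummaries is now the single place where summaries are rendered: with --report-dir set, each summary is written to its own file named <SummaryKind>_<testBaseName>_<RFC3339 timestamp> with a .txt or .json extension (for example, something like /tmp/e2e/APIResponsiveness_density_2017-05-11T12:34:56Z.json; the path is illustrative); with it unset, the output goes to the test log instead.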