Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Logs node e2e perf data to standalone json files #47260

Merged
merged 1 commit into from
Jun 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions hack/.linted_packages
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ staging/src/k8s.io/metrics/pkg/apis/metrics/install
staging/src/k8s.io/sample-apiserver
staging/src/k8s.io/sample-apiserver/pkg/apis/wardle/install
test/e2e/perftype
test/e2e_node/perftype
test/e2e_node/runner/local
test/images/clusterapi-tester
test/images/entrypoint-tester
Expand Down
11 changes: 6 additions & 5 deletions test/e2e/framework/perf_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ func PodStartupLatencyToPerfData(latency *PodStartupLatency) *perftype.PerfData
return perfData
}

// currentKubeletPerfMetricsVersion is the current kubelet performance metrics version. We should
// bump up the version each time we make incompatible change to the metrics.
const currentKubeletPerfMetricsVersion = "v2"
// CurrentKubeletPerfMetricsVersion is the current kubelet performance metrics
// version. This is used by multiple perf-related data structures. We should
// bump up the version each time we make an incompatible change to the metrics.
const CurrentKubeletPerfMetricsVersion = "v2"

// ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function
// only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary.
Expand Down Expand Up @@ -119,7 +120,7 @@ func ResourceUsageToPerfDataWithLabels(usagePerNode ResourceUsagePerNode, labels
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
Version: CurrentKubeletPerfMetricsVersion,
DataItems: items,
Labels: labels,
}
Expand Down Expand Up @@ -149,7 +150,7 @@ func CPUUsageToPerfDataWithLabels(usagePerNode NodesCPUSummary, labels map[strin
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
Version: CurrentKubeletPerfMetricsVersion,
DataItems: items,
Labels: labels,
}
Expand Down
2 changes: 2 additions & 0 deletions test/e2e_node/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ go_library(
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e/perftype:go_default_library",
"//test/e2e_node/perftype:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/google/cadvisor/client/v2:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
Expand Down Expand Up @@ -163,6 +164,7 @@ filegroup(
":package-srcs",
"//test/e2e_node/builder:all-srcs",
"//test/e2e_node/environment:all-srcs",
"//test/e2e_node/perftype:all-srcs",
"//test/e2e_node/remote:all-srcs",
"//test/e2e_node/runner/local:all-srcs",
"//test/e2e_node/runner/remote:all-srcs",
Expand Down
66 changes: 47 additions & 19 deletions test/e2e_node/benchmark_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,57 @@ package e2e_node

import (
"fmt"
"io/ioutil"
"path"
"sort"
"strconv"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/perftype"
nodeperftype "k8s.io/kubernetes/test/e2e_node/perftype"

. "github.com/onsi/gomega"
)

const (
// TODO(coufon): be consistent with perf_util.go version
currentDataVersion = "v1"
TimeSeriesTag = "[Result:TimeSeries]"
TimeSeriesEnd = "[Finish:TimeSeries]"
TimeSeriesTag = "[Result:TimeSeries]"
TimeSeriesEnd = "[Finish:TimeSeries]"
)

type NodeTimeSeries struct {
// value in OperationData is an array of timestamps
OperationData map[string][]int64 `json:"op_series,omitempty"`
ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"`
Labels map[string]string `json:"labels"`
Version string `json:"version"`
// dumpDataToFile stamps labels with the current UTC time (seconds since the
// epoch, as a decimal string under the "timestamp" key) and then writes data
// as pretty-printed JSON into framework.TestContext.ReportDir. The output
// file is named "<prefix>-<test name>.json", where the test name is read
// from labels["test"]. A write failure is logged, not propagated.
func dumpDataToFile(data interface{}, labels map[string]string, prefix string) {
	testName := labels["test"]
	destFile := path.Join(framework.TestContext.ReportDir, fmt.Sprintf("%s-%s.json", prefix, testName))
	// NOTE: this mutates the caller's labels map on purpose, so the
	// timestamp ends up inside the serialized data as well.
	labels["timestamp"] = strconv.FormatInt(time.Now().UTC().Unix(), 10)
	framework.Logf("Dumping perf data for test %q to %q.", testName, destFile)
	err := ioutil.WriteFile(destFile, []byte(framework.PrettyPrintJSON(data)), 0644)
	if err != nil {
		framework.Logf("Failed to write perf data for test %q to %q: %v", testName, destFile, err)
	}
}

// logPerfData records the given perf data. When
// framework.TestContext.ReportDir is set, the data is dumped to a standalone
// json file named "performance-<perfType>-<test>.json"; otherwise it is
// printed to the general build log. perfType identifies the kind of perf
// data, such as "cpu" or "memory". Errors are logged rather than returned.
func logPerfData(p *perftype.PerfData, perfType string) {
	if framework.TestContext.ReportDir != "" {
		dumpDataToFile(p, p.Labels, "performance-"+perfType)
		return
	}
	framework.PrintPerfData(p)
}

// logDensityTimeSeries logs the time series data of operation and resource usage
// logDensityTimeSeries writes the time series data of operation and resource
// usage to a standalone json file if the framework.TestContext.ReportDir is
// non-empty, or to the general build log otherwise. If an error occurs,
// no perf data will be logged.
func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1.Time, testInfo map[string]string) {
timeSeries := &NodeTimeSeries{
timeSeries := &nodeperftype.NodeTimeSeries{
Labels: testInfo,
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
}
// Attach operation time series.
timeSeries.OperationData = map[string][]int64{
Expand All @@ -58,8 +79,12 @@ func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1
}
// Attach resource time series.
timeSeries.ResourceData = rc.GetResourceTimeSeries()
// Log time series with tags
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)

if framework.TestContext.ReportDir == "" {
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)
return
}
dumpDataToFile(timeSeries, timeSeries.Labels, "time_series")
}

type int64arr []int64
Expand All @@ -82,7 +107,7 @@ func getCumulatedPodTimeSeries(timePerPod map[string]metav1.Time) []int64 {
// getLatencyPerfData returns perf data of pod startup latency.
func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]string) *perftype.PerfData {
return &perftype.PerfData{
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
DataItems: []perftype.DataItem{
{
Data: map[string]float64{
Expand All @@ -105,7 +130,7 @@ func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]str
// getThroughputPerfData returns perf data of pod creation startup throughput.
func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) *perftype.PerfData {
return &perftype.PerfData{
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
DataItems: []perftype.DataItem{
{
Data: map[string]float64{
Expand All @@ -123,8 +148,10 @@ func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatenc
}
}

// getTestNodeInfo fetches the capacity of a node from API server and returns a map of labels.
func getTestNodeInfo(f *framework.Framework, testName string) map[string]string {
// getTestNodeInfo returns a label map containing the test name and
// description, the name of the node on which the test will be run, the image
// name of the node, and the node capacities.
func getTestNodeInfo(f *framework.Framework, testName, testDesc string) map[string]string {
nodeName := framework.TestContext.NodeName
node, err := f.ClientSet.Core().Nodes().Get(nodeName, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
Expand Down Expand Up @@ -154,5 +181,6 @@ func getTestNodeInfo(f *framework.Framework, testName string) map[string]string
"test": testName,
"image": node.Status.NodeInfo.OSImage,
"machine": fmt.Sprintf("cpu:%dcore,memory:%.1fGB", cpuValue, float32(memoryValue)/(1024*1024*1024)),
"desc": testDesc,
}
}
34 changes: 17 additions & 17 deletions test/e2e_node/density_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {

for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
itArg.podsNr, itArg.interval), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)

batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, false)

Expand Down Expand Up @@ -152,10 +152,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {

for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]",
itArg.podsNr, itArg.interval), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]", itArg.podsNr, itArg.interval)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)

batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, true)

Expand Down Expand Up @@ -189,10 +189,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {

for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]",
itArg.podsNr, itArg.interval, itArg.APIQPSLimit), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]", itArg.podsNr, itArg.interval, itArg.APIQPSLimit)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
// The latency caused by API QPS limit takes a large portion (up to ~33%) of e2e latency.
// It makes the pod startup latency of Kubelet (creation throughput as well) under-estimated.
// Here we set API QPS limit from default 5 to 60 in order to test real Kubelet performance.
Expand Down Expand Up @@ -232,10 +232,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {

for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
itArg.podsNr, itArg.bgPodsNr), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr)
It(desc, func() {
itArg.createMethod = "sequence"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo)

By("Verifying latency")
Expand Down Expand Up @@ -265,10 +265,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {

for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]",
itArg.podsNr, itArg.bgPodsNr), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]", itArg.podsNr, itArg.bgPodsNr)
It(desc, func() {
itArg.createMethod = "sequence"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo)

By("Verifying latency")
Expand Down Expand Up @@ -551,7 +551,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD
podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}

// log latency perf data
framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo))
logPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo), "latency")

if isVerify {
// check whether e2e pod startup time is acceptable.
Expand All @@ -567,7 +567,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD

// logThroughput calculates and logs pod creation throughput.
func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) {
framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo))
logPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo), "throughput")
}

// increaseKubeletAPIQPSLimit sets Kubelet API QPS via ConfigMap. Kubelet will restart with the new QPS.
Expand Down
2 changes: 1 addition & 1 deletion test/e2e_node/gubernator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ for upload_attempt in $(seq 3); do
if [[ -d "${ARTIFACTS}" && -n $(ls -A "${ARTIFACTS}") ]]; then
V=2 kube::log::status "Uploading artifacts"
gsutil -m -q -o "GSUtil:use_magicfile=True" cp -a "${gcs_acl}" -r -c \
-z log,xml,xml "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue
-z log,xml,json "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue
fi
break
done
Expand Down
27 changes: 27 additions & 0 deletions test/e2e_node/perftype/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package(default_visibility = ["//visibility:public"])

licenses(["notice"])

load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)

go_library(
name = "go_default_library",
srcs = ["perftype.go"],
tags = ["automanaged"],
)

filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)

filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)
34 changes: 34 additions & 0 deletions test/e2e_node/perftype/perftype.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package perftype

// ResourceSeries defines the time series of the resource usage.
// The three slices are parallel: index i of each refers to the same sample.
type ResourceSeries struct {
	// Timestamp holds the sample times (epoch values; exact unit is set by
	// the collector that fills this struct — TODO confirm).
	Timestamp []int64 `json:"ts"`
	// CPUUsageInMilliCores holds per-sample CPU usage in millicores.
	CPUUsageInMilliCores []int64 `json:"cpu"`
	// MemoryRSSInMegaBytes holds per-sample resident memory in megabytes.
	MemoryRSSInMegaBytes []int64 `json:"memory"`
	// Units maps metric names (e.g. "cpu", "memory") to their unit strings.
	Units map[string]string `json:"unit"`
}

// NodeTimeSeries defines the time series of the operations and the resource
// usage.
type NodeTimeSeries struct {
	// OperationData maps an operation name to an array of timestamps at
	// which that operation was observed.
	OperationData map[string][]int64 `json:"op_series,omitempty"`
	// ResourceData maps a container/component name to its resource usage
	// time series.
	ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"`
	// Labels carries metadata describing the run (e.g. test name, node).
	Labels map[string]string `json:"labels"`
	// Version is the data format version of this series.
	Version string `json:"version"`
}
7 changes: 7 additions & 0 deletions test/e2e_node/remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ func getTestArtifacts(host, testDir string) error {
if err != nil {
return err
}
// Copy json files (if any) to artifacts.
if _, err = SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil {
_, err = runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIp(host), testDir), *resultsDir)
if err != nil {
return err
}
}
// Copy junit to the top of artifacts
_, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), *resultsDir)
if err != nil {
Expand Down
15 changes: 4 additions & 11 deletions test/e2e_node/resource_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/util/procfs"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e_node/perftype"

. "github.com/onsi/gomega"
)
Expand Down Expand Up @@ -440,19 +441,11 @@ func newTestPods(numPods int, volume bool, imageName, podType string) []*v1.Pod
return pods
}

// Time series of resource usage
type ResourceSeries struct {
Timestamp []int64 `json:"ts"`
CPUUsageInMilliCores []int64 `json:"cpu"`
MemoryRSSInMegaBytes []int64 `json:"memory"`
Units map[string]string `json:"unit"`
}

// GetResourceSeriesWithLabels gets the time series of resource usage of each container.
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries {
resourceSeries := make(map[string]*ResourceSeries)
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*perftype.ResourceSeries {
resourceSeries := make(map[string]*perftype.ResourceSeries)
for key, name := range systemContainers {
newSeries := &ResourceSeries{Units: map[string]string{
newSeries := &perftype.ResourceSeries{Units: map[string]string{
"cpu": "mCPU",
"memory": "MB",
}}
Expand Down