delete unused func in latencies.go

YuikoTakada committed Nov 7, 2019
1 parent 24fb2c1 commit 3ebe59c
Showing 3 changed files with 0 additions and 330 deletions.
3 changes: 0 additions & 3 deletions test/e2e/framework/metrics/BUILD
@@ -27,17 +27,14 @@ go_library(
"//pkg/kubelet/dockershim/metrics:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/master/ports:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
"//test/e2e/framework/log:go_default_library",
"//test/e2e/framework/ssh:go_default_library",
"//test/e2e/perftype:go_default_library",
"//test/e2e/system:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
46 changes: 0 additions & 46 deletions test/e2e/framework/metrics/api.go
@@ -18,7 +18,6 @@ package metrics

import (
"fmt"
"time"

e2eperftype "k8s.io/kubernetes/test/e2e/perftype"
)
@@ -61,51 +60,6 @@ func (a *APIResponsiveness) Less(i, j int) bool {
	return a.APICalls[i].Latency.Perc99 < a.APICalls[j].Latency.Perc99
}

// Set request latency for a particular quantile in the APICall metric entry (creating one if necessary).
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
// Only 0.5, 0.9 and 0.99 quantiles are supported.
func (a *APIResponsiveness) addMetricRequestLatency(resource, subresource, verb, scope string, quantile float64, latency time.Duration) {
	for i, apicall := range a.APICalls {
		if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb && apicall.Scope == scope {
			a.APICalls[i] = setQuantileAPICall(apicall, quantile, latency)
			return
		}
	}
	apicall := setQuantileAPICall(APICall{Resource: resource, Subresource: subresource, Verb: verb, Scope: scope}, quantile, latency)
	a.APICalls = append(a.APICalls, apicall)
}

// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
// Only 0.5, 0.9 and 0.99 quantiles are supported.
func setQuantileAPICall(apicall APICall, quantile float64, latency time.Duration) APICall {
	setQuantile(&apicall.Latency, quantile, latency)
	return apicall
}

// Only 0.5, 0.9 and 0.99 quantiles are supported.
func setQuantile(metric *LatencyMetric, quantile float64, latency time.Duration) {
	switch quantile {
	case 0.5:
		metric.Perc50 = latency
	case 0.9:
		metric.Perc90 = latency
	case 0.99:
		metric.Perc99 = latency
	}
}

// Add request count to the APICall metric entry (creating one if necessary).
func (a *APIResponsiveness) addMetricRequestCount(resource, subresource, verb, scope string, count int) {
	for i, apicall := range a.APICalls {
		if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb && apicall.Scope == scope {
			a.APICalls[i].Count += count
			return
		}
	}
	apicall := APICall{Resource: resource, Subresource: subresource, Verb: verb, Count: count, Scope: scope}
	a.APICalls = append(a.APICalls, apicall)
}

// currentAPICallMetricsVersion is the current apicall performance metrics version. We should
// bump up the version each time we make incompatible change to the metrics.
const currentAPICallMetricsVersion = "v1"
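For context, the helpers removed above had their only callers in latencies.go, which this commit deletes as well (see the diff below). A minimal sketch of the old call pattern, with made-up sample values modeled on the example lines quoted in readLatencyMetrics:

var a APIResponsiveness

// A summary sample such as
//   apiserver_request_latencies_summary{resource="pods",verb="LIST",scope="cluster",quantile="0.99"} 908
// was folded into the matching APICall entry as a per-quantile latency
// (the values are reported in microseconds):
a.addMetricRequestLatency("pods", "", "LIST", "cluster", 0.99, 908*time.Microsecond)

// A counter sample such as
//   apiserver_request_total{resource="pods",verb="LIST",scope="cluster"} 233
// was folded in as a request count:
a.addMetricRequestCount("pods", "", "LIST", "cluster", 233)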
281 changes: 0 additions & 281 deletions test/e2e/framework/metrics/latencies.go
@@ -17,145 +17,20 @@ limitations under the License.
package metrics

import (
"context"
"fmt"
"math"
"sort"
"strconv"
"strings"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/component-base/metrics/testutil"
"k8s.io/kubernetes/pkg/master/ports"
schedulermetric "k8s.io/kubernetes/pkg/scheduler/metrics"
e2elog "k8s.io/kubernetes/test/e2e/framework/log"
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
"k8s.io/kubernetes/test/e2e/system"

"github.com/onsi/gomega"
)

const (
	// SingleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	// TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
	SingleCallTimeout = 5 * time.Minute

	// nodeStartupThreshold is a rough estimate of the time allocated for a pod to start on a node.
	nodeStartupThreshold = 4 * time.Second

	// We are setting 1s threshold for apicalls even in small clusters to avoid flakes.
	// The problem is that if long GC is happening in small clusters (where we have e.g.
	// 1-core master machines) and tests are pretty short, it may consume significant
	// portion of CPU and basically stop all the real work.
	// Increasing threshold to 1s is within our SLO and should solve this problem.
	apiCallLatencyThreshold time.Duration = 1 * time.Second

	// We use a higher threshold for list apicalls if the cluster is big (i.e having > 500 nodes)
	// as list response sizes are bigger in general for big clusters. We also use a higher threshold
	// for list calls at cluster scope (this includes non-namespaced and all-namespaced calls).
	apiListCallLatencyThreshold      time.Duration = 5 * time.Second
	apiClusterScopeListCallThreshold time.Duration = 10 * time.Second
	bigClusterNodeCountThreshold                   = 500
)

var schedulingLatencyMetricName = schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName

func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
	var a APIResponsiveness

	body, err := getMetrics(c)
	if err != nil {
		return nil, err
	}

	samples, err := testutil.ExtractMetricSamples(body)
	if err != nil {
		return nil, err
	}

	ignoredResources := sets.NewString("events")
	// TODO: figure out why we're getting non-capitalized proxy and fix this.
	ignoredVerbs := sets.NewString("WATCH", "WATCHLIST", "PROXY", "proxy", "CONNECT")

	for _, sample := range samples {
		// Example line:
		// apiserver_request_latencies_summary{resource="namespaces",verb="LIST",quantile="0.99"} 908
		// apiserver_request_total{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
		if sample.Metric[testutil.MetricNameLabel] != "apiserver_request_latencies_summary" &&
			sample.Metric[testutil.MetricNameLabel] != "apiserver_request_total" {
			continue
		}

		resource := string(sample.Metric["resource"])
		subresource := string(sample.Metric["subresource"])
		verb := string(sample.Metric["verb"])
		scope := string(sample.Metric["scope"])
		if ignoredResources.Has(resource) || ignoredVerbs.Has(verb) {
			continue
		}

		switch sample.Metric[testutil.MetricNameLabel] {
		case "apiserver_request_latencies_summary":
			latency := sample.Value
			quantile, err := strconv.ParseFloat(string(sample.Metric[testutil.QuantileLabel]), 64)
			if err != nil {
				return nil, err
			}
			a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond)
		case "apiserver_request_total":
			count := sample.Value
			a.addMetricRequestCount(resource, subresource, verb, scope, int(count))

		}
	}

	return &a, err
}

// HighLatencyRequests prints top five summary metrics for request types with latency and returns
// number of such request types above threshold. We use a higher threshold for
// list calls if nodeCount is above a given threshold (i.e. cluster is big).
func HighLatencyRequests(c clientset.Interface, nodeCount int) (int, *APIResponsiveness, error) {
	isBigCluster := (nodeCount > bigClusterNodeCountThreshold)
	metrics, err := readLatencyMetrics(c)
	if err != nil {
		return 0, metrics, err
	}
	sort.Sort(sort.Reverse(metrics))
	badMetrics := 0
	top := 5
	for i := range metrics.APICalls {
		latency := metrics.APICalls[i].Latency.Perc99
		isListCall := (metrics.APICalls[i].Verb == "LIST")
		isClusterScopedCall := (metrics.APICalls[i].Scope == "cluster")
		isBad := false
		latencyThreshold := apiCallLatencyThreshold
		if isListCall && isBigCluster {
			latencyThreshold = apiListCallLatencyThreshold
			if isClusterScopedCall {
				latencyThreshold = apiClusterScopeListCallThreshold
			}
		}
		if latency > latencyThreshold {
			isBad = true
			badMetrics++
		}
		if top > 0 || isBad {
			top--
			prefix := ""
			if isBad {
				prefix = "WARNING "
			}
			e2elog.Logf("%vTop latency metric: %+v", prefix, metrics.APICalls[i])
		}
	}
	return badMetrics, metrics, nil
}

// VerifyLatencyWithinThreshold verifies whether 50, 90 and 99th percentiles of a latency metric are
// within the expected threshold.
func VerifyLatencyWithinThreshold(threshold, actual LatencyMetric, metricName string) error {
@@ -171,147 +46,6 @@ func VerifyLatencyWithinThreshold(threshold, actual LatencyMetric, metricName st
	return nil
}
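The body of VerifyLatencyWithinThreshold is collapsed in this view. Going only by its signature and the LatencyMetric fields used elsewhere in this file, a hypothetical call might look like the following; the threshold numbers, the measuredLatency variable, and the metric name are all invented for illustration:

threshold := LatencyMetric{
	Perc50: 100 * time.Millisecond,
	Perc90: 250 * time.Millisecond,
	Perc99: 1 * time.Second,
}
if err := VerifyLatencyWithinThreshold(threshold, measuredLatency, "pod_startup"); err != nil {
	e2elog.Logf("latency outside SLO: %v", err)
}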

// ResetMetrics resets latency metrics in apiserver.
func ResetMetrics(c clientset.Interface) error {
e2elog.Logf("Resetting latency metrics in apiserver...")
body, err := c.CoreV1().RESTClient().Delete().AbsPath("/metrics").DoRaw()
if err != nil {
return err
}
if string(body) != "metrics reset\n" {
return fmt.Errorf("Unexpected response: %q", string(body))
}
return nil
}

// Retrieves metrics information.
func getMetrics(c clientset.Interface) (string, error) {
	body, err := c.CoreV1().RESTClient().Get().AbsPath("/metrics").DoRaw()
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// Sends REST request to kube scheduler metrics
func sendRestRequestToScheduler(c clientset.Interface, op, provider, cloudMasterName, masterHostname string) (string, error) {
	opUpper := strings.ToUpper(op)
	if opUpper != "GET" && opUpper != "DELETE" {
		return "", fmt.Errorf("Unknown REST request")
	}

	nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	// The following 4 lines are intended to replace framework.ExpectNoError(err)
	if err != nil {
		e2elog.Logf("Unexpected error occurred: %v", err)
	}
	gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())

	var masterRegistered = false
	for _, node := range nodes.Items {
		if system.DeprecatedMightBeMasterNode(node.Name) {
			masterRegistered = true
		}
	}

	var responseText string
	if masterRegistered {
		ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout)
		defer cancel()

		body, err := c.CoreV1().RESTClient().Verb(opUpper).
			Context(ctx).
			Namespace(metav1.NamespaceSystem).
			Resource("pods").
			Name(fmt.Sprintf("kube-scheduler-%v:%v", cloudMasterName, ports.InsecureSchedulerPort)).
			SubResource("proxy").
			Suffix("metrics").
			Do().Raw()

		// The following 4 lines are intended to replace
		// framework.ExpectNoError(err).
		if err != nil {
			e2elog.Logf("Unexpected error occurred: %v", err)
		}
		gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
		responseText = string(body)
	} else {
		// If master is not registered fall back to old method of using SSH.
		if provider == "gke" || provider == "eks" {
			e2elog.Logf("Not grabbing scheduler metrics through master SSH: unsupported for %s", provider)
			return "", nil
		}

		cmd := "curl -X " + opUpper + " http://localhost:10251/metrics"
		sshResult, err := e2essh.SSH(cmd, masterHostname+":22", provider)
		if err != nil || sshResult.Code != 0 {
			return "", fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
		}
		responseText = sshResult.Stdout
	}
	return responseText, nil
}

// Retrieves scheduler latency metrics.
func getSchedulingLatency(c clientset.Interface, provider, cloudMasterName, masterHostname string) (*SchedulingMetrics, error) {
	result := SchedulingMetrics{}
	data, err := sendRestRequestToScheduler(c, "GET", provider, cloudMasterName, masterHostname)
	if err != nil {
		return nil, err
	}

	samples, err := testutil.ExtractMetricSamples(data)
	if err != nil {
		return nil, err
	}

	for _, sample := range samples {
		if string(sample.Metric[testutil.MetricNameLabel]) != schedulingLatencyMetricName {
			continue
		}

		var metric *LatencyMetric
		switch sample.Metric[schedulermetric.OperationLabel] {
		case schedulermetric.PredicateEvaluation:
			metric = &result.PredicateEvaluationLatency
		case schedulermetric.PriorityEvaluation:
			metric = &result.PriorityEvaluationLatency
		case schedulermetric.PreemptionEvaluation:
			metric = &result.PreemptionEvaluationLatency
		case schedulermetric.Binding:
			metric = &result.BindingLatency
		}
		if metric == nil {
			continue
		}

		quantile, err := strconv.ParseFloat(string(sample.Metric[testutil.QuantileLabel]), 64)
		if err != nil {
			return nil, err
		}
		setQuantile(metric, quantile, time.Duration(int64(float64(sample.Value)*float64(time.Second))))
	}
	return &result, nil
}

// VerifySchedulerLatency verifies (currently just by logging them) the scheduling latencies.
func VerifySchedulerLatency(c clientset.Interface, provider, cloudMasterName, masterHostname string) (*SchedulingMetrics, error) {
	latency, err := getSchedulingLatency(c, provider, cloudMasterName, masterHostname)
	if err != nil {
		return nil, err
	}
	return latency, nil
}

// ResetSchedulerMetrics sends a DELETE request to kube-scheduler for resetting metrics.
func ResetSchedulerMetrics(c clientset.Interface, provider, cloudMasterName, masterHostname string) error {
	responseText, err := sendRestRequestToScheduler(c, "DELETE", provider, cloudMasterName, masterHostname)
	if err != nil {
		return fmt.Errorf("Unexpected response: %q, %v", responseText, err)
	}
	return nil
}

// PodLatencyData encapsulates pod startup latency information.
type PodLatencyData struct {
	// Name of the pod
@@ -339,21 +73,6 @@ func ExtractLatencyMetrics(latencies []PodLatencyData) LatencyMetric {
	return LatencyMetric{Perc50: perc50, Perc90: perc90, Perc99: perc99, Perc100: perc100}
}

// LogSuspiciousLatency logs metrics/docker errors from all nodes that had slow startup times
// If latencyDataLag is nil then it will be populated from latencyData
func LogSuspiciousLatency(latencyData []PodLatencyData, latencyDataLag []PodLatencyData, nodeCount int, c clientset.Interface) {
	if latencyDataLag == nil {
		latencyDataLag = latencyData
	}
	for _, l := range latencyData {
		if l.Latency > nodeStartupThreshold {
			HighLatencyKubeletOperations(c, 1*time.Second, l.Node, e2elog.Logf)
		}
	}
	e2elog.Logf("Approx throughput: %v pods/min",
		float64(nodeCount)/(latencyDataLag[len(latencyDataLag)-1].Latency.Minutes()))
}

// PrintLatencies outputs latencies to log with readable format.
func PrintLatencies(latencies []PodLatencyData, header string) {
	metrics := ExtractLatencyMetrics(latencies)
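For reference, the throughput line in the deleted LogSuspiciousLatency divides the node count by the last (largest) startup latency in the sorted lag data. A self-contained sketch of the same arithmetic, with invented numbers:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical inputs: 100 nodes, and the slowest pod in the lag data
	// became ready after 5 minutes.
	nodeCount := 100
	lastLatency := 5 * time.Minute

	// Same formula as the deleted code:
	//   float64(nodeCount) / latencyDataLag[len(latencyDataLag)-1].Latency.Minutes()
	throughput := float64(nodeCount) / lastLatency.Minutes()
	fmt.Printf("Approx throughput: %v pods/min\n", throughput) // prints 20
}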
