Label flaky e2es with [Flaky] & slow tests with [Slow] #19021

Merged: 4 commits, Dec 22, 2015
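This change replaces the hand-maintained lists of flaky and slow test regexes with two tags embedded directly in the Ginkgo spec names, so any job can select or exclude them with a single stable regex. A minimal sketch of the consuming side, mirroring the GINKGO_TEST_ARGS wiring in hack/jenkins/e2e.sh below (the assignments here are illustrative, not an exact excerpt):

    # Default jobs: skip anything tagged flaky or slow.
    GINKGO_TEST_ARGS="--ginkgo.skip=\[Flaky\]|\[Slow\]"

    # Dedicated -flaky- jobs: run only the tagged-flaky specs.
    GINKGO_TEST_ARGS="--ginkgo.focus=\[Flaky\]"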
42 changes: 2 additions & 40 deletions hack/jenkins/e2e.sh
@@ -65,7 +65,6 @@ function join_regex_no_empty() {
 # $GCE_DEFAULT_SKIP_TESTS
 # $GCE_FLAKY_TESTS
 # $GCE_SLOW_TESTS
-# $GKE_FLAKY_TESTS
 #
 # Args:
 # $1 old_version: the version to deploy a cluster at, and old e2e tests to run
@@ -99,7 +98,6 @@ function configure_upgrade_step() {
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
 ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
-${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
 )"

 if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
@@ -302,11 +300,6 @@ GKE_REQUIRED_SKIP_TESTS=(
 "Deployment"
 )

-# Tests wchich are known to be flaky on GKE
-GKE_FLAKY_TESTS=(
-"NodeOutOfDisk"
-)
-
 # Specialized tests which should be skipped by default for GKE.
 GKE_DEFAULT_SKIP_TESTS=(
 "Autoscaling\sSuite"
@@ -330,12 +323,7 @@ DISRUPTIVE_TESTS=(
 # The following tests are known to be flaky, and are thus run only in their own
 # -flaky- build variants.
 GCE_FLAKY_TESTS=(
-"GCE\sL7\sLoadBalancer\sController" # issue: #17518
-"DaemonRestart\sController\sManager" # issue: #17829
-"Daemon\sset\sshould\srun\sand\sstop\scomplex\sdaemon" # issue: #16623
-"Resource\susage\sof\ssystem\scontainers" # issue: #13931
-"NodeOutOfDisk" # issue: #17687
-"Cluster\slevel\slogging\susing\sElasticsearch" # issue: #17873
+"\[Flaky\]"
 )

 # The following tests are known to be slow running (> 2 min), and are
@@ -349,14 +337,7 @@ GCE_SLOW_TESTS=(
 # make sure the associated project has enough quota. At the time of this
 # writing a GCE project is allowed 3 backend services by default. This
 # test requires at least 5.
-"GCE\sL7\sLoadBalancer\sController" # 10 min, file: ingress.go, slow by design
-"SchedulerPredicates\svalidates\sMaxPods\slimit " # 8 min, file: scheduler_predicates.go, PR: #13315
-"Nodes\sResize" # 3 min 30 sec, file: resize_nodes.go, issue: #13323
-"resource\susage\stracking" # 1 hour, file: kubelet_perf.go, slow by design
-"monotonically\sincreasing\srestart\scount" # 1.5 to 5 min, file: pods.go, slow by design
-"Garbage\scollector\sshould" # 7 min, file: garbage_collector.go, slow by design
-"KubeProxy\sshould\stest\skube-proxy" # 9 min 30 sec, file: kubeproxy.go, issue: #14204
-"cap\sback-off\sat\sMaxContainerBackOff" # 20 mins file: manager.go, PR: #12648
+"\[Slow\]"
 )

 # Tests which are not able to be run in parallel.
@@ -367,16 +348,6 @@ GCE_PARALLEL_SKIP_TESTS=(
 "\[Disruptive\]"
 )

-# Tests which are known to be flaky when run in parallel.
-GCE_PARALLEL_FLAKY_TESTS=(
-"DaemonRestart"
-"Elasticsearch"
-"Namespaces.*should\sdelete\sfast"
-"Pods.*back-off\srestarting.*LivenessProbe" # issue: #18293
-"ServiceAccounts"
-"Services.*identically\snamed" # error waiting for reachability, issue: #16285
-)
-
 # Tests that should not run on soak cluster.
 GCE_SOAK_CONTINUOUS_SKIP_TESTS=(
 "GCE\sL7\sLoadBalancer\sController" # issue: #17119
@@ -496,7 +467,6 @@ case ${JOB_NAME} in
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
 ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
 )"}
 : ${KUBE_GCE_INSTANCE_PREFIX:="e2e-gce-${NODE_NAME}-${EXECUTOR_NUMBER}"}
@@ -516,7 +486,6 @@ case ${JOB_NAME} in
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
 ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
 )"}
 : ${KUBE_GCE_INSTANCE_PREFIX:="e2e-test-parallel"}
@@ -535,7 +504,6 @@ case ${JOB_NAME} in
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
 ${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
 )"}
 : ${ENABLE_DEPLOYMENTS:=true}
@@ -553,7 +521,6 @@ case ${JOB_NAME} in
 ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
 ) --ginkgo.focus=$(join_regex_no_empty \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
 )"}
 : ${KUBE_GCE_INSTANCE_PREFIX:="parallel-flaky"}
 : ${PROJECT:="k8s-jkns-e2e-gce-prl-flaky"}
@@ -597,7 +564,6 @@ case ${JOB_NAME} in
 : ${E2E_NETWORK:="e2e-gce-flannel"}
 : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
-${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
 ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
 ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
@@ -672,7 +638,6 @@ case ${JOB_NAME} in
 ${GKE_DEFAULT_SKIP_TESTS[@]:+${GKE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
 )"}
 ;;

@@ -687,7 +652,6 @@ case ${JOB_NAME} in
 ${GKE_DEFAULT_SKIP_TESTS[@]:+${GKE_DEFAULT_SKIP_TESTS[@]}} \
 ${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
 ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
 )"}
 ;;
@@ -701,7 +665,6 @@ case ${JOB_NAME} in
 : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
 ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}}) \
 --ginkgo.focus=$(join_regex_no_empty \
-${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
 )"}
 ;;
@@ -734,7 +697,6 @@ case ${JOB_NAME} in
 ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
 ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
 ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
-${GKE_FLAKY_TESTS[@]:+${GKE_FLAKY_TESTS[@]}} \
 ${GCE_SOAK_CONTINUOUS_SKIP_TESTS[@]:+${GCE_SOAK_CONTINUOUS_SKIP_TESTS[@]}} \
 )"}
 ;;
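For reference, the skip and focus strings above are produced by joining each array into one alternation regex. A rough sketch of the effect of this change, assuming join_regex_allow_empty simply joins its arguments with | (its definition sits outside this diff):

    GCE_FLAKY_TESTS=( "\[Flaky\]" )
    GCE_SLOW_TESTS=( "\[Slow\]" )

    # Before: join_regex_allow_empty expanded to a dozen per-test patterns, e.g.
    #   GCE\sL7\sLoadBalancer\sController|DaemonRestart\sController\sManager|...
    # After: the same call collapses to two stable terms:
    #   \[Flaky\]|\[Slow\]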
3 changes: 2 additions & 1 deletion test/e2e/daemon_restart.go
@@ -183,7 +183,8 @@ func getContainerRestarts(c *client.Client, ns string, labelSelector labels.Selector)
 return failedContainers, containerRestartNodes.List()
 }

-var _ = Describe("DaemonRestart [Disruptive]", func() {
+// Flaky issues #17829, #19023
+var _ = Describe("DaemonRestart [Disruptive] [Flaky]", func() {

 framework := NewFramework("daemonrestart")
 rcName := "daemonrestart" + strconv.Itoa(numPods) + "-" + string(util.NewUUID())
3 changes: 2 additions & 1 deletion test/e2e/daemon_set.go
@@ -126,7 +126,8 @@ var _ = Describe("Daemon set", func() {

 })

-It("should run and stop complex daemon", func() {
+// Flaky issue #16623
+It("should run and stop complex daemon [Flaky]", func() {
 complexLabel := map[string]string{daemonsetNameLabel: dsName}
 nodeSelector := map[string]string{daemonsetColorLabel: "blue"}
 Logf("Creating daemon with a node selector %s", dsName)
3 changes: 2 additions & 1 deletion test/e2e/es_cluster_logging.go
@@ -30,7 +30,8 @@ import (
 . "github.com/onsi/gomega"
 )

-var _ = Describe("Cluster level logging using Elasticsearch", func() {
+// Flaky issue #17873
+var _ = Describe("Cluster level logging using Elasticsearch [Flaky]", func() {
 f := NewFramework("es-logging")

 BeforeEach(func() {
4 changes: 3 additions & 1 deletion test/e2e/garbage_collector.go
@@ -27,7 +27,9 @@ import (
 )

 // This test requires that --terminated-pod-gc-threshold=100 be set on the controller manager
-var _ = Describe("Garbage collector", func() {
+//
+// Slow by design (7 min)
+var _ = Describe("Garbage collector [Slow]", func() {
 f := NewFramework("garbage-collector")
 It("should handle the creation of 1000 pods", func() {
 SkipUnlessProviderIs("gce")
4 changes: 3 additions & 1 deletion test/e2e/ingress.go
@@ -376,7 +376,9 @@ func (cont *IngressController) Cleanup(del bool) error {
 return fmt.Errorf(errMsg)
 }

-var _ = Describe("GCE L7 LoadBalancer Controller [Serial]", func() {
+// Slow by design (10 min)
+// Flaky issue #17518
+var _ = Describe("GCE L7 LoadBalancer Controller [Serial] [Slow] [Flaky]", func() {
 // These variables are initialized after framework's beforeEach.
 var ns string
 var addonDir string
3 changes: 2 additions & 1 deletion test/e2e/kubelet_perf.go
@@ -138,7 +138,8 @@ func verifyCPULimits(expected containersCPUSummary, actual nodesCPUSummary) {
 }
 }

-var _ = Describe("Kubelet [Serial]", func() {
+// Slow by design (1 hour)
+var _ = Describe("Kubelet [Serial] [Slow]", func() {
 var nodeNames sets.String
 framework := NewFramework("kubelet-perf")
 var rm *resourceMonitor
3 changes: 2 additions & 1 deletion test/e2e/kubeproxy.go
@@ -71,7 +71,8 @@ var _ = Describe("KubeProxy", func() {
 f: f,
 }

-It("should test kube-proxy", func() {
+// Slow issue #14204 (10 min)
+It("should test kube-proxy [Slow]", func() {
 By("cleaning up any pre-existing namespaces used by this test")
 config.cleanup()

3 changes: 2 additions & 1 deletion test/e2e/monitor_resources.go
@@ -72,7 +72,8 @@ func computeAverage(sliceOfUsages []resourceUsagePerContainer) (result resourceU

 // This tests does nothing except checking current resource usage of containers defined in kubelet_stats systemContainers variable.
 // Test fails if an average container resource consumption over datapointAmount tries exceeds amount defined in allowedUsage.
-var _ = Describe("Resource usage of system containers [Serial]", func() {
+// Flaky issue #13931
+var _ = Describe("Resource usage of system containers [Serial] [Flaky]", func() {
 var c *client.Client
 BeforeEach(func() {
 var err error
4 changes: 3 additions & 1 deletion test/e2e/namespace.go
@@ -91,7 +91,9 @@ var _ = Describe("Namespaces", func() {

 //Confirms that namespace draining is functioning reasonably
 //at minute intervals.
-It("should delete fast enough (90 percent of 100 namespaces in 150 seconds)",
+//
+// Flaky issue #19026
+It("should delete fast enough (90 percent of 100 namespaces in 150 seconds) [Flaky]",
 func() { extinguish(c, 100, 10, 150) })

 //comprehensive draining ; uncomment after #7372
3 changes: 2 additions & 1 deletion test/e2e/nodeoutofdisk.go
@@ -63,7 +63,8 @@ const (
 // choose that node to be node with index 1.
 // 7. Observe that the pod in pending status schedules on that node.
 //
-var _ = Describe("NodeOutOfDisk [Serial]", func() {
+// Flaky issue #17687
+var _ = Describe("NodeOutOfDisk [Serial] [Flaky]", func() {
 var c *client.Client
 var unfilledNodeName, recoveredNodeName string
 framework := Framework{BaseName: "node-outofdisk"}
9 changes: 6 additions & 3 deletions test/e2e/pods.go
@@ -635,7 +635,8 @@ var _ = Describe("Pods", func() {
 }, 1, defaultObservationTimeout)
 })

-It("should have monotonically increasing restart count [Conformance]", func() {
+// Slow by design (5 min)
+It("should have monotonically increasing restart count [Conformance] [Slow]", func() {
 runLivenessTest(framework.Client, framework.Namespace.Name, &api.Pod{
 ObjectMeta: api.ObjectMeta{
 Name: "liveness-http",
@@ -896,7 +897,8 @@ var _ = Describe("Pods", func() {
 }
 })

-It("should not back-off restarting a container on LivenessProbe failure", func() {
+// Flaky issue #18293
+It("should not back-off restarting a container on LivenessProbe failure [Flaky]", func() {
 podClient := framework.Client.Pods(framework.Namespace.Name)
 podName := "pod-back-off-liveness"
 containerName := "back-off-liveness"
@@ -936,7 +938,8 @@ var _ = Describe("Pods", func() {
 }
 })

-It("should cap back-off at MaxContainerBackOff", func() {
+// Slow issue #19027 (20 mins)
+It("should cap back-off at MaxContainerBackOff [Slow]", func() {
 podClient := framework.Client.Pods(framework.Namespace.Name)
 podName := "back-off-cap"
 containerName := "back-off-cap"
3 changes: 2 additions & 1 deletion test/e2e/resize_nodes.go
@@ -402,7 +402,8 @@ var _ = Describe("Nodes [Disruptive]", func() {
 systemPodsNo = len(systemPods.Items)
 })

-Describe("Resize", func() {
+// Slow issue #13323 (8 min)
+Describe("Resize [Slow]", func() {
 var skipped bool

 BeforeEach(func() {
4 changes: 3 additions & 1 deletion test/e2e/scheduler_predicates.go
@@ -200,7 +200,9 @@ var _ = Describe("SchedulerPredicates [Serial]", func() {
 // This test verifies that max-pods flag works as advertised. It assumes that cluster add-on pods stay stable
 // and cannot be run in parallel with any other test that touches Nodes or Pods. It is so because to check
 // if max-pods is working we need to fully saturate the cluster and keep it in this state for few seconds.
-It("validates MaxPods limit number of pods that are allowed to run", func() {
+//
+// Slow PR #13315 (8 min)
+It("validates MaxPods limit number of pods that are allowed to run [Slow]", func() {
 totalPodCapacity = 0

 for _, node := range nodeList.Items {
3 changes: 2 additions & 1 deletion test/e2e/service.go
@@ -778,7 +778,8 @@ var _ = Describe("Services", func() {
 })

 // This test hits several load-balancer cases because LB turnup is slow.
-It("should serve identically named services in different namespaces on different load-balancers", func() {
+// Flaky issue #18952
+It("should serve identically named services in different namespaces on different load-balancers [Flaky]", func() {
 // requires ExternalLoadBalancer
 SkipUnlessProviderIs("gce", "gke", "aws")

3 changes: 2 additions & 1 deletion test/e2e/service_accounts.go
@@ -30,7 +30,8 @@ import (
 . "github.com/onsi/ginkgo"
 )

-var _ = Describe("ServiceAccounts", func() {
+// Flaky issue #19024
+var _ = Describe("ServiceAccounts [Flaky]", func() {
 f := NewFramework("svcaccounts")

 It("should mount an API token into pods [Conformance]", func() {
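Taken together, a local run that mirrors the new job wiring might look like the sketch below. This assumes the usual hack/e2e.go entry point and standard Ginkgo flag passthrough; the exact invocation depends on your environment:

    # Skip flaky and slow specs, as the default jobs now do:
    go run hack/e2e.go -v --test --test_args="--ginkgo.skip=\[Flaky\]|\[Slow\]"

    # Exercise only the known-flaky specs, as the -flaky- variants do:
    go run hack/e2e.go -v --test --test_args="--ginkgo.focus=\[Flaky\]"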