Run 50 pod kubelet resource tracking test on GCE only #14698

Merged: 2 commits, Oct 2, 2015
10 changes: 10 additions & 0 deletions hack/jenkins/e2e.sh
@@ -112,8 +112,14 @@ GKE_REQUIRED_SKIP_TESTS=(
"Shell"
"Daemon\sset"
"Deployment"
"experimental\sresource\susage\stracking" # Expect --max-pods=100
)

# Tests which cannot be run on AWS.
AWS_REQUIRED_SKIP_TESTS=(
"experimental\sresource\susage\stracking" # Expect --max-pods=100
)

# The following tests are known to be flaky, and are thus run only in their own
# -flaky- build variants.
GCE_FLAKY_TESTS=(
@@ -301,6 +307,7 @@ case ${JOB_NAME} in
       ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
       ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
       ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
+      ${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
       )"}
     : ${ENABLE_DEPLOYMENTS:=true}
     # Override AWS defaults.
@@ -439,6 +446,7 @@ case ${JOB_NAME} in
       ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
       ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
       ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
+      ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
       )"}
     ;;

@@ -455,6 +463,7 @@ case ${JOB_NAME} in
       ${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
       ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
       ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
+      ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
       )"}
     ;;

@@ -499,6 +508,7 @@ case ${JOB_NAME} in
       ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
       ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
       ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
+      ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
       )"}
     ;;

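A note on how these skip lists take effect: each entry is a regular expression, and hack/jenkins/e2e.sh joins the arrays into a single --ginkgo.skip argument (the ${ARR[@]:+${ARR[@]}} expansion contributes nothing when an array is empty or unset, which keeps the script safe under set -u). Ginkgo matches the pattern against the full space-joined spec name, which is why literal spaces are escaped as \s. Below is a minimal Go sketch of that matching, not part of the PR; the exact spec string and the 20m0s rendering of monitoringTime are assumptions for illustration.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Two entries copied from the skip lists above. \s matches the
	// spaces Ginkgo inserts when joining nested Describe/It names.
	skips := []string{
		`Deployment`,
		`experimental\sresource\susage\stracking`, // Expect --max-pods=100
	}
	// The script joins the array entries into one alternation.
	skip := regexp.MustCompile(strings.Join(skips, "|"))

	// Full spec name produced by the Describe/It nesting in
	// kubelet_perf.go, assuming a 20-minute monitoringTime.
	spec := "Kubelet experimental resource usage tracking over 20m0s with 50 pods per node."
	fmt.Println(skip.MatchString(spec)) // true, so GKE and AWS jobs skip it
}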
107 changes: 62 additions & 45 deletions test/e2e/kubelet_perf.go
@@ -50,16 +50,62 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
 	}
 }
 
+func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
+	numNodes := nodeNames.Len()
+	totalPods := podsPerNode * numNodes
+	By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
+	rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
+
+	// TODO: Use a more realistic workload
+	Expect(RunRC(RCConfig{
+		Client:    framework.Client,
+		Name:      rcName,
+		Namespace: framework.Namespace.Name,
+		Image:     "gcr.io/google_containers/pause:go",
+		Replicas:  totalPods,
+	})).NotTo(HaveOccurred())
+
+	// Log once and flush the stats.
+	resourceMonitor.LogLatest()
+	resourceMonitor.Reset()
+
+	By("Start monitoring resource usage")
+	// Periodically dump the cpu summary until the deadline is met.
+	// Note that without calling resourceMonitor.Reset(), the stats
+	// would occupy increasingly more memory. This should be fine
+	// for the current test duration, but we should reclaim the
+	// entries if we plan to monitor longer (e.g., 8 hours).
+	deadline := time.Now().Add(monitoringTime)
+	for time.Now().Before(deadline) {
+		timeLeft := deadline.Sub(time.Now())
+		Logf("Still running...%v left", timeLeft)
+		if timeLeft < reportingPeriod {
+			time.Sleep(timeLeft)
+		} else {
+			time.Sleep(reportingPeriod)
+		}
+		logPodsOnNodes(framework.Client, nodeNames.List())
+	}
+
+	By("Reporting overall resource usage")
+	logPodsOnNodes(framework.Client, nodeNames.List())
+	resourceMonitor.LogCPUSummary()
+	resourceMonitor.LogLatest()
+
+	By("Deleting the RC")
+	DeleteRC(framework.Client, framework.Namespace.Name, rcName)
+}
+
 var _ = Describe("Kubelet", func() {
-	var numNodes int
 	var nodeNames sets.String
 	framework := NewFramework("kubelet-perf")
 	var resourceMonitor *resourceMonitor
 
 	BeforeEach(func() {
 		nodes, err := framework.Client.Nodes().List(labels.Everything(), fields.Everything())
 		expectNoError(err)
-		numNodes = len(nodes.Items)
 		nodeNames = sets.NewString()
 		for _, node := range nodes.Items {
 			nodeNames.Insert(node.Name)
@@ -72,54 +118,25 @@ var _ = Describe("Kubelet", func() {
 		resourceMonitor.Stop()
 	})
 
-	Describe("resource usage tracking", func() {
-		density := []int{0, 50}
+	Describe("regular resource usage tracking", func() {
+		density := []int{0, 35}
 		for i := range density {
 			podsPerNode := density[i]
 			name := fmt.Sprintf(
 				"over %v with %d pods per node.", monitoringTime, podsPerNode)
 			It(name, func() {
-				totalPods := podsPerNode * numNodes
-				By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
-				rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
-
-				// TODO: Use a more realistic workload
-				Expect(RunRC(RCConfig{
-					Client:    framework.Client,
-					Name:      rcName,
-					Namespace: framework.Namespace.Name,
-					Image:     "gcr.io/google_containers/pause:go",
-					Replicas:  totalPods,
-				})).NotTo(HaveOccurred())
-
-				// Log once and flush the stats.
-				resourceMonitor.LogLatest()
-				resourceMonitor.Reset()
-
-				By("Start monitoring resource usage")
-				// Periodically dump the cpu summary until the deadline is met.
-				// Note that without calling resourceMonitor.Reset(), the stats
-				// would occupy increasingly more memory. This should be fine
-				// for the current test duration, but we should reclaim the
-				// entries if we plan to monitor longer (e.g., 8 hours).
-				deadline := time.Now().Add(monitoringTime)
-				for time.Now().Before(deadline) {
-					Logf("Still running...%v left", deadline.Sub(time.Now()))
-					time.Sleep(reportingPeriod)
-					logPodsOnNodes(framework.Client, nodeNames.List())
-				}
-
-				By("Reporting overall resource usage")
-				logPodsOnNodes(framework.Client, nodeNames.List())
-				resourceMonitor.LogCPUSummary()
-				resourceMonitor.LogLatest()
-
-				By("Deleting the RC")
-				DeleteRC(framework.Client, framework.Namespace.Name, rcName)
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
 			})
 		}
 	})
+	Describe("experimental resource usage tracking", func() {
+		density := []int{50}
+		for i := range density {
+			podsPerNode := density[i]
+			name := fmt.Sprintf(
+				"over %v with %d pods per node.", monitoringTime, podsPerNode)
+			It(name, func() {
+				// Skip this test for GKE.
+				// TODO: Re-activate this for GKE
+				SkipIfProviderIs("gke")
+
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+			})
+		}
+	})
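Beyond moving the test body into a helper, the refactor also fixes the sleep at the end of the monitoring window: the old loop always slept a full reportingPeriod, so a run could overshoot monitoringTime by up to one whole period, while the new loop clamps the final sleep to the time remaining. A standalone sketch of the same pattern, with millisecond durations substituted for the test's minutes-long constants so it runs quickly:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Shortened stand-ins for the test's monitoringTime and reportingPeriod.
	monitoringTime := 250 * time.Millisecond
	reportingPeriod := 100 * time.Millisecond

	deadline := time.Now().Add(monitoringTime)
	for time.Now().Before(deadline) {
		timeLeft := deadline.Sub(time.Now())
		fmt.Printf("Still running...%v left\n", timeLeft)
		// Clamp the final sleep so the loop ends at the deadline
		// instead of overshooting by up to a full reportingPeriod.
		if timeLeft < reportingPeriod {
			time.Sleep(timeLeft)
		} else {
			time.Sleep(reportingPeriod)
		}
	}
	fmt.Println("overshoot:", time.Since(deadline)) // a few ms, not a full period
}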
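One small pattern in both Describe blocks deserves a comment: podsPerNode := density[i] copies the slice element into a fresh variable before It registers its closure. Ginkgo builds the spec tree first and runs the closures later, and under Go's loop-variable semantics at the time (before Go 1.22), a closure that captured the loop variable directly would observe its final value in every iteration. A minimal sketch outside Ginkgo, illustrative only:

package main

import "fmt"

func main() {
	density := []int{0, 35}
	var deferred []func()

	for i := range density {
		podsPerNode := density[i] // fresh variable per iteration; each closure keeps its own value
		deferred = append(deferred, func() {
			// Capturing i directly would print the last index twice on pre-1.22 Go.
			fmt.Println("pods per node:", podsPerNode)
		})
	}
	for _, f := range deferred {
		f() // prints 0, then 35
	}
}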