Create multiple RCs in NC - prerequisite for adding services #24434

Merged: 1 commit, Apr 23, 2016
86 changes: 64 additions & 22 deletions test/e2e/density.go
@@ -84,6 +84,26 @@ func density30AddonResourceVerifier() map[string]framework.ResourceConstraint {
return constraints
}

func logPodStartupStatus(c *client.Client, expectedPods int, ns string, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
label := labels.SelectorFromSet(labels.Set(observedLabels))
podStore := framework.NewPodStore(c, ns, label, fields.Everything())
defer podStore.Stop()
ticker := time.NewTicker(period)
for {
select {
case <-ticker.C:
pods := podStore.List()
startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
startupStatus.Print("Density")
case <-stopCh:
pods := podStore.List()
startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
startupStatus.Print("Density")
return
}
}
}

// This test suite can take a long time to run, and can affect or be affected by other tests.
// So by default it is added to the ginkgo.skip list (see driver.go).
// To run this suite you must explicitly ask for it by setting the
@@ -185,7 +205,7 @@ var _ = framework.KubeDescribe("Density", func() {
{podsPerNode: 30, runLatencyTest: true, interval: 10 * time.Second},
{podsPerNode: 50, runLatencyTest: false, interval: 10 * time.Second},
{podsPerNode: 95, runLatencyTest: true, interval: 10 * time.Second},
{podsPerNode: 100, runLatencyTest: false, interval: 1 * time.Second},
{podsPerNode: 100, runLatencyTest: false, interval: 10 * time.Second},
}

for _, testArg := range densityTests {
@@ -201,22 +221,29 @@ var _ = framework.KubeDescribe("Density", func() {
}
itArg := testArg
It(name, func() {
podsPerNode := itArg.podsPerNode
totalPods = podsPerNode * nodeCount
RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
fileHndl, err := os.Create(fmt.Sprintf(framework.TestContext.OutputDir+"/%s/pod_states.csv", uuid))
framework.ExpectNoError(err)
defer fileHndl.Close()
config := framework.RCConfig{Client: c,
Image: "gcr.io/google_containers/pause:2.0",
Name: RCName,
Namespace: ns,
PollInterval: itArg.interval,
PodStatusFile: fileHndl,
Replicas: totalPods,
CpuRequest: nodeCpuCapacity / 100,
MemRequest: nodeMemCapacity / 100,
MaxContainerFailures: &MaxContainerFailures,
podsPerNode := itArg.podsPerNode
totalPods = podsPerNode * nodeCount
// TODO: loop to podsPerNode instead of 1 when we're ready.
numberOrRCs := 1
RCConfigs := make([]framework.RCConfig, numberOrRCs)
for i := 0; i < numberOrRCs; i++ {
RCName = "density" + strconv.Itoa(totalPods) + "-" + strconv.Itoa(i) + "-" + uuid
RCConfigs[i] = framework.RCConfig{Client: c,
Image: "gcr.io/google_containers/pause:2.0",
Name: RCName,
Namespace: ns,
Labels: map[string]string{"type": "densityPod"},
PollInterval: itArg.interval,
PodStatusFile: fileHndl,
Replicas: (totalPods + numberOrRCs - 1) / numberOrRCs,
CpuRequest: nodeCpuCapacity / 100,
MemRequest: nodeMemCapacity / 100,
MaxContainerFailures: &MaxContainerFailures,
Silent: true,
}
}

// Create a listener for events.
@@ -249,7 +276,7 @@ var _ = framework.KubeDescribe("Density", func() {
// uLock is a lock that protects updateCount
var uLock sync.Mutex
updateCount := 0
label := labels.SelectorFromSet(labels.Set(map[string]string{"name": RCName}))
label := labels.SelectorFromSet(labels.Set(map[string]string{"type": "densityPod"}))
_, updateController := controllerframework.NewInformer(
&cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
@@ -273,10 +300,22 @@
)
go updateController.Run(stop)

// Start the replication controller.
// Start all replication controllers.
startTime := time.Now()
framework.ExpectNoError(framework.RunRC(config))
wg := sync.WaitGroup{}
wg.Add(len(RCConfigs))
for i := range RCConfigs {
rcConfig := RCConfigs[i]
go func() {
framework.ExpectNoError(framework.RunRC(rcConfig))
wg.Done()
}()
}
logStopCh := make(chan struct{})
go logPodStartupStatus(c, totalPods, ns, map[string]string{"type": "densityPod"}, itArg.interval, logStopCh)
wg.Wait()
e2eStartupTime = time.Now().Sub(startTime)
close(logStopCh)
framework.Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
framework.Logf("Throughput (pods/s) during cluster saturation phase: %v", float32(totalPods)/float32(e2eStartupTime/time.Second))

@@ -506,11 +545,14 @@ var _ = framework.KubeDescribe("Density", func() {

By("Deleting ReplicationController")
// We explicitly delete all pods so that the API calls needed for deletion are accounted for in metrics.
rc, err := c.ReplicationControllers(ns).Get(RCName)
if err == nil && rc.Spec.Replicas != 0 {
By("Cleaning up the replication controller")
err := framework.DeleteRC(c, ns, RCName)
framework.ExpectNoError(err)
for i := range RCConfigs {
rcName := RCConfigs[i].Name
rc, err := c.ReplicationControllers(ns).Get(rcName)
if err == nil && rc.Spec.Replicas != 0 {
By("Cleaning up the replication controller")
err := framework.DeleteRC(c, ns, rcName)
framework.ExpectNoError(err)
}
}

By("Removing additional replication controllers if any")
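Taken together, the density.go changes above follow one pattern: several replication controllers are created concurrently, all of their pods are selected through the shared "type": "densityPod" label, and an aggregate logger reports startup progress on a ticker while per-RC output is silenced. Below is a minimal, self-contained sketch of that pattern under simplified assumptions: startRC and logStartupStatus are hypothetical stand-ins for framework.RunRC and logPodStartupStatus, an atomic counter replaces the pod store, and the replica split uses the same ceiling division as the diff.

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
    "time"
)

// startRC is a hypothetical stand-in for framework.RunRC: it "starts" one
// replication controller and blocks until all of its replicas are running.
func startRC(name string, replicas int, running *int64) {
    for i := 0; i < replicas; i++ {
        time.Sleep(10 * time.Millisecond) // simulate a pod coming up
        atomic.AddInt64(running, 1)
    }
}

// logStartupStatus plays the role of logPodStartupStatus: it reports aggregate
// progress on a ticker until stopCh is closed, then prints one final line.
func logStartupStatus(expected int, running *int64, period time.Duration, stopCh <-chan struct{}) {
    ticker := time.NewTicker(period)
    defer ticker.Stop()
    for {
        select {
        case <-ticker.C:
            fmt.Printf("Density Pods: %d out of %d running\n", atomic.LoadInt64(running), expected)
        case <-stopCh:
            fmt.Printf("Density Pods: %d out of %d running (final)\n", atomic.LoadInt64(running), expected)
            return
        }
    }
}

func main() {
    const totalPods, numRCs = 10, 3
    // Ceiling division, as in the diff: every RC gets enough replicas so the
    // sum covers totalPods even when it does not divide evenly.
    replicasPerRC := (totalPods + numRCs - 1) / numRCs

    var running int64
    stopCh := make(chan struct{})
    go logStartupStatus(totalPods, &running, 25*time.Millisecond, stopCh)

    // Fan out RC creation, one goroutine per RC, mirroring the WaitGroup block
    // in density.go; per-RC progress would be suppressed via Silent: true.
    var wg sync.WaitGroup
    wg.Add(numRCs)
    for i := 0; i < numRCs; i++ {
        name := fmt.Sprintf("density%d-%d", totalPods, i)
        go func(name string) {
            defer wg.Done()
            startRC(name, replicasPerRC, &running)
        }(name)
    }

    wg.Wait()
    close(stopCh)
    time.Sleep(50 * time.Millisecond) // let the logger emit its final line before exiting
}

The ceiling division (totalPods + numRCs - 1) / numRCs guarantees the RCs jointly cover totalPods even when it does not divide evenly; for 10 pods across 3 RCs, each RC gets 4 replicas.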
139 changes: 80 additions & 59 deletions test/e2e/framework/util.go
@@ -242,6 +242,9 @@ type RCConfig struct {
// Maximum allowable container failures. If exceeded, RunRC returns an error.
// Defaults to replicas*0.1 if unspecified.
MaxContainerFailures *int

// If set to false, starting the RC will print progress; otherwise only errors will be printed.
Silent bool
}

type DeploymentConfig struct {
@@ -1934,6 +1937,70 @@ func (config *RCConfig) applyTo(template *api.PodTemplateSpec) {
}
}

type RCStartupStatus struct {
Expected int
Terminating int
Running int
RunningButNotReady int
Waiting int
Pending int
Unknown int
Inactive int
FailedContainers int
Created []*api.Pod
ContainerRestartNodes sets.String
}

func (s *RCStartupStatus) Print(name string) {
Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
name, len(s.Created), s.Expected, s.Running, s.Pending, s.Waiting, s.Inactive, s.Terminating, s.Unknown, s.RunningButNotReady)
}

func ComputeRCStartupStatus(pods []*api.Pod, expected int) RCStartupStatus {
startupStatus := RCStartupStatus{
Expected: expected,
Created: make([]*api.Pod, 0, expected),
ContainerRestartNodes: sets.NewString(),
}
for _, p := range pods {
if p.DeletionTimestamp != nil {
startupStatus.Terminating++
continue
}
startupStatus.Created = append(startupStatus.Created, p)
if p.Status.Phase == api.PodRunning {
ready := false
for _, c := range p.Status.Conditions {
if c.Type == api.PodReady && c.Status == api.ConditionTrue {
ready = true
break
}
}
if ready {
// Only count a pod as running when it is also ready.
startupStatus.Running++
} else {
startupStatus.RunningButNotReady++
}
for _, v := range FailedContainers(p) {
startupStatus.FailedContainers = startupStatus.FailedContainers + v.Restarts
startupStatus.ContainerRestartNodes.Insert(p.Spec.NodeName)
}
} else if p.Status.Phase == api.PodPending {
if p.Spec.NodeName == "" {
startupStatus.Waiting++
} else {
startupStatus.Pending++
}
} else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
startupStatus.Inactive++
} else if p.Status.Phase == api.PodUnknown {
startupStatus.Unknown++
}
}
return startupStatus
}

func (config *RCConfig) start() error {
// Don't force tests to fail if they don't care about containers restarting.
var maxContainerFailures int
@@ -1962,74 +2029,28 @@ func (config *RCConfig) start() error {
for oldRunning != config.Replicas {
time.Sleep(interval)

terminating := 0

running := 0
runningButNotReady := 0
waiting := 0
pending := 0
unknown := 0
inactive := 0
failedContainers := 0
containerRestartNodes := sets.NewString()

pods := PodStore.List()
created := []*api.Pod{}
for _, p := range pods {
if p.DeletionTimestamp != nil {
terminating++
continue
}
created = append(created, p)
if p.Status.Phase == api.PodRunning {
ready := false
for _, c := range p.Status.Conditions {
if c.Type == api.PodReady && c.Status == api.ConditionTrue {
ready = true
break
}
}
if ready {
// Only count a pod as running when it is also ready.
running++
} else {
runningButNotReady++
}
for _, v := range FailedContainers(p) {
failedContainers = failedContainers + v.Restarts
containerRestartNodes.Insert(p.Spec.NodeName)
}
} else if p.Status.Phase == api.PodPending {
if p.Spec.NodeName == "" {
waiting++
} else {
pending++
}
} else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
inactive++
} else if p.Status.Phase == api.PodUnknown {
unknown++
}
}
pods = created
startupStatus := ComputeRCStartupStatus(pods, config.Replicas)

pods = startupStatus.Created
if config.CreatedPods != nil {
*config.CreatedPods = pods
}
if !config.Silent {
startupStatus.Print(config.Name)
}

Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
config.Name, len(pods), config.Replicas, running, pending, waiting, inactive, terminating, unknown, runningButNotReady)

promPushRunningPending(running, pending)
promPushRunningPending(startupStatus.Running, startupStatus.Pending)

if config.PodStatusFile != nil {
fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", running, pending, waiting, inactive, unknown, runningButNotReady)
fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", startupStatus.Running, startupStatus.Pending, startupStatus.Waiting, startupStatus.Inactive, startupStatus.Unknown, startupStatus.RunningButNotReady)
}

if failedContainers > maxContainerFailures {
DumpNodeDebugInfo(config.Client, containerRestartNodes.List())
if startupStatus.FailedContainers > maxContainerFailures {
DumpNodeDebugInfo(config.Client, startupStatus.ContainerRestartNodes.List())
// Get the logs from the failed containers to help diagnose what caused them to fail
LogFailedContainers(config.Namespace)
return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
}
if len(pods) < len(oldPods) || len(pods) > config.Replicas {
// This failure mode includes:
@@ -2043,11 +2064,11 @@ func (config *RCConfig) start() error {
return fmt.Errorf(errorStr)
}

if len(pods) > len(oldPods) || running > oldRunning {
if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
lastChange = time.Now()
}
oldPods = pods
oldRunning = running
oldRunning = startupStatus.Running

if time.Since(lastChange) > timeout {
dumpPodDebugInfo(config.Client, pods)