chore(exp): Add pod autoscaler experiment used to check the scalability of the application pod #65

Merged 5 commits on Aug 15, 2020
2 changes: 2 additions & 0 deletions build/generate_go_binary
@@ -26,5 +26,7 @@ go build -o build/_output/node-cpu-hog ./experiments/generic/node-cpu-hog
go build -o build/_output/container-kill ./experiments/generic/container-kill
# Building go binaries for disk_fill experiment
go build -o build/_output/disk-fill ./experiments/generic/disk-fill
# Building go binaries for pod-autoscaler experiment
go build -o build/_output/pod-autoscaler ./experiments/generic/pod-autoscaler
# Building go binaries for container_kill helper
go build -o build/_output/container-killer ./chaoslib/litmus/container_kill/helper
215 changes: 215 additions & 0 deletions chaoslib/litmus/pod_autoscaler/pod-autoscaler.go
@@ -0,0 +1,215 @@
package pod_autoscaler

import (
"strconv"
"time"

clients "github.com/litmuschaos/litmus-go/pkg/clients"
experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types"
"github.com/litmuschaos/litmus-go/pkg/log"
"github.com/litmuschaos/litmus-go/pkg/types"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
retries "k8s.io/client-go/util/retry"

"github.com/pkg/errors"
)

var err error

//PreparePodAutoscaler contains the preparation steps before chaos injection
func PreparePodAutoscaler(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {

appName, replicaCount, err := GetApplicationDetails(experimentsDetails, clients)
if err != nil {
return errors.Errorf("Unable to get the relicaCount of the application, err: %v", err)
}

//Waiting for the ramp time before chaos injection
if experimentsDetails.RampTime != 0 {
log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime))
waitForRampTime(experimentsDetails)
}

err = PodAutoscalerChaos(experimentsDetails, clients, replicaCount, appName, resultDetails, eventsDetails, chaosDetails)

if err != nil {
return errors.Errorf("Unable to perform autoscaling, due to %v", err)
}

err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName)
if err != nil {
return errors.Errorf("Unable to perform autoscaling, due to %v", err)
}

//Waiting for the ramp time after chaos injection
if experimentsDetails.RampTime != 0 {
log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime))
waitForRampTime(experimentsDetails)
}
return nil
}

//waitForRampTime waits for the given ramp time duration (in seconds)
func waitForRampTime(experimentsDetails *experimentTypes.ExperimentDetails) {
time.Sleep(time.Duration(experimentsDetails.RampTime) * time.Second)
}

//GetApplicationDetails returns the name and replica count of the application under test
func GetApplicationDetails(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (string, int, error) {

var appReplica int
var appName string
// Get Deployment replica count
applicationList, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).List(metav1.ListOptions{LabelSelector: experimentsDetails.AppLabel})
if err != nil || len(applicationList.Items) == 0 {
return "", 0, errors.Errorf("Unable to get application, err: %v", err)
}
for _, app := range applicationList.Items {
appReplica = int(*app.Spec.Replicas)
appName = app.Name

}
return appName, appReplica, nil

}

//PodAutoscalerChaos scales up the application pod replicas
func PodAutoscalerChaos(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, replicaCount int, appName string, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {

applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS)

replicas := int32(experimentsDetails.Replicas)
// Scale Application
retryErr := retries.RetryOnConflict(retries.DefaultRetry, func() error {
// Retrieve the latest version of Deployment before attempting update
// RetryOnConflict uses exponential backoff to avoid exhausting the apiserver
appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{})
if err != nil {
return errors.Errorf("Failed to get latest version of Application Deployment: %v", err)
}

appUnderTest.Spec.Replicas = int32Ptr(replicas) // modify replica count
_, updateErr := applicationClient.Update(appUnderTest)
return updateErr
})
if retryErr != nil {
return errors.Errorf("Unable to scale the application, due to: %v", retryErr)
}
log.Info("Application Started Scaling")

err = ApplicationPodStatusCheck(experimentsDetails, appName, clients, replicaCount, resultDetails, eventsDetails, chaosDetails)
if err != nil {
return errors.Errorf("Status Check failed, err: %v", err)
}

return nil
}

// ApplicationPodStatusCheck checks the status of the application pod
func ApplicationPodStatusCheck(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets, replicaCount int, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {

//ChaosStartTimeStamp contains the start timestamp, when the chaos injection begins
ChaosStartTimeStamp := time.Now().Unix()
failFlag := false
applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS)
applicationDeploy, err := applicationClient.Get(appName, metav1.GetOptions{})
if err != nil {
return errors.Errorf("Unable to get the application, err: %v", err)
}
for count := 0; count < int(experimentsDetails.ChaosDuration/2); count++ {

if int(applicationDeploy.Status.AvailableReplicas) != experimentsDetails.Replicas {

log.Infof("Application Pod Avaliable Count is: %s", strconv.Itoa(int(applicationDeploy.Status.AvailableReplicas)))
applicationDeploy, err = applicationClient.Get(appName, metav1.GetOptions{})
if err != nil {
return errors.Errorf("Unable to get the application, err: %v", err)
}

time.Sleep(2 * time.Second)
//ChaosCurrentTimeStamp contains the current timestamp
ChaosCurrentTimeStamp := time.Now().Unix()

//ChaosDiffTimeStamp contains the difference of current timestamp and start timestamp
//It is helpful to track the total chaos duration
chaosDiffTimeStamp := ChaosCurrentTimeStamp - ChaosStartTimeStamp
if int(chaosDiffTimeStamp) >= experimentsDetails.ChaosDuration {
failFlag = true
break
}

} else {
break
}
}
if failFlag {
err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName)
if err != nil {
return errors.Errorf("Unable to recover the autoscaling, err: %v", err)
}
return errors.Errorf("Application pod failed to come to running state after the chaos duration of %d sec", experimentsDetails.ChaosDuration)
}
// Keeping a wait time of 10s after all pods come to running state
// This is optional and used just for viewing the pod status
time.Sleep(10 * time.Second)

return nil
}

//AutoscalerRecovery scales the application back to the initial number of replicas
func AutoscalerRecovery(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, replicaCount int, appName string) error {

applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS)

// Scale back to initial number of replicas
retryErr := retries.RetryOnConflict(retries.DefaultRetry, func() error {
// Retrieve the latest version of Deployment before attempting update
// RetryOnConflict uses exponential backoff to avoid exhausting the apiserver
appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{})
if err != nil {
return errors.Errorf("Failed to get latest version of Application Deployment: %v", err)
}

appUnderTest.Spec.Replicas = int32Ptr(int32(replicaCount)) // modify replica count
_, updateErr := applicationClient.Update(appUnderTest)
return updateErr
})
if retryErr != nil {
return errors.Errorf("Unable to scale the, due to: %v", retryErr)
}
log.Info("[Info]: Application pod started rolling back")

applicationDeploy, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).Get(appName, metav1.GetOptions{})
if err != nil {
return errors.Errorf("Unable to get the application, err: %v", err)
}

failFlag := false
// Check for 30 retries with a 2s delay between retries
for count := 0; count < 30; count++ {

if int(applicationDeploy.Status.AvailableReplicas) != replicaCount {

applicationDeploy, err = applicationClient.Get(appName, metav1.GetOptions{})
if err != nil {
return errors.Errorf("Unable to get the application, err: %v", err)
}
time.Sleep(2 * time.Second)
if count == 29 {
failFlag = true
break
}

} else {
break
}
}
if failFlag {
return errors.Errorf("Application failed to roll back")
}
log.Info("[RollBack]: Application Pod roll back to initial number of replicas")

return nil
}

func int32Ptr(i int32) *int32 { return &i }
2 changes: 1 addition & 1 deletion chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go
@@ -91,7 +91,7 @@ func ExperimentCPU(experimentsDetails *experimentTypes.ExperimentDetails, client
}
// updating the chaosresult after stopped
failStep := "CPU hog Chaos injection stopped!"
types.SetResultAfterCompletion(resultDetails, "Fail", "Stopped", failStep)
types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep)
result.ChaosResult(chaosDetails, clients, resultDetails, "EOT")

// generating summary event in chaosengine
2 changes: 1 addition & 1 deletion chaoslib/litmus/pod_memory_hog/pod-memory-hog.go
@@ -96,7 +96,7 @@ func ExperimentMemory(experimentsDetails *experimentTypes.ExperimentDetails, cli
}
// updating the chaosresult after stopped
failStep := "Memory hog Chaos injection stopped!"
types.SetResultAfterCompletion(resultDetails, "Fail", "Stopped", failStep)
types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep)
result.ChaosResult(chaosDetails, clients, resultDetails, "EOT")

// generating summary event in chaosengine
14 changes: 14 additions & 0 deletions experiments/generic/pod-autoscaler/README.md
@@ -0,0 +1,14 @@
## Experiment Metadata

<table>
<tr>
<th> Name </th>
<th> Description </th>
<th> Documentation Link </th>
</tr>
<tr>
<td> Pod Autoscaler</td>
<td> Scale the deployment replicas to check the autoscaling capability. </td>
<td> <a href="https://docs.litmuschaos.io/docs/pod-autoscaler/"> Here </a> </td>
</tr>
</table>
134 changes: 134 additions & 0 deletions experiments/generic/pod-autoscaler/pod-autoscaler.go
@@ -0,0 +1,134 @@
package main

import (
"github.com/litmuschaos/litmus-go/chaoslib/litmus/pod_autoscaler"
clients "github.com/litmuschaos/litmus-go/pkg/clients"
"github.com/litmuschaos/litmus-go/pkg/events"
experimentEnv "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/environment"
experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types"
"github.com/litmuschaos/litmus-go/pkg/log"
"github.com/litmuschaos/litmus-go/pkg/result"
"github.com/litmuschaos/litmus-go/pkg/status"
"github.com/litmuschaos/litmus-go/pkg/types"
"github.com/sirupsen/logrus"
)

func init() {
// Configure logrus to use the text formatter with full timestamps.
logrus.SetFormatter(&logrus.TextFormatter{
FullTimestamp: true,
DisableSorting: true,
DisableLevelTruncation: true,
})
}

func main() {

var err error
experimentsDetails := experimentTypes.ExperimentDetails{}
resultDetails := types.ResultDetails{}
eventsDetails := types.EventDetails{}
clients := clients.ClientSets{}
chaosDetails := types.ChaosDetails{}

//Getting kubeConfig and generating ClientSets
if err := clients.GenerateClientSetFromKubeConfig(); err != nil {
log.Fatalf("Unable to Get the kubeconfig due to %v", err)
}

//Fetching all the ENV passed from the runner pod
log.Infof("[PreReq]: Getting the ENV for the %v experiment", experimentsDetails.ExperimentName)
experimentEnv.GetENV(&experimentsDetails, "pod-autoscaler")

// Initialise the chaos attributes
experimentEnv.InitialiseChaosVariables(&chaosDetails, &experimentsDetails)

// Initialise Chaos Result Parameters
types.SetResultAttributes(&resultDetails, chaosDetails)

//Updating the chaos result at the beginning of the experiment
log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName)
err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT")
if err != nil {
log.Errorf("Unable to Create the Chaos Result due to %v", err)
failStep := "Updating the chaos result of pod-delete experiment (SOT)"
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
return
}

// Set the chaos result uid
result.SetResultUID(&resultDetails, clients, &chaosDetails)

//DISPLAY THE APP INFORMATION
log.InfoWithValues("The application informations are as follows", logrus.Fields{
"Namespace": experimentsDetails.AppNS,
"Label": experimentsDetails.AppLabel,
"Ramp Time": experimentsDetails.RampTime,
})

//PRE-CHAOS APPLICATION STATUS CHECK
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients)
if err != nil {
log.Errorf("Application status check failed due to %v\n", err)
failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)"
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep)
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
return
}
if experimentsDetails.EngineName != "" {
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT is Running successfully", "Normal", &chaosDetails)
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
}

// Including the litmus lib for pod-autoscaler
if experimentsDetails.ChaosLib == "litmus" {
err = pod_autoscaler.PreparePodAutoscaler(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails)
if err != nil {
log.Errorf("Chaos injection failed due to %v\n", err)
failStep := "Including the litmus lib for pod-autoscaler"
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep)
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
return
}
log.Info("[Confirmation]: The application pod autoscaler completed successfully")
resultDetails.Verdict = "Pass"
} else {
log.Error("[Invalid]: Please Provide the correct LIB")
failStep := "Including the litmus lib for pod-autoscaler"
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep)
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
return
}

//POST-CHAOS APPLICATION STATUS CHECK
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)")
err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients)
if err != nil {
log.Errorf("Application status check failed due to %v\n", err)
failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)"
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep)
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
return
}
if experimentsDetails.EngineName != "" {
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT is Running successfully", "Normal", &chaosDetails)
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
}

//Updating the chaosResult at the end of the experiment
log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
if err != nil {
log.Fatalf("Unable to Update the Chaos Result due to %v\n", err)
}
if experimentsDetails.EngineName != "" {
msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed"
types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails)
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
}

msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed"
types.SetResultEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &resultDetails)
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult")
}