-
Notifications
You must be signed in to change notification settings - Fork 108
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(exp): Add pod autoscaler experiment used to check the scalabili…
…ty of the application pod (#65) * chore(exp): Add pod autoscaler experiment used to check the scalability of the application pod Signed-off-by: Udit Gaurav <uditgaurav@gmail.com> * Adding abort in the experiment Signed-off-by: Udit Gaurav <udit.gaurav@mayadata.io>
- Loading branch information
1 parent
55b0a6f
commit 9b866d7
Showing
17 changed files
with
1,160 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
package pod_autoscaler | ||
|
||
import ( | ||
"strconv" | ||
"time" | ||
|
||
clients "github.com/litmuschaos/litmus-go/pkg/clients" | ||
experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types" | ||
"github.com/litmuschaos/litmus-go/pkg/log" | ||
"github.com/litmuschaos/litmus-go/pkg/types" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
retries "k8s.io/client-go/util/retry" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
var err error | ||
|
||
//PreparePodAutoscaler contains the prepration steps before chaos injection | ||
func PreparePodAutoscaler(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { | ||
|
||
appName, replicaCount, err := GetApplicationDetails(experimentsDetails, clients) | ||
if err != nil { | ||
return errors.Errorf("Unable to get the relicaCount of the application, err: %v", err) | ||
} | ||
|
||
//Waiting for the ramp time before chaos injection | ||
if experimentsDetails.RampTime != 0 { | ||
log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) | ||
waitForRampTime(experimentsDetails) | ||
} | ||
|
||
err = PodAutoscalerChaos(experimentsDetails, clients, replicaCount, appName, resultDetails, eventsDetails, chaosDetails) | ||
|
||
if err != nil { | ||
return errors.Errorf("Unable to perform autoscaling, due to %v", err) | ||
} | ||
|
||
err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName) | ||
if err != nil { | ||
return errors.Errorf("Unable to perform autoscaling, due to %v", err) | ||
} | ||
|
||
//Waiting for the ramp time after chaos injection | ||
if experimentsDetails.RampTime != 0 { | ||
log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) | ||
waitForRampTime(experimentsDetails) | ||
} | ||
return nil | ||
} | ||
|
||
//waitForRampTime waits for the given ramp time duration (in seconds) | ||
func waitForRampTime(experimentsDetails *experimentTypes.ExperimentDetails) { | ||
time.Sleep(time.Duration(experimentsDetails.RampTime) * time.Second) | ||
} | ||
|
||
//GetApplicationDetails is used to get the application name, replicas of the application | ||
func GetApplicationDetails(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (string, int, error) { | ||
|
||
var appReplica int | ||
var appName string | ||
// Get Deployment replica count | ||
applicationList, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).List(metav1.ListOptions{LabelSelector: experimentsDetails.AppLabel}) | ||
if err != nil || len(applicationList.Items) == 0 { | ||
return "", 0, errors.Errorf("Unable to get application, err: %v", err) | ||
} | ||
for _, app := range applicationList.Items { | ||
appReplica = int(*app.Spec.Replicas) | ||
appName = app.Name | ||
|
||
} | ||
return appName, appReplica, nil | ||
|
||
} | ||
|
||
//PodAutoscalerChaos scales up the application pod replicas | ||
func PodAutoscalerChaos(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, replicaCount int, appName string, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { | ||
|
||
applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS) | ||
|
||
replicas := int32(experimentsDetails.Replicas) | ||
// Scale Application | ||
retryErr := retries.RetryOnConflict(retries.DefaultRetry, func() error { | ||
// Retrieve the latest version of Deployment before attempting update | ||
// RetryOnConflict uses exponential backoff to avoid exhausting the apiserver | ||
appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{}) | ||
if err != nil { | ||
return errors.Errorf("Failed to get latest version of Application Deployment: %v", err) | ||
} | ||
|
||
appUnderTest.Spec.Replicas = int32Ptr(replicas) // modify replica count | ||
_, updateErr := applicationClient.Update(appUnderTest) | ||
return updateErr | ||
}) | ||
if retryErr != nil { | ||
return errors.Errorf("Unable to scale the application, due to: %v", retryErr) | ||
} | ||
log.Info("Application Started Scaling") | ||
|
||
err = ApplicationPodStatusCheck(experimentsDetails, appName, clients, replicaCount, resultDetails, eventsDetails, chaosDetails) | ||
if err != nil { | ||
return errors.Errorf("Status Check failed, err: %v", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// ApplicationPodStatusCheck checks the status of the application pod | ||
func ApplicationPodStatusCheck(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets, replicaCount int, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { | ||
|
||
//ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin | ||
ChaosStartTimeStamp := time.Now().Unix() | ||
failFlag := false | ||
applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS) | ||
applicationDeploy, err := applicationClient.Get(appName, metav1.GetOptions{}) | ||
if err != nil { | ||
return errors.Errorf("Unable to get the application, err: %v", err) | ||
} | ||
for count := 0; count < int(experimentsDetails.ChaosDuration/2); count++ { | ||
|
||
if int(applicationDeploy.Status.AvailableReplicas) != experimentsDetails.Replicas { | ||
|
||
log.Infof("Application Pod Avaliable Count is: %s", strconv.Itoa(int(applicationDeploy.Status.AvailableReplicas))) | ||
applicationDeploy, err = applicationClient.Get(appName, metav1.GetOptions{}) | ||
if err != nil { | ||
return errors.Errorf("Unable to get the application, err: %v", err) | ||
} | ||
|
||
time.Sleep(2 * time.Second) | ||
//ChaosCurrentTimeStamp contains the current timestamp | ||
ChaosCurrentTimeStamp := time.Now().Unix() | ||
|
||
//ChaosDiffTimeStamp contains the difference of current timestamp and start timestamp | ||
//It will helpful to track the total chaos duration | ||
chaosDiffTimeStamp := ChaosCurrentTimeStamp - ChaosStartTimeStamp | ||
if int(chaosDiffTimeStamp) >= experimentsDetails.ChaosDuration { | ||
failFlag = true | ||
break | ||
} | ||
|
||
} else { | ||
break | ||
} | ||
} | ||
if failFlag == true { | ||
err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName) | ||
if err != nil { | ||
return errors.Errorf("Unable to perform autoscaling, due to %v", err) | ||
} | ||
return errors.Errorf("Application pod fails to come in running state after Chaos Duration of %d sec", experimentsDetails.ChaosDuration) | ||
} | ||
// Keeping a wait time of 10s after all pod comes in running state | ||
// This is optional and used just for viewing the pod status | ||
time.Sleep(10 * time.Second) | ||
|
||
return nil | ||
} | ||
|
||
//AutoscalerRecovery scale back to initial number of replica | ||
func AutoscalerRecovery(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, replicaCount int, appName string) error { | ||
|
||
applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.ChaosNamespace) | ||
|
||
// Scale back to initial number of replicas | ||
retryErr := retries.RetryOnConflict(retries.DefaultRetry, func() error { | ||
// Retrieve the latest version of Deployment before attempting update | ||
// RetryOnConflict uses exponential backoff to avoid exhausting the apiserver | ||
appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{}) | ||
if err != nil { | ||
return errors.Errorf("Failed to get latest version of Application Deployment: %v", err) | ||
} | ||
|
||
appUnderTest.Spec.Replicas = int32Ptr(int32(replicaCount)) // modify replica count | ||
_, updateErr := applicationClient.Update(appUnderTest) | ||
return updateErr | ||
}) | ||
if retryErr != nil { | ||
return errors.Errorf("Unable to scale the, due to: %v", retryErr) | ||
} | ||
log.Info("[Info]: Application pod started rolling back") | ||
|
||
applicationDeploy, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).Get(appName, metav1.GetOptions{}) | ||
if err != nil { | ||
return errors.Errorf("Unable to get the application, err: %v", err) | ||
} | ||
|
||
failFlag := false | ||
// Check for 30 retries with 2secs of delay | ||
for count := 0; count < 30; count++ { | ||
|
||
if int(applicationDeploy.Status.AvailableReplicas) != replicaCount { | ||
|
||
applicationDeploy, err = applicationClient.Get(appName, metav1.GetOptions{}) | ||
if err != nil { | ||
return errors.Errorf("Unable to get the application, err: %v", err) | ||
} | ||
time.Sleep(2 * time.Second) | ||
if count == 30 { | ||
failFlag = true | ||
break | ||
} | ||
|
||
} else { | ||
break | ||
} | ||
} | ||
if failFlag == true { | ||
return errors.Errorf("Application fails to roll back") | ||
} | ||
log.Info("[RollBack]: Application Pod roll back to initial number of replicas") | ||
|
||
return nil | ||
} | ||
|
||
func int32Ptr(i int32) *int32 { return &i } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
## Experiment Metadata | ||
|
||
<table> | ||
<tr> | ||
<th> Name </th> | ||
<th> Description </th> | ||
<th> Documentation Link </th> | ||
</tr> | ||
<tr> | ||
<td> Pod Autoscaler</td> | ||
<td> Scale the deployment replicas to check the autoscaling capability. </td> | ||
<td> <a href="https://docs.litmuschaos.io/docs/pod-autoscaler/"> Here </a> </td> | ||
</tr> | ||
</table> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
package main | ||
|
||
import ( | ||
"github.com/litmuschaos/litmus-go/chaoslib/litmus/pod_autoscaler" | ||
clients "github.com/litmuschaos/litmus-go/pkg/clients" | ||
"github.com/litmuschaos/litmus-go/pkg/events" | ||
experimentEnv "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/environment" | ||
experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types" | ||
"github.com/litmuschaos/litmus-go/pkg/log" | ||
"github.com/litmuschaos/litmus-go/pkg/result" | ||
"github.com/litmuschaos/litmus-go/pkg/status" | ||
"github.com/litmuschaos/litmus-go/pkg/types" | ||
"github.com/sirupsen/logrus" | ||
) | ||
|
||
func init() { | ||
// Log as JSON instead of the default ASCII formatter. | ||
logrus.SetFormatter(&logrus.TextFormatter{ | ||
FullTimestamp: true, | ||
DisableSorting: true, | ||
DisableLevelTruncation: true, | ||
}) | ||
} | ||
|
||
func main() { | ||
|
||
var err error | ||
experimentsDetails := experimentTypes.ExperimentDetails{} | ||
resultDetails := types.ResultDetails{} | ||
eventsDetails := types.EventDetails{} | ||
clients := clients.ClientSets{} | ||
chaosDetails := types.ChaosDetails{} | ||
|
||
//Getting kubeConfig and Generate ClientSets | ||
if err := clients.GenerateClientSetFromKubeConfig(); err != nil { | ||
log.Fatalf("Unable to Get the kubeconfig due to %v", err) | ||
} | ||
|
||
//Fetching all the ENV passed from the runner pod | ||
log.Infof("[PreReq]: Getting the ENV for the %v experiment", experimentsDetails.ExperimentName) | ||
experimentEnv.GetENV(&experimentsDetails, "pod-autoscaler") | ||
|
||
// Intialise the chaos attributes | ||
experimentEnv.InitialiseChaosVariables(&chaosDetails, &experimentsDetails) | ||
|
||
// Intialise Chaos Result Parameters | ||
types.SetResultAttributes(&resultDetails, chaosDetails) | ||
|
||
//Updating the chaos result in the beginning of experiment | ||
log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) | ||
err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") | ||
if err != nil { | ||
log.Errorf("Unable to Create the Chaos Result due to %v", err) | ||
failStep := "Updating the chaos result of pod-delete experiment (SOT)" | ||
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) | ||
return | ||
} | ||
|
||
// Set the chaos result uid | ||
result.SetResultUID(&resultDetails, clients, &chaosDetails) | ||
|
||
//DISPLAY THE APP INFORMATION | ||
log.InfoWithValues("The application informations are as follows", logrus.Fields{ | ||
"Namespace": experimentsDetails.AppNS, | ||
"Label": experimentsDetails.AppLabel, | ||
"Ramp Time": experimentsDetails.RampTime, | ||
}) | ||
|
||
//PRE-CHAOS APPLICATION STATUS CHECK | ||
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") | ||
err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) | ||
if err != nil { | ||
log.Errorf("Application status check failed due to %v\n", err) | ||
failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" | ||
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) | ||
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") | ||
return | ||
} | ||
if experimentsDetails.EngineName != "" { | ||
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT is Running successfully", "Normal", &chaosDetails) | ||
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") | ||
} | ||
|
||
// Including the litmus lib for pod-autoscaler | ||
if experimentsDetails.ChaosLib == "litmus" { | ||
err = pod_autoscaler.PreparePodAutoscaler(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) | ||
if err != nil { | ||
log.Errorf("Chaos injection failed due to %v\n", err) | ||
failStep := "Including the litmus lib for pod-autoscaler" | ||
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) | ||
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") | ||
return | ||
} | ||
log.Info("[Confirmation]: The application pod autoscaler completed successfully") | ||
resultDetails.Verdict = "Pass" | ||
} else { | ||
log.Error("[Invalid]: Please Provide the correct LIB") | ||
failStep := "Including the litmus lib for pod-autoscaler" | ||
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) | ||
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") | ||
return | ||
} | ||
|
||
//POST-CHAOS APPLICATION STATUS CHECK | ||
log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") | ||
err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) | ||
if err != nil { | ||
log.Errorf("Application status check failed due to %v\n", err) | ||
failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" | ||
types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) | ||
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") | ||
return | ||
} | ||
if experimentsDetails.EngineName != "" { | ||
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT is Running successfully", "Normal", &chaosDetails) | ||
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") | ||
} | ||
|
||
//Updating the chaosResult in the end of experiment | ||
log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) | ||
err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") | ||
if err != nil { | ||
log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) | ||
} | ||
if experimentsDetails.EngineName != "" { | ||
msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" | ||
types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) | ||
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") | ||
} | ||
|
||
msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" | ||
types.SetResultEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &resultDetails) | ||
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") | ||
} |
Oops, something went wrong.