diff --git a/build/generate_go_binary b/build/generate_go_binary
index b174b3089..3ef83870d 100644
--- a/build/generate_go_binary
+++ b/build/generate_go_binary
@@ -26,5 +26,7 @@ go build -o build/_output/node-cpu-hog ./experiments/generic/node-cpu-hog
 go build -o build/_output/container-kill ./experiments/generic/container-kill
 # Buiding go binaries for disk_fill experiment
 go build -o build/_output/disk-fill ./experiments/generic/disk-fill
+# Building go binaries for pod-autoscaler experiment
+go build -o build/_output/pod-autoscaler ./experiments/generic/pod-autoscaler
 # Buiding go binaries for container_kill helper
 go build -o build/_output/container-killer ./chaoslib/litmus/container_kill/helper
diff --git a/chaoslib/litmus/pod_autoscaler/pod-autoscaler.go b/chaoslib/litmus/pod_autoscaler/pod-autoscaler.go
new file mode 100644
index 000000000..a0be3b876
--- /dev/null
+++ b/chaoslib/litmus/pod_autoscaler/pod-autoscaler.go
@@ -0,0 +1,215 @@
+package pod_autoscaler
+
+import (
+	"strconv"
+	"time"
+
+	clients "github.com/litmuschaos/litmus-go/pkg/clients"
+	experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types"
+	"github.com/litmuschaos/litmus-go/pkg/log"
+	"github.com/litmuschaos/litmus-go/pkg/types"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	retries "k8s.io/client-go/util/retry"
+
+	"github.com/pkg/errors"
+)
+
+var err error
+
+//PreparePodAutoscaler contains the preparation steps before chaos injection
+func PreparePodAutoscaler(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
+
+	appName, replicaCount, err := GetApplicationDetails(experimentsDetails, clients)
+	if err != nil {
+		return errors.Errorf("Unable to get the replicaCount of the application, err: %v", err)
+	}
+
+	//Waiting for the ramp time before chaos injection
+	if experimentsDetails.RampTime != 0 {
+		log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime))
+		waitForRampTime(experimentsDetails)
+	}
+
+	err = PodAutoscalerChaos(experimentsDetails, clients, replicaCount, appName, resultDetails, eventsDetails, chaosDetails)
+
+	if err != nil {
+		return errors.Errorf("Unable to perform autoscaling, err: %v", err)
+	}
+
+	err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName)
+	if err != nil {
+		return errors.Errorf("Unable to recover the autoscaling, err: %v", err)
+	}
+
+	//Waiting for the ramp time after chaos injection
+	if experimentsDetails.RampTime != 0 {
+		log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime))
+		waitForRampTime(experimentsDetails)
+	}
+	return nil
+}
+
+//waitForRampTime waits for the given ramp time duration (in seconds)
+func waitForRampTime(experimentsDetails *experimentTypes.ExperimentDetails) {
+	time.Sleep(time.Duration(experimentsDetails.RampTime) * time.Second)
+}
+
+//GetApplicationDetails returns the name and replica count of the application deployment
+func GetApplicationDetails(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (string, int, error) {
+
+	var appReplica int
+	var appName string
+	// Get Deployment replica count
+	applicationList, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).List(metav1.ListOptions{LabelSelector: experimentsDetails.AppLabel})
+	if err != nil || len(applicationList.Items) == 0 {
return "", 0, errors.Errorf("Unable to get application, err: %v", err) + } + for _, app := range applicationList.Items { + appReplica = int(*app.Spec.Replicas) + appName = app.Name + + } + return appName, appReplica, nil + +} + +//PodAutoscalerChaos scales up the application pod replicas +func PodAutoscalerChaos(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, replicaCount int, appName string, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + + applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS) + + replicas := int32(experimentsDetails.Replicas) + // Scale Application + retryErr := retries.RetryOnConflict(retries.DefaultRetry, func() error { + // Retrieve the latest version of Deployment before attempting update + // RetryOnConflict uses exponential backoff to avoid exhausting the apiserver + appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{}) + if err != nil { + return errors.Errorf("Failed to get latest version of Application Deployment: %v", err) + } + + appUnderTest.Spec.Replicas = int32Ptr(replicas) // modify replica count + _, updateErr := applicationClient.Update(appUnderTest) + return updateErr + }) + if retryErr != nil { + return errors.Errorf("Unable to scale the application, due to: %v", retryErr) + } + log.Info("Application Started Scaling") + + err = ApplicationPodStatusCheck(experimentsDetails, appName, clients, replicaCount, resultDetails, eventsDetails, chaosDetails) + if err != nil { + return errors.Errorf("Status Check failed, err: %v", err) + } + + return nil +} + +// ApplicationPodStatusCheck checks the status of the application pod +func ApplicationPodStatusCheck(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets, replicaCount int, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + + //ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin + ChaosStartTimeStamp := time.Now().Unix() + failFlag := false + applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS) + applicationDeploy, err := applicationClient.Get(appName, metav1.GetOptions{}) + if err != nil { + return errors.Errorf("Unable to get the application, err: %v", err) + } + for count := 0; count < int(experimentsDetails.ChaosDuration/2); count++ { + + if int(applicationDeploy.Status.AvailableReplicas) != experimentsDetails.Replicas { + + log.Infof("Application Pod Avaliable Count is: %s", strconv.Itoa(int(applicationDeploy.Status.AvailableReplicas))) + applicationDeploy, err = applicationClient.Get(appName, metav1.GetOptions{}) + if err != nil { + return errors.Errorf("Unable to get the application, err: %v", err) + } + + time.Sleep(2 * time.Second) + //ChaosCurrentTimeStamp contains the current timestamp + ChaosCurrentTimeStamp := time.Now().Unix() + + //ChaosDiffTimeStamp contains the difference of current timestamp and start timestamp + //It will helpful to track the total chaos duration + chaosDiffTimeStamp := ChaosCurrentTimeStamp - ChaosStartTimeStamp + if int(chaosDiffTimeStamp) >= experimentsDetails.ChaosDuration { + failFlag = true + break + } + + } else { + break + } + } + if failFlag == true { + err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName) + if err != nil { + return errors.Errorf("Unable to perform autoscaling, due to %v", err) + } + return 
errors.Errorf("Application pod fails to come in running state after Chaos Duration of %d sec", experimentsDetails.ChaosDuration) + } + // Keeping a wait time of 10s after all pod comes in running state + // This is optional and used just for viewing the pod status + time.Sleep(10 * time.Second) + + return nil +} + +//AutoscalerRecovery scale back to initial number of replica +func AutoscalerRecovery(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, replicaCount int, appName string) error { + + applicationClient := clients.KubeClient.AppsV1().Deployments(experimentsDetails.ChaosNamespace) + + // Scale back to initial number of replicas + retryErr := retries.RetryOnConflict(retries.DefaultRetry, func() error { + // Retrieve the latest version of Deployment before attempting update + // RetryOnConflict uses exponential backoff to avoid exhausting the apiserver + appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{}) + if err != nil { + return errors.Errorf("Failed to get latest version of Application Deployment: %v", err) + } + + appUnderTest.Spec.Replicas = int32Ptr(int32(replicaCount)) // modify replica count + _, updateErr := applicationClient.Update(appUnderTest) + return updateErr + }) + if retryErr != nil { + return errors.Errorf("Unable to scale the, due to: %v", retryErr) + } + log.Info("[Info]: Application pod started rolling back") + + applicationDeploy, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).Get(appName, metav1.GetOptions{}) + if err != nil { + return errors.Errorf("Unable to get the application, err: %v", err) + } + + failFlag := false + // Check for 30 retries with 2secs of delay + for count := 0; count < 30; count++ { + + if int(applicationDeploy.Status.AvailableReplicas) != replicaCount { + + applicationDeploy, err = applicationClient.Get(appName, metav1.GetOptions{}) + if err != nil { + return errors.Errorf("Unable to get the application, err: %v", err) + } + time.Sleep(2 * time.Second) + if count == 30 { + failFlag = true + break + } + + } else { + break + } + } + if failFlag == true { + return errors.Errorf("Application fails to roll back") + } + log.Info("[RollBack]: Application Pod roll back to initial number of replicas") + + return nil +} + +func int32Ptr(i int32) *int32 { return &i } diff --git a/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go b/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go index de5f3dae1..c2e99f095 100644 --- a/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go +++ b/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go @@ -91,7 +91,7 @@ func ExperimentCPU(experimentsDetails *experimentTypes.ExperimentDetails, client } // updating the chaosresult after stopped failStep := "CPU hog Chaos injection stopped!" - types.SetResultAfterCompletion(resultDetails, "Fail", "Stopped", failStep) + types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") // generating summary event in chaosengine diff --git a/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go b/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go index f41c44673..dbb6173d1 100644 --- a/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go +++ b/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go @@ -96,7 +96,7 @@ func ExperimentMemory(experimentsDetails *experimentTypes.ExperimentDetails, cli } // updating the chaosresult after stopped failStep := "Memory hog Chaos injection stopped!" 
diff --git a/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go b/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go
index de5f3dae1..c2e99f095 100644
--- a/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go
+++ b/chaoslib/litmus/pod_cpu_hog/pod-cpu-hog.go
@@ -91,7 +91,7 @@ func ExperimentCPU(experimentsDetails *experimentTypes.ExperimentDetails, client
 	}
 	// updating the chaosresult after stopped
 	failStep := "CPU hog Chaos injection stopped!"
-	types.SetResultAfterCompletion(resultDetails, "Fail", "Stopped", failStep)
+	types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep)
 	result.ChaosResult(chaosDetails, clients, resultDetails, "EOT")
 
 	// generating summary event in chaosengine
diff --git a/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go b/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go
index f41c44673..dbb6173d1 100644
--- a/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go
+++ b/chaoslib/litmus/pod_memory_hog/pod-memory-hog.go
@@ -96,7 +96,7 @@ func ExperimentMemory(experimentsDetails *experimentTypes.ExperimentDetails, cli
 	}
 	// updating the chaosresult after stopped
 	failStep := "Memory hog Chaos injection stopped!"
-	types.SetResultAfterCompletion(resultDetails, "Fail", "Stopped", failStep)
+	types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep)
 	result.ChaosResult(chaosDetails, clients, resultDetails, "EOT")
 
 	// generating summary event in chaosengine
diff --git a/experiments/generic/pod-autoscaler/README.md b/experiments/generic/pod-autoscaler/README.md
new file mode 100644
index 000000000..3792e25f3
--- /dev/null
+++ b/experiments/generic/pod-autoscaler/README.md
@@ -0,0 +1,14 @@
+## Experiment Metadata
+
+<table>
+<tr>
+<th> Name </th>
+<th> Description </th>
+<th> Documentation Link </th>
+</tr>
+<tr>
+<td> Pod Autoscaler </td>
+<td> Scale the deployment replicas to check the autoscaling capability. </td>
+<td> Here </td>
+</tr>
+</table>
\ No newline at end of file
diff --git a/experiments/generic/pod-autoscaler/pod-autoscaler.go b/experiments/generic/pod-autoscaler/pod-autoscaler.go
new file mode 100644
index 000000000..d4cc4ebe9
--- /dev/null
+++ b/experiments/generic/pod-autoscaler/pod-autoscaler.go
@@ -0,0 +1,134 @@
+package main
+
+import (
+	"github.com/litmuschaos/litmus-go/chaoslib/litmus/pod_autoscaler"
+	clients "github.com/litmuschaos/litmus-go/pkg/clients"
+	"github.com/litmuschaos/litmus-go/pkg/events"
+	experimentEnv "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/environment"
+	experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types"
+	"github.com/litmuschaos/litmus-go/pkg/log"
+	"github.com/litmuschaos/litmus-go/pkg/result"
+	"github.com/litmuschaos/litmus-go/pkg/status"
+	"github.com/litmuschaos/litmus-go/pkg/types"
+	"github.com/sirupsen/logrus"
+)
+
+func init() {
+	// Log as JSON instead of the default ASCII formatter.
+	logrus.SetFormatter(&logrus.TextFormatter{
+		FullTimestamp:          true,
+		DisableSorting:         true,
+		DisableLevelTruncation: true,
+	})
+}
+
+func main() {
+
+	var err error
+	experimentsDetails := experimentTypes.ExperimentDetails{}
+	resultDetails := types.ResultDetails{}
+	eventsDetails := types.EventDetails{}
+	clients := clients.ClientSets{}
+	chaosDetails := types.ChaosDetails{}
+
+	//Getting kubeConfig and Generate ClientSets
+	if err := clients.GenerateClientSetFromKubeConfig(); err != nil {
+		log.Fatalf("Unable to get the kubeconfig, err: %v", err)
+	}
+
+	//Fetching all the ENV passed from the runner pod
+	log.Infof("[PreReq]: Getting the ENV for the %v experiment", experimentsDetails.ExperimentName)
+	experimentEnv.GetENV(&experimentsDetails, "pod-autoscaler")
+
+	// Initialise the chaos attributes
+	experimentEnv.InitialiseChaosVariables(&chaosDetails, &experimentsDetails)
+
+	// Initialise Chaos Result Parameters
+	types.SetResultAttributes(&resultDetails, chaosDetails)
+
+	//Updating the chaos result in the beginning of experiment
+	log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName)
+	err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT")
+	if err != nil {
+		log.Errorf("Unable to create the chaos result, err: %v", err)
+		failStep := "Updating the chaos result of pod-autoscaler experiment (SOT)"
+		result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails)
+		return
+	}
+
+	// Set the chaos result uid
+	result.SetResultUID(&resultDetails, clients, &chaosDetails)
+
+	//DISPLAY THE APP INFORMATION
+	log.InfoWithValues("The application information is as follows", logrus.Fields{
+		"Namespace": experimentsDetails.AppNS,
+		"Label":     experimentsDetails.AppLabel,
+		"Ramp Time": experimentsDetails.RampTime,
+	})
+
+	//PRE-CHAOS APPLICATION STATUS CHECK
+	log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
+	err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients)
+	if err != nil {
+		log.Errorf("Application status check failed, err: %v", err)
+		failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)"
+		types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep)
+		result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
+		return
+	}
+	if experimentsDetails.EngineName != "" {
+		types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT is Running
successfully", "Normal", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + // Including the litmus lib for pod-autoscaler + if experimentsDetails.ChaosLib == "litmus" { + err = pod_autoscaler.PreparePodAutoscaler(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) + if err != nil { + log.Errorf("Chaos injection failed due to %v\n", err) + failStep := "Including the litmus lib for pod-autoscaler" + types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) + result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + return + } + log.Info("[Confirmation]: The application pod autoscaler completed successfully") + resultDetails.Verdict = "Pass" + } else { + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "Including the litmus lib for pod-autoscaler" + types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) + result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + return + } + + //POST-CHAOS APPLICATION STATUS CHECK + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") + err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) + if err != nil { + log.Errorf("Application status check failed due to %v\n", err) + failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) + result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + return + } + if experimentsDetails.EngineName != "" { + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT is Running successfully", "Normal", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + //Updating the chaosResult in the end of experiment + log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) + err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + if err != nil { + log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + } + if experimentsDetails.EngineName != "" { + msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" + types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" + types.SetResultEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &resultDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") +} diff --git a/experiments/generic/pod-autoscaler/rbac.yaml b/experiments/generic/pod-autoscaler/rbac.yaml new file mode 100644 index 000000000..5fe99c686 --- /dev/null +++ b/experiments/generic/pod-autoscaler/rbac.yaml @@ -0,0 +1,37 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pod-autoscaler-sa + namespace: default + labels: + name: pod-autoscaler-sa +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: pod-autoscaler-sa + labels: + name: pod-autoscaler-sa +rules: +- apiGroups: ["","litmuschaos.io","batch","apps"] + resources: ["pods","deployments","jobs","events","chaosengines","pods/log","chaosexperiments","chaosresults"] + verbs: 
["create","list","get","patch","update","delete"] +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get","list"] +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: pod-autoscaler-sa + labels: + name: pod-autoscaler-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: pod-autoscaler-sa +subjects: +- kind: ServiceAccount + name: pod-autoscaler-sa + namespace: default diff --git a/experiments/generic/pod-autoscaler/test/test.yml b/experiments/generic/pod-autoscaler/test/test.yml new file mode 100644 index 000000000..2ee838ed7 --- /dev/null +++ b/experiments/generic/pod-autoscaler/test/test.yml @@ -0,0 +1,57 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litmus-experiment +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-experiment + template: + metadata: + labels: + app: litmus-experiment + spec: + serviceAccountName: pod-autoscaler-sa + containers: + - name: gotest + image: busybox + command: + - sleep + - "3600" + env: + # provide application namespace + - name: APP_NAMESPACE + value: '' + + # provide application labels + - name: APP_LABEL + value: '' + + # provide application kind + - name: APP_KIND + value: '' + + - name: TOTAL_CHAOS_DURATION + value: '30' + + - name: REPLICA_COUNT + value: '' + + - name: CHAOS_NAMESPACE + value: '' + + ## Period to wait before/after injection of chaos + - name: RAMP_TIME + value: '' + + ## env var that describes the library used to execute the chaos + ## Only supported litmus + - name: LIB + value: 'litmus' + + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName diff --git a/go.sum b/go.sum index e4bbdcf14..2d96d9423 100644 --- a/go.sum +++ b/go.sum @@ -439,7 +439,6 @@ github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z github.com/litmuschaos/chaos-operator v0.0.0-20200813094130-29975d957deb h1:lzd3WVSylUHl8p9iFHtywbfp3YY0y8MB0FjfNuxuIdA= github.com/litmuschaos/chaos-operator v0.0.0-20200813094130-29975d957deb/go.mod h1:QHmeCgV1yDMBnNk5tz0c9OVGUUC7MWrVIAlrPWMRhK4= github.com/litmuschaos/elves v0.0.0-20200502084504-13be0e6937ee/go.mod h1:DsbHGNUq/78NZozWVVI9Q6eBei4I+JjlkkD5aibJ3MQ= -github.com/litmuschaos/litmus v0.0.0-20200812150524-3b68066d755a h1:Vie7SS45Jp0CIUtzHdjSflqst0CnW1tIBeygajG8l84= github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA= github.com/lucas-clemente/aes12 v0.0.0-20171027163421-cd47fb39b79f/go.mod h1:JpH9J1c9oX6otFSgdUHwUBUizmKlrMjxWnIAjff4m04= github.com/lucas-clemente/quic-clients v0.1.0/go.mod h1:y5xVIEoObKqULIKivu+gD/LU90pL73bTdtQjPBvtCBk= @@ -531,7 +530,6 @@ github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59P github.com/opencontainers/runc v1.0.0-rc2.0.20190611121236-6cc515888830/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runtime-spec v1.0.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.2.2/go.mod h1:+BLncwf63G4dgOzykXAxcmnFlUaOlkDdmw/CqsW6pjs= -github.com/openebs/litmus v0.0.0-20200811143911-7afcbbbbc10f h1:yNkTsy+IxrKHu72vGnekIs+4txmdkYXXz9dskYgl0A8= github.com/openebs/maya v0.0.0-20200411140727-1c81f9e017b0 h1:9o6+N3YkssQvUlmJnqNULSxsGFO/rb1we8MtYKr5ze4= github.com/openebs/maya v0.0.0-20200411140727-1c81f9e017b0/go.mod h1:QQY9cOHKQwZ73qbv6O//UYUBLNV2S0MRDIfG7t5KOCk= github.com/openshift/api v3.9.1-0.20190924102528-32369d4db2ad+incompatible/go.mod h1:dh9o4Fs58gpFXGSYfnVxGR9PnV53I8TW84pQaJDdGiY= diff --git 
a/pkg/generic/pod-autoscaler/environment/environment.go b/pkg/generic/pod-autoscaler/environment/environment.go new file mode 100644 index 000000000..ec049e71e --- /dev/null +++ b/pkg/generic/pod-autoscaler/environment/environment.go @@ -0,0 +1,51 @@ +package environment + +import ( + "os" + "strconv" + + experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types" + "github.com/litmuschaos/litmus-go/pkg/types" + clientTypes "k8s.io/apimachinery/pkg/types" +) + +//GetENV fetches all the env variables from the runner pod +func GetENV(experimentDetails *experimentTypes.ExperimentDetails, expName string) { + experimentDetails.ExperimentName = expName + experimentDetails.ChaosNamespace = Getenv("CHAOS_NAMESPACE", "litmus") + experimentDetails.EngineName = Getenv("CHAOSENGINE", "") + experimentDetails.ChaosDuration, _ = strconv.Atoi(Getenv("TOTAL_CHAOS_DURATION", "60")) + experimentDetails.RampTime, _ = strconv.Atoi(Getenv("RAMP_TIME", "0")) + experimentDetails.ChaosLib = Getenv("LIB", "litmus") + experimentDetails.AppNS = Getenv("APP_NAMESPACE", "") + experimentDetails.AppLabel = Getenv("APP_LABEL", "") + experimentDetails.AppKind = Getenv("APP_KIND", "") + experimentDetails.Replicas, _ = strconv.Atoi(Getenv("REPLICA_COUNT", "")) + experimentDetails.ChaosUID = clientTypes.UID(Getenv("CHAOS_UID", "")) + experimentDetails.InstanceID = Getenv("INSTANCE_ID", "") + experimentDetails.ChaosPodName = Getenv("POD_NAME", "") + experimentDetails.AuxiliaryAppInfo = Getenv("AUXILIARY_APPINFO", "") + experimentDetails.Delay, _ = strconv.Atoi(Getenv("STATUS_CHECK_DELAY", "2")) + experimentDetails.Timeout, _ = strconv.Atoi(Getenv("STATUS_CHECK_TIMEOUT", "180")) +} + +// Getenv fetch the env and set the default value, if any +func Getenv(key string, defaultValue string) string { + value := os.Getenv(key) + if value == "" { + value = defaultValue + } + return value +} + +//InitialiseChaosVariables initialise all the global variables +func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetails *experimentTypes.ExperimentDetails) { + + chaosDetails.ChaosNamespace = experimentDetails.ChaosNamespace + chaosDetails.ChaosPodName = experimentDetails.ChaosPodName + chaosDetails.ChaosUID = experimentDetails.ChaosUID + chaosDetails.EngineName = experimentDetails.EngineName + chaosDetails.ExperimentName = experimentDetails.ExperimentName + chaosDetails.InstanceID = experimentDetails.InstanceID + +} diff --git a/pkg/generic/pod-autoscaler/types/types.go b/pkg/generic/pod-autoscaler/types/types.go new file mode 100644 index 000000000..2fe7f45b7 --- /dev/null +++ b/pkg/generic/pod-autoscaler/types/types.go @@ -0,0 +1,26 @@ +package types + +import ( + clientTypes "k8s.io/apimachinery/pkg/types" +) + +// ExperimentDetails is for collecting all the experiment-related details +type ExperimentDetails struct { + ExperimentName string + EngineName string + ChaosDuration int + RampTime int + Replicas int + ChaosLib string + AppNS string + AppLabel string + AppKind string + ChaosUID clientTypes.UID + InstanceID string + ChaosNamespace string + ChaosPodName string + RunID string + AuxiliaryAppInfo string + Timeout int + Delay int +} diff --git a/pkg/status/application.go b/pkg/status/application.go index cb1a93cfd..f34d6eb67 100644 --- a/pkg/status/application.go +++ b/pkg/status/application.go @@ -47,7 +47,7 @@ func CheckAuxiliaryApplicationStatus(AuxiliaryAppDetails string, timeout, delay return nil } -// CheckPodStatus checks the status of the application pod +// CheckPodStatus checks 
the running status of the application pod func CheckPodStatus(appNs string, appLabel string, timeout, delay int, clients clients.ClientSets) error { err := retry. Times(uint(timeout / delay)). diff --git a/vendor/k8s.io/apimachinery/pkg/util/wait/doc.go b/vendor/k8s.io/apimachinery/pkg/util/wait/doc.go new file mode 100644 index 000000000..3f0c968ec --- /dev/null +++ b/vendor/k8s.io/apimachinery/pkg/util/wait/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package wait provides tools for polling or listening for changes +// to a condition. +package wait // import "k8s.io/apimachinery/pkg/util/wait" diff --git a/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go b/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go new file mode 100644 index 000000000..386c3e7ea --- /dev/null +++ b/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go @@ -0,0 +1,512 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package wait + +import ( + "context" + "errors" + "math/rand" + "sync" + "time" + + "k8s.io/apimachinery/pkg/util/runtime" +) + +// For any test of the style: +// ... +// <- time.After(timeout): +// t.Errorf("Timed out") +// The value for timeout should effectively be "forever." Obviously we don't want our tests to truly lock up forever, but 30s +// is long enough that it is effectively forever for the things that can slow down a run on a heavily contended machine +// (GC, seeks, etc), but not so long as to make a developer ctrl-c a test run if they do happen to break that test. +var ForeverTestTimeout = time.Second * 30 + +// NeverStop may be passed to Until to make it never stop. +var NeverStop <-chan struct{} = make(chan struct{}) + +// Group allows to start a group of goroutines and wait for their completion. +type Group struct { + wg sync.WaitGroup +} + +func (g *Group) Wait() { + g.wg.Wait() +} + +// StartWithChannel starts f in a new goroutine in the group. +// stopCh is passed to f as an argument. f should stop when stopCh is available. +func (g *Group) StartWithChannel(stopCh <-chan struct{}, f func(stopCh <-chan struct{})) { + g.Start(func() { + f(stopCh) + }) +} + +// StartWithContext starts f in a new goroutine in the group. +// ctx is passed to f as an argument. f should stop when ctx.Done() is available. +func (g *Group) StartWithContext(ctx context.Context, f func(context.Context)) { + g.Start(func() { + f(ctx) + }) +} + +// Start starts f in a new goroutine in the group. 
+func (g *Group) Start(f func()) { + g.wg.Add(1) + go func() { + defer g.wg.Done() + f() + }() +} + +// Forever calls f every period for ever. +// +// Forever is syntactic sugar on top of Until. +func Forever(f func(), period time.Duration) { + Until(f, period, NeverStop) +} + +// Until loops until stop channel is closed, running f every period. +// +// Until is syntactic sugar on top of JitterUntil with zero jitter factor and +// with sliding = true (which means the timer for period starts after the f +// completes). +func Until(f func(), period time.Duration, stopCh <-chan struct{}) { + JitterUntil(f, period, 0.0, true, stopCh) +} + +// UntilWithContext loops until context is done, running f every period. +// +// UntilWithContext is syntactic sugar on top of JitterUntilWithContext +// with zero jitter factor and with sliding = true (which means the timer +// for period starts after the f completes). +func UntilWithContext(ctx context.Context, f func(context.Context), period time.Duration) { + JitterUntilWithContext(ctx, f, period, 0.0, true) +} + +// NonSlidingUntil loops until stop channel is closed, running f every +// period. +// +// NonSlidingUntil is syntactic sugar on top of JitterUntil with zero jitter +// factor, with sliding = false (meaning the timer for period starts at the same +// time as the function starts). +func NonSlidingUntil(f func(), period time.Duration, stopCh <-chan struct{}) { + JitterUntil(f, period, 0.0, false, stopCh) +} + +// NonSlidingUntilWithContext loops until context is done, running f every +// period. +// +// NonSlidingUntilWithContext is syntactic sugar on top of JitterUntilWithContext +// with zero jitter factor, with sliding = false (meaning the timer for period +// starts at the same time as the function starts). +func NonSlidingUntilWithContext(ctx context.Context, f func(context.Context), period time.Duration) { + JitterUntilWithContext(ctx, f, period, 0.0, false) +} + +// JitterUntil loops until stop channel is closed, running f every period. +// +// If jitterFactor is positive, the period is jittered before every run of f. +// If jitterFactor is not positive, the period is unchanged and not jittered. +// +// If sliding is true, the period is computed after f runs. If it is false then +// period includes the runtime for f. +// +// Close stopCh to stop. f may not be invoked if stop channel is already +// closed. Pass NeverStop to if you don't want it stop. +func JitterUntil(f func(), period time.Duration, jitterFactor float64, sliding bool, stopCh <-chan struct{}) { + var t *time.Timer + var sawTimeout bool + + for { + select { + case <-stopCh: + return + default: + } + + jitteredPeriod := period + if jitterFactor > 0.0 { + jitteredPeriod = Jitter(period, jitterFactor) + } + + if !sliding { + t = resetOrReuseTimer(t, jitteredPeriod, sawTimeout) + } + + func() { + defer runtime.HandleCrash() + f() + }() + + if sliding { + t = resetOrReuseTimer(t, jitteredPeriod, sawTimeout) + } + + // NOTE: b/c there is no priority selection in golang + // it is possible for this to race, meaning we could + // trigger t.C and stopCh, and t.C select falls through. + // In order to mitigate we re-check stopCh at the beginning + // of every loop to prevent extra executions of f(). + select { + case <-stopCh: + return + case <-t.C: + sawTimeout = true + } + } +} + +// JitterUntilWithContext loops until context is done, running f every period. +// +// If jitterFactor is positive, the period is jittered before every run of f. 
+// If jitterFactor is not positive, the period is unchanged and not jittered. +// +// If sliding is true, the period is computed after f runs. If it is false then +// period includes the runtime for f. +// +// Cancel context to stop. f may not be invoked if context is already expired. +func JitterUntilWithContext(ctx context.Context, f func(context.Context), period time.Duration, jitterFactor float64, sliding bool) { + JitterUntil(func() { f(ctx) }, period, jitterFactor, sliding, ctx.Done()) +} + +// Jitter returns a time.Duration between duration and duration + maxFactor * +// duration. +// +// This allows clients to avoid converging on periodic behavior. If maxFactor +// is 0.0, a suggested default value will be chosen. +func Jitter(duration time.Duration, maxFactor float64) time.Duration { + if maxFactor <= 0.0 { + maxFactor = 1.0 + } + wait := duration + time.Duration(rand.Float64()*maxFactor*float64(duration)) + return wait +} + +// ErrWaitTimeout is returned when the condition exited without success. +var ErrWaitTimeout = errors.New("timed out waiting for the condition") + +// ConditionFunc returns true if the condition is satisfied, or an error +// if the loop should be aborted. +type ConditionFunc func() (done bool, err error) + +// Backoff holds parameters applied to a Backoff function. +type Backoff struct { + // The initial duration. + Duration time.Duration + // Duration is multiplied by factor each iteration, if factor is not zero + // and the limits imposed by Steps and Cap have not been reached. + // Should not be negative. + // The jitter does not contribute to the updates to the duration parameter. + Factor float64 + // The sleep at each iteration is the duration plus an additional + // amount chosen uniformly at random from the interval between + // zero and `jitter*duration`. + Jitter float64 + // The remaining number of iterations in which the duration + // parameter may change (but progress can be stopped earlier by + // hitting the cap). If not positive, the duration is not + // changed. Used for exponential backoff in combination with + // Factor and Cap. + Steps int + // A limit on revised values of the duration parameter. If a + // multiplication by the factor parameter would make the duration + // exceed the cap then the duration is set to the cap and the + // steps parameter is set to zero. + Cap time.Duration +} + +// Step (1) returns an amount of time to sleep determined by the +// original Duration and Jitter and (2) mutates the provided Backoff +// to update its Steps and Duration. +func (b *Backoff) Step() time.Duration { + if b.Steps < 1 { + if b.Jitter > 0 { + return Jitter(b.Duration, b.Jitter) + } + return b.Duration + } + b.Steps-- + + duration := b.Duration + + // calculate the next step + if b.Factor != 0 { + b.Duration = time.Duration(float64(b.Duration) * b.Factor) + if b.Cap > 0 && b.Duration > b.Cap { + b.Duration = b.Cap + b.Steps = 0 + } + } + + if b.Jitter > 0 { + duration = Jitter(duration, b.Jitter) + } + return duration +} + +// contextForChannel derives a child context from a parent channel. +// +// The derived context's Done channel is closed when the returned cancel function +// is called or when the parent channel is closed, whichever happens first. +// +// Note the caller must *always* call the CancelFunc, otherwise resources may be leaked. 
+func contextForChannel(parentCh <-chan struct{}) (context.Context, context.CancelFunc) { + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + select { + case <-parentCh: + cancel() + case <-ctx.Done(): + } + }() + return ctx, cancel +} + +// ExponentialBackoff repeats a condition check with exponential backoff. +// +// It repeatedly checks the condition and then sleeps, using `backoff.Step()` +// to determine the length of the sleep and adjust Duration and Steps. +// Stops and returns as soon as: +// 1. the condition check returns true or an error, +// 2. `backoff.Steps` checks of the condition have been done, or +// 3. a sleep truncated by the cap on duration has been completed. +// In case (1) the returned error is what the condition function returned. +// In all other cases, ErrWaitTimeout is returned. +func ExponentialBackoff(backoff Backoff, condition ConditionFunc) error { + for backoff.Steps > 0 { + if ok, err := condition(); err != nil || ok { + return err + } + if backoff.Steps == 1 { + break + } + time.Sleep(backoff.Step()) + } + return ErrWaitTimeout +} + +// Poll tries a condition func until it returns true, an error, or the timeout +// is reached. +// +// Poll always waits the interval before the run of 'condition'. +// 'condition' will always be invoked at least once. +// +// Some intervals may be missed if the condition takes too long or the time +// window is too short. +// +// If you want to Poll something forever, see PollInfinite. +func Poll(interval, timeout time.Duration, condition ConditionFunc) error { + return pollInternal(poller(interval, timeout), condition) +} + +func pollInternal(wait WaitFunc, condition ConditionFunc) error { + done := make(chan struct{}) + defer close(done) + return WaitFor(wait, condition, done) +} + +// PollImmediate tries a condition func until it returns true, an error, or the timeout +// is reached. +// +// PollImmediate always checks 'condition' before waiting for the interval. 'condition' +// will always be invoked at least once. +// +// Some intervals may be missed if the condition takes too long or the time +// window is too short. +// +// If you want to immediately Poll something forever, see PollImmediateInfinite. +func PollImmediate(interval, timeout time.Duration, condition ConditionFunc) error { + return pollImmediateInternal(poller(interval, timeout), condition) +} + +func pollImmediateInternal(wait WaitFunc, condition ConditionFunc) error { + done, err := condition() + if err != nil { + return err + } + if done { + return nil + } + return pollInternal(wait, condition) +} + +// PollInfinite tries a condition func until it returns true or an error +// +// PollInfinite always waits the interval before the run of 'condition'. +// +// Some intervals may be missed if the condition takes too long or the time +// window is too short. +func PollInfinite(interval time.Duration, condition ConditionFunc) error { + done := make(chan struct{}) + defer close(done) + return PollUntil(interval, condition, done) +} + +// PollImmediateInfinite tries a condition func until it returns true or an error +// +// PollImmediateInfinite runs the 'condition' before waiting for the interval. +// +// Some intervals may be missed if the condition takes too long or the time +// window is too short. 
+func PollImmediateInfinite(interval time.Duration, condition ConditionFunc) error { + done, err := condition() + if err != nil { + return err + } + if done { + return nil + } + return PollInfinite(interval, condition) +} + +// PollUntil tries a condition func until it returns true, an error or stopCh is +// closed. +// +// PollUntil always waits interval before the first run of 'condition'. +// 'condition' will always be invoked at least once. +func PollUntil(interval time.Duration, condition ConditionFunc, stopCh <-chan struct{}) error { + ctx, cancel := contextForChannel(stopCh) + defer cancel() + return WaitFor(poller(interval, 0), condition, ctx.Done()) +} + +// PollImmediateUntil tries a condition func until it returns true, an error or stopCh is closed. +// +// PollImmediateUntil runs the 'condition' before waiting for the interval. +// 'condition' will always be invoked at least once. +func PollImmediateUntil(interval time.Duration, condition ConditionFunc, stopCh <-chan struct{}) error { + done, err := condition() + if err != nil { + return err + } + if done { + return nil + } + select { + case <-stopCh: + return ErrWaitTimeout + default: + return PollUntil(interval, condition, stopCh) + } +} + +// WaitFunc creates a channel that receives an item every time a test +// should be executed and is closed when the last test should be invoked. +type WaitFunc func(done <-chan struct{}) <-chan struct{} + +// WaitFor continually checks 'fn' as driven by 'wait'. +// +// WaitFor gets a channel from 'wait()'', and then invokes 'fn' once for every value +// placed on the channel and once more when the channel is closed. If the channel is closed +// and 'fn' returns false without error, WaitFor returns ErrWaitTimeout. +// +// If 'fn' returns an error the loop ends and that error is returned. If +// 'fn' returns true the loop ends and nil is returned. +// +// ErrWaitTimeout will be returned if the 'done' channel is closed without fn ever +// returning true. +// +// When the done channel is closed, because the golang `select` statement is +// "uniform pseudo-random", the `fn` might still run one or multiple time, +// though eventually `WaitFor` will return. +func WaitFor(wait WaitFunc, fn ConditionFunc, done <-chan struct{}) error { + stopCh := make(chan struct{}) + defer close(stopCh) + c := wait(stopCh) + for { + select { + case _, open := <-c: + ok, err := fn() + if err != nil { + return err + } + if ok { + return nil + } + if !open { + return ErrWaitTimeout + } + case <-done: + return ErrWaitTimeout + } + } +} + +// poller returns a WaitFunc that will send to the channel every interval until +// timeout has elapsed and then closes the channel. +// +// Over very short intervals you may receive no ticks before the channel is +// closed. A timeout of 0 is interpreted as an infinity, and in such a case +// it would be the caller's responsibility to close the done channel. +// Failure to do so would result in a leaked goroutine. +// +// Output ticks are not buffered. If the channel is not ready to receive an +// item, the tick is skipped. +func poller(interval, timeout time.Duration) WaitFunc { + return WaitFunc(func(done <-chan struct{}) <-chan struct{} { + ch := make(chan struct{}) + + go func() { + defer close(ch) + + tick := time.NewTicker(interval) + defer tick.Stop() + + var after <-chan time.Time + if timeout != 0 { + // time.After is more convenient, but it + // potentially leaves timers around much longer + // than necessary if we exit early. 
+ timer := time.NewTimer(timeout) + after = timer.C + defer timer.Stop() + } + + for { + select { + case <-tick.C: + // If the consumer isn't ready for this signal drop it and + // check the other channels. + select { + case ch <- struct{}{}: + default: + } + case <-after: + return + case <-done: + return + } + } + }() + + return ch + }) +} + +// resetOrReuseTimer avoids allocating a new timer if one is already in use. +// Not safe for multiple threads. +func resetOrReuseTimer(t *time.Timer, d time.Duration, sawTimeout bool) *time.Timer { + if t == nil { + return time.NewTimer(d) + } + if !t.Stop() && !sawTimeout { + <-t.C + } + t.Reset(d) + return t +} diff --git a/vendor/k8s.io/client-go/util/retry/OWNERS b/vendor/k8s.io/client-go/util/retry/OWNERS new file mode 100644 index 000000000..dec3e88d6 --- /dev/null +++ b/vendor/k8s.io/client-go/util/retry/OWNERS @@ -0,0 +1,4 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +reviewers: +- caesarxuchao diff --git a/vendor/k8s.io/client-go/util/retry/util.go b/vendor/k8s.io/client-go/util/retry/util.go new file mode 100644 index 000000000..c80ff0877 --- /dev/null +++ b/vendor/k8s.io/client-go/util/retry/util.go @@ -0,0 +1,84 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package retry + +import ( + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" +) + +// DefaultRetry is the recommended retry for a conflict where multiple clients +// are making changes to the same resource. +var DefaultRetry = wait.Backoff{ + Steps: 5, + Duration: 10 * time.Millisecond, + Factor: 1.0, + Jitter: 0.1, +} + +// DefaultBackoff is the recommended backoff for a conflict where a client +// may be attempting to make an unrelated modification to a resource under +// active management by one or more controllers. +var DefaultBackoff = wait.Backoff{ + Steps: 4, + Duration: 10 * time.Millisecond, + Factor: 5.0, + Jitter: 0.1, +} + +// OnError executes the provided function repeatedly, retrying if the server returns a specified +// error. Callers should preserve previous executions if they wish to retry changes. It performs an +// exponential backoff. +// +// var pod *api.Pod +// err := retry.OnError(DefaultBackoff, errors.IsConflict, func() (err error) { +// pod, err = c.Pods("mynamespace").UpdateStatus(podStatus) +// return +// }) +// if err != nil { +// // may be conflict if max retries were hit +// return err +// } +// ... +// +// TODO: Make Backoff an interface? 
+func OnError(backoff wait.Backoff, errorFunc func(error) bool, fn func() error) error { + var lastConflictErr error + err := wait.ExponentialBackoff(backoff, func() (bool, error) { + err := fn() + switch { + case err == nil: + return true, nil + case errorFunc(err): + lastConflictErr = err + return false, nil + default: + return false, err + } + }) + if err == wait.ErrWaitTimeout { + err = lastConflictErr + } + return err +} + +// RetryOnConflict executes the function function repeatedly, retrying if the server returns a conflicting +func RetryOnConflict(backoff wait.Backoff, fn func() error) error { + return OnError(backoff, errors.IsConflict, fn) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 13976194a..5bd8bf418 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -205,6 +205,7 @@ k8s.io/apimachinery/pkg/util/sets k8s.io/apimachinery/pkg/util/strategicpatch k8s.io/apimachinery/pkg/util/validation k8s.io/apimachinery/pkg/util/validation/field +k8s.io/apimachinery/pkg/util/wait k8s.io/apimachinery/pkg/util/yaml k8s.io/apimachinery/pkg/version k8s.io/apimachinery/pkg/watch @@ -279,6 +280,7 @@ k8s.io/client-go/util/exec k8s.io/client-go/util/flowcontrol k8s.io/client-go/util/homedir k8s.io/client-go/util/keyutil +k8s.io/client-go/util/retry # k8s.io/klog v1.0.0 k8s.io/klog # k8s.io/kube-openapi v0.0.0-20200121204235-bf4fb3bd569c
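A closing note on the newly vendored packages: k8s.io/apimachinery/pkg/util/wait and k8s.io/client-go/util/retry are pulled in for RetryOnConflict, but wait.Poll could also replace the hand-rolled sleep loops in ApplicationPodStatusCheck and AutoscalerRecovery. A minimal sketch of that alternative is below, assuming the same pre-context client-go API vendored here; waitForAvailableReplicas is an illustrative helper and not part of the patch.

package statuscheck

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// waitForAvailableReplicas polls the Deployment every two seconds until the
// available replica count matches the expected value or the timeout elapses,
// in which case wait.Poll returns wait.ErrWaitTimeout.
func waitForAvailableReplicas(clientset kubernetes.Interface, namespace, name string, expected int32, timeout time.Duration) error {
	return wait.Poll(2*time.Second, timeout, func() (bool, error) {
		d, err := clientset.AppsV1().Deployments(namespace).Get(name, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		fmt.Printf("available replicas: %d/%d\n", d.Status.AvailableReplicas, expected)
		return d.Status.AvailableReplicas == expected, nil
	})
}

The patch keeps explicit loops so that ApplicationPodStatusCheck can trigger AutoscalerRecovery once the chaos duration is exceeded; the helper above only shows what the vendored wait package already offers.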