Skip to content

Commit

Permalink
chore(onchaos): Busy wait for onchaos mode (litmuschaos#659)
Browse files Browse the repository at this point in the history
Signed-off-by: Shubham Chaudhary <shubham.chaudhary@harness.io>
  • Loading branch information
ispeakc0de committed Apr 27, 2023
1 parent db11337 commit 4b771b2
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 96 deletions.
64 changes: 30 additions & 34 deletions pkg/probe/cmdprobe.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ func getRunID() string {
// triggerInlineContinuousCmdProbe trigger the inline continuous cmd probes
func triggerInlineContinuousCmdProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {
var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
log.Infof("[Wait]: Waiting for %vs before probe execution", probe.RunProperties.InitialDelaySeconds)
Expand All @@ -314,15 +315,13 @@ loop:
err = triggerInlineCmdProbe(probe, chaosresult)
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("The %v cmd probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand All @@ -339,6 +338,7 @@ loop:
// triggerInlineOnChaosCmdProbe trigger the inline onchaos cmd probes
func triggerInlineOnChaosCmdProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {
var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
duration := chaosDetails.ChaosDuration
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
Expand All @@ -363,15 +363,13 @@ loop:
default:
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err = triggerInlineCmdProbe(probe, chaosresult); err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("The %v cmd probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand All @@ -390,6 +388,7 @@ loop:
func triggerSourceOnChaosCmdProbe(probe v1alpha1.ProbeAttributes, execCommandDetails litmusexec.PodDetails, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {

var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
duration := chaosDetails.ChaosDuration
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
Expand All @@ -412,15 +411,13 @@ loop:
default:
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err = triggerSourceCmdProbe(probe, execCommandDetails, clients, chaosresult); err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("The %v cmd probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand All @@ -440,6 +437,7 @@ loop:
func triggerSourceContinuousCmdProbe(probe v1alpha1.ProbeAttributes, execCommandDetails litmusexec.PodDetails, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {

var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
log.Infof("[Wait]: Waiting for %vs before probe execution", probe.RunProperties.InitialDelaySeconds)
Expand All @@ -453,15 +451,13 @@ loop:
err = triggerSourceCmdProbe(probe, execCommandDetails, clients, chaosresult)
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("The %v cmd probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand Down Expand Up @@ -648,14 +644,14 @@ func postChaosCmdProbe(probe v1alpha1.ProbeAttributes, resultDetails *types.Resu
case "Continuous", "OnChaos":
if reflect.DeepEqual(probe.CmdProbeInputs.Source, v1alpha1.SourceDetails{}) {
// it will check for the error, It will detect the error if any error encountered in probe during chaos
err = checkForErrorInContinuousProbe(resultDetails, probe.Name)
err = checkForErrorInContinuousProbe(resultDetails, chaosDetails.Timeout, chaosDetails.Delay, probe.Name)
// failing the probe, if the success condition doesn't met after the retry & timeout combinations
if err = markedVerdictInEnd(err, resultDetails, probe, "PostChaos"); err != nil {
return err
}
} else {
// it will check for the error, It will detect the error if any error encountered in probe during chaos
err = checkForErrorInContinuousProbe(resultDetails, probe.Name)
err = checkForErrorInContinuousProbe(resultDetails, chaosDetails.Timeout, chaosDetails.Delay, probe.Name)

// failing the probe, if the success condition doesn't met after the retry & timeout combinations
if err = markedVerdictInEnd(err, resultDetails, probe, "PostChaos"); err != nil {
Expand Down
32 changes: 15 additions & 17 deletions pkg/probe/httpprobe.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func getHTTPBody(httpBody v1alpha1.PostMethod) (string, error) {
// triggerContinuousHTTPProbe trigger the continuous http probes
func triggerContinuousHTTPProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {
var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
log.Infof("[Wait]: Waiting for %vs before probe execution", probe.RunProperties.InitialDelaySeconds)
Expand All @@ -211,15 +212,13 @@ loop:
err = triggerHTTPProbe(probe, chaosresult)
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("The %v http probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand Down Expand Up @@ -307,7 +306,7 @@ func postChaosHTTPProbe(probe v1alpha1.ProbeAttributes, resultDetails *types.Res
}
case "Continuous", "OnChaos":
// it will check for the error, It will detect the error if any error encountered in probe during chaos
err = checkForErrorInContinuousProbe(resultDetails, probe.Name)
err = checkForErrorInContinuousProbe(resultDetails, chaosDetails.Timeout, chaosDetails.Delay, probe.Name)
// failing the probe, if the success condition doesn't met after the retry & timeout combinations
if err = markedVerdictInEnd(err, resultDetails, probe, "PostChaos"); err != nil {
return err
Expand All @@ -320,6 +319,7 @@ func postChaosHTTPProbe(probe v1alpha1.ProbeAttributes, resultDetails *types.Res
func triggerOnChaosHTTPProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {

var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
duration := chaosDetails.ChaosDuration
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
Expand All @@ -343,15 +343,13 @@ loop:
err = triggerHTTPProbe(probe, chaosresult)
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}

probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand Down
32 changes: 15 additions & 17 deletions pkg/probe/k8sprobe.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ func triggerK8sProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets,
// triggerContinuousK8sProbe trigger the continuous k8s probes
func triggerContinuousK8sProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {
var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
log.Infof("[Wait]: Waiting for %vs before probe execution", probe.RunProperties.InitialDelaySeconds)
Expand All @@ -128,15 +129,13 @@ loop:
err = triggerK8sProbe(probe, clients, chaosresult)
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("the %v k8s probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand Down Expand Up @@ -255,7 +254,7 @@ func postChaosK8sProbe(probe v1alpha1.ProbeAttributes, resultDetails *types.Resu
}
case "continuous", "onchaos":
// it will check for the error, It will detect the error if any error encountered in probe during chaos
err = checkForErrorInContinuousProbe(resultDetails, probe.Name)
err = checkForErrorInContinuousProbe(resultDetails, chaosDetails.Timeout, chaosDetails.Delay, probe.Name)
// failing the probe, if the success condition doesn't met after the retry & timeout combinations
if err = markedVerdictInEnd(err, resultDetails, probe, "PostChaos"); err != nil {
return err
Expand Down Expand Up @@ -287,6 +286,7 @@ func onChaosK8sProbe(probe v1alpha1.ProbeAttributes, resultDetails *types.Result
func triggerOnChaosK8sProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) {

var isExperimentFailed bool
probeDetails := getProbeByName(probe.Name, chaosresult.ProbeDetails)
duration := chaosDetails.ChaosDuration
// waiting for initial delay
if probe.RunProperties.InitialDelaySeconds != 0 {
Expand All @@ -310,15 +310,13 @@ loop:
err = triggerK8sProbe(probe, clients, chaosresult)
// record the error inside the probeDetails, we are maintaining a dedicated variable for the err, inside probeDetails
if err != nil {
for index := range chaosresult.ProbeDetails {
if chaosresult.ProbeDetails[index].Name == probe.Name {
chaosresult.ProbeDetails[index].IsProbeFailedWithError = err
log.Errorf("The %v k8s probe has been Failed, err: %v", probe.Name, err)
isExperimentFailed = true
break loop
}
}
probeDetails.IsProbeFailedWithError = err
log.Errorf("%v http probe has Failed, err: %v", probe.Name, err)
isExperimentFailed = true
probeDetails.HasProbeExecutedOnce = true
break loop
}
probeDetails.HasProbeExecutedOnce = true
// waiting for the probe polling interval
time.Sleep(time.Duration(probe.RunProperties.ProbePollingInterval) * time.Second)
}
Expand Down
39 changes: 30 additions & 9 deletions pkg/probe/probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func getProbesFromEngine(chaosDetails *types.ChaosDetails, clients clients.Clien
// it fetches the probe details from the chaosengine and sets them into the chaosresult
func InitializeProbesInChaosResultDetails(chaosDetails *types.ChaosDetails, clients clients.ClientSets, chaosresult *types.ResultDetails) error {

var probeDetails []types.ProbeDetails
var probeDetails []*types.ProbeDetails
// get the probes from the chaosengine
probes, err := getProbesFromEngine(chaosDetails, clients)
if err != nil {
Expand All @@ -148,7 +148,7 @@ func InitializeProbesInChaosResultDetails(chaosDetails *types.ChaosDetails, clie

// set the probe details for k8s probe
for _, probe := range probes {
tempProbe := types.ProbeDetails{}
tempProbe := &types.ProbeDetails{}
tempProbe.Name = probe.Name
tempProbe.Type = probe.Type
tempProbe.Mode = probe.Mode
Expand Down Expand Up @@ -252,14 +252,25 @@ func getDescription(mode, phase string) string {
}

// checkForErrorInContinuousProbe checks for an error in the continuous probes
func checkForErrorInContinuousProbe(resultDetails *types.ResultDetails, probeName string) error {

for index, probe := range resultDetails.ProbeDetails {
if probe.Name == probeName {
return resultDetails.ProbeDetails[index].IsProbeFailedWithError
func checkForErrorInContinuousProbe(resultDetails *types.ResultDetails, timeout, delay int, probeName string) error {
probe := getProbeByName(probeName, resultDetails.ProbeDetails)
timeoutChan := time.After(time.Duration(timeout) * time.Second)

loop:
for {
select {
case <-timeoutChan:
return errors.Errorf("%v probe execution timed out", probeName)
default:
if probe.HasProbeExecutedOnce {
break loop
}
log.Infof("[Probe]: Waiting for %s probe to finish or timeout", probeName)
time.Sleep(time.Duration(delay) * time.Second)
}
}
return nil

return probe.IsProbeFailedWithError
}

// parseCommand parses the templated command and replaces the templated values with the actual values
Expand All @@ -282,7 +293,7 @@ func parseCommand(templatedCommand string, resultDetails *types.ResultDetails) (
// stopChaosEngine updates the probe status and patches the chaosengine to the stop state
func stopChaosEngine(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, chaosresult *types.ResultDetails, chaosDetails *types.ChaosDetails) error {
// it will check for the error, It will detect the error if any error encountered in probe during chaos
err = checkForErrorInContinuousProbe(chaosresult, probe.Name)
err = checkForErrorInContinuousProbe(chaosresult, chaosDetails.Timeout, chaosDetails.Delay, probe.Name)
// failing the probe, if the success condition doesn't met after the retry & timeout combinations
markedVerdictInEnd(err, chaosresult, probe, "PostChaos")
//patch chaosengine's state to stop
Expand Down Expand Up @@ -332,3 +343,13 @@ func getProbeVerdict(resultDetails *types.ResultDetails, name, probeType string)
}
return v1alpha1.ProbeVerdictNA
}

// getProbeByName scans probeDetails in order and hands back the first entry
// whose Name equals name. It yields nil when no entry matches.
func getProbeByName(name string, probeDetails []*types.ProbeDetails) *types.ProbeDetails {
	for idx := 0; idx < len(probeDetails); idx++ {
		if candidate := probeDetails[idx]; candidate.Name == name {
			return candidate
		}
	}
	return nil
}

0 comments on commit 4b771b2

Please sign in to comment.