Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat) Deletion of Chaos Resources after Completion of Chaos-Runner #156

Merged
merged 1 commit into from
Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 37 additions & 3 deletions pkg/apis/litmuschaos/v1alpha1/chaosengine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,53 @@ type ChaosEngineSpec struct {
//Monitor Enable Status
Monitoring bool `json:"monitoring,omitempty"`
//JobCleanUpPolicy decides to retain or delete the jobs
JobCleanUpPolicy string `json:"jobCleanUpPolicy,omitempty"`
JobCleanUpPolicy CleanUpPolicy `json:"jobCleanUpPolicy,omitempty"`
//AuxiliaryAppInfo contains details of dependent applications (infra chaos)
AuxiliaryAppInfo string `json:"auxiliaryAppInfo,omitempty"`
//EngineStatus is a requirement for validation
EngineState string `json:"engineState"`
EngineState EngineState `json:"engineState"`
}

// EngineState provides interface for all supported strings in spec.EngineState
type EngineState string

const (
// EngineStateActive starts the reconcile call
EngineStateActive EngineState = "active"
// EngineStateStop stops the reconcile call
EngineStateStop EngineState = "stop"
)

// EngineStatus provides interface for all supported strings in status.EngineStatus
type EngineStatus string

const (
// EngineStatusInitialized is used for reconcile calls to start reconcile for creation
EngineStatusInitialized EngineStatus = "initialized"
// EngineStatusCompleted is used for reconcile calls to start reconcile for completion
EngineStatusCompleted EngineStatus = "completed"
// EngineStatusStopped is used for reconcile calls to start reconcile for delete
EngineStatusStopped EngineStatus = "stopped"
)

// CleanUpPolicy defines the garbage collection method used by chaos-operator
type CleanUpPolicy string

const (
//CleanUpPolicyDelete sets the garbage collection policy of chaos-operator to Delete Chaos Resources
CleanUpPolicyDelete CleanUpPolicy = "delete"

//CleanUpPolicyRetain sets the garbage collection policy of chaos-operator to Retain Chaos Resources
CleanUpPolicyRetain CleanUpPolicy = "retain"
)

// ChaosEngineStatus defines the observed state of ChaosEngine
// +k8s:openapi-gen=true

// ChaosEngineStatus derives information about status of individual experiments
type ChaosEngineStatus struct {
//
EngineStatus string `json:"engineStatus"`
EngineStatus EngineStatus `json:"engineStatus"`
//Detailed status of individual experiments
Experiments []ExperimentStatuses `json:"experiments"`
}
Expand Down
267 changes: 193 additions & 74 deletions pkg/controller/chaosengine/chaosengine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,75 +160,50 @@ func (r *ReconcileChaosEngine) Reconcile(request reconcile.Request) (reconcile.R
return reconcile.Result{}, err
}

if engine.Instance.Spec.EngineState == "" {

err := r.updateState(engine, "active")
reqLogger.Error(err, "Unable to Update Status in ChaosEngine Resource")

}

if engine.Instance.Spec.EngineState == "active" && engine.Instance.Status.EngineStatus == "" {

err := r.updateStatus(engine, "initialized")
if err != nil {
reqLogger.Error(err, "Unable to Update Status in ChaosEngine Resource")
}

err = r.addFinalzerToEngine(engine, finalizer)
if err != nil {
reqLogger.Error(err, "Unable to add Finalizers in ChaosEngine Resource")
// At the start of this reconcile calls, if the status of chaos engine is empty, fill it up with active
if err := r.initEngineState(engine); err != nil {
reqLogger.Error(err, "Unable to update EngineState in ChaosEngine Resource, due to error: %v", err)
}

// Check if the chaos-runner pod is completed or not
isCompleted := r.checkRunnerPodCompleted(engine)
// If isCompleted is true, then proceed, and verify if this is the first call of this type,
// by verifying that this engineStatus has never been seen before
// If thats the case, then udpate the engineStatus to completed
if isCompleted && engine.Instance.Status.EngineStatus != litmuschaosv1alpha1.EngineStatusCompleted {
if err := r.updateStatus(engine, litmuschaosv1alpha1.EngineStatusCompleted); err != nil {
return reconcile.Result{}, err
}

}

if checkEngineStateForStop(engine) {
reconcileResult, err := r.reconcileForDelete(request)
return reconcileResult, err

// Verify that the engineStatus is set to completed,
// if thats the case, then reconcile for completed
if checkEngineStatusForComplete(engine) {
return r.reconcileForComplete(request)
}
// Get the image for runner and monitor pod from chaosengine spec,operator env or default values.
setChaosResourceImage()

//getAnnotationCheck fetch the annotationCheck from engine spec
err = getAnnotationCheck()
if err != nil {
return reconcile.Result{}, err
// Verify that the engineState is set to stop
// if true, set the engineStatus as stopped
if checkEngineStateForStop(engine) {
if err := r.updateStatus(engine, litmuschaosv1alpha1.EngineStatusStopped); err != nil {
return reconcile.Result{}, err
}
}

// Fetch the app details from ChaosEngine instance. Check if app is present
// Also check, if the app is annotated for chaos & that the labels are unique
engine, err := getApplicationDetail(engine)
if err != nil {
return reconcile.Result{}, err
// Verify that the engineStatus is set to stopped,
// then reconcile for delete
if checkEngineStatusForStopped(engine) {
return r.reconcileForDelete(request)
}

if engine.Instance.Spec.AnnotationCheck == "true" {
// Determine whether apps with matching labels have chaos annotation set to true
engine, err = resource.CheckChaosAnnotation(engine)
if err != nil {
chaosTypes.Log.Info("Annotation check failed with", "error:", err)
return reconcile.Result{}, nil
// Verify that the engineState, and engineStatus to initalized chaos engine resources
if checkEngineForCreation(engine) {
if err = r.checkRunnerPodForCompletion(engine, reqLogger); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we checking and updating the completed status?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are checking because, if state, and status is active and initialized, so whenever the call goes inside checkEngineForCreation (fi condition) we have to check if the runner going into completed and if so then update status

Copy link
Member

@ksatchit ksatchit Mar 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a single function checkRunnerPodCompleted & checkRunnerPodForCompletion to derive pod status

return reconcile.Result{}, err
}
}
// Define an engineRunner pod which is secondary-resource #1
engineRunner, err := newRunnerPodForCR(*engine)
if err != nil {
return reconcile.Result{}, err
return r.reconcileForCreationAndRunning(engine, request)
}

//Check if the engineRunner pod already exists, else create
engineReconcile, err := r.checkEngineRunnerPod(reqLogger, engineRunner)
if err != nil {
return reconcile.Result{}, err
}
// If monitoring is set to true,
// Define an engineMonitor pod which is secondary-resource #2 and
// Define an engineMonitor service which is secondary-resource #3
// in the same namespace as CR
reconcileResult, err := checkMonitoring(engineReconcile, reqLogger)
if err != nil {
return reconcileResult, err
}
return reconcile.Result{}, nil
}

Expand Down Expand Up @@ -623,16 +598,11 @@ func (r *ReconcileChaosEngine) reconcileForDelete(request reconcile.Request) (re
if err != nil {
return reconcileResult, err
}
reconcileResult, err = r.removeChaosServices(engine, request)
if err != nil {
return reconcileResult, err
}
opts := client.UpdateOptions{}

err = r.updateStatus(engine, "stopped")
err = r.removeChaosServices(engine, request)
if err != nil {
return reconcile.Result{}, err
}
opts := client.UpdateOptions{}

if engine.Instance.ObjectMeta.Finalizers != nil {
engine.Instance.ObjectMeta.Finalizers = utils.RemoveString(engine.Instance.ObjectMeta.Finalizers, "chaosengine.litmuschaos.io/finalizer")
Expand All @@ -645,21 +615,21 @@ func (r *ReconcileChaosEngine) reconcileForDelete(request reconcile.Request) (re

}

func (r *ReconcileChaosEngine) removeChaosServices(engine *chaosTypes.EngineInfo, request reconcile.Request) (reconcile.Result, error) {
func (r *ReconcileChaosEngine) removeChaosServices(engine *chaosTypes.EngineInfo, request reconcile.Request) error {
optsList := []client.ListOption{
client.InNamespace(request.NamespacedName.Namespace),
client.MatchingLabels{"chaosUID": string(engine.Instance.UID)},
client.MatchingLabels{"app": "chaos-exporter", "chaosUID": string(engine.Instance.UID)},
}
var serviceList corev1.ServiceList
if errList := r.client.List(context.TODO(), &serviceList, optsList...); errList != nil {
return reconcile.Result{}, errList
return errList
}
for _, v := range serviceList.Items {
if err := r.client.Delete(context.TODO(), &v, []client.DeleteOption{}...); err != nil {
return reconcile.Result{}, err
if errDel := r.client.Delete(context.TODO(), &v, []client.DeleteOption{}...); errDel != nil {
return errDel
}
}
return reconcile.Result{}, nil
return nil
}

func (r *ReconcileChaosEngine) removeChaosResources(engine *chaosTypes.EngineInfo, request reconcile.Request) (reconcile.Result, error) {
Expand Down Expand Up @@ -698,7 +668,7 @@ func (r *ReconcileChaosEngine) removeChaosResources(engine *chaosTypes.EngineInf
return reconcile.Result{}, nil
}

func (r *ReconcileChaosEngine) addFinalzerToEngine(engine *chaosTypes.EngineInfo, finalizer string) error {
func (r *ReconcileChaosEngine) addFinalizerToEngine(engine *chaosTypes.EngineInfo, finalizer string) error {
optsUpdate := client.UpdateOptions{}
if engine.Instance.ObjectMeta.Finalizers == nil {
engine.Instance.ObjectMeta.Finalizers = append(engine.Instance.ObjectMeta.Finalizers, finalizer)
Expand All @@ -711,23 +681,172 @@ func (r *ReconcileChaosEngine) addFinalzerToEngine(engine *chaosTypes.EngineInfo
return nil
}

func (r *ReconcileChaosEngine) updateStatus(engine *chaosTypes.EngineInfo, status string) error {
func (r *ReconcileChaosEngine) updateStatus(engine *chaosTypes.EngineInfo, status litmuschaosv1alpha1.EngineStatus) error {
opts := client.UpdateOptions{}
engine.Instance.Status.EngineStatus = status
return r.client.Update(context.TODO(), engine.Instance, &opts)
}

func (r *ReconcileChaosEngine) updateState(engine *chaosTypes.EngineInfo, state string) error {
func (r *ReconcileChaosEngine) updateState(engine *chaosTypes.EngineInfo, state litmuschaosv1alpha1.EngineState) error {
opts := client.UpdateOptions{}
engine.Instance.Spec.EngineState = state
return r.client.Update(context.TODO(), engine.Instance, &opts)
}

func checkEngineStateForStop(engine *chaosTypes.EngineInfo) bool {
deletetimeStamp := engine.Instance.ObjectMeta.GetDeletionTimestamp()
if engine.Instance.Spec.EngineState == "stop" ||
if engine.Instance.Spec.EngineState == litmuschaosv1alpha1.EngineStateStop ||
deletetimeStamp != nil {
return true
}
return false
}

func (r *ReconcileChaosEngine) checkRunnerPodCompleted(engine *chaosTypes.EngineInfo) bool {
runnerPod := corev1.Pod{}
r.client.Get(context.TODO(), types.NamespacedName{Name: engine.Instance.Name + "-runner", Namespace: engine.Instance.Namespace}, &runnerPod)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error return value of r.client.Get is not checked (from errcheck)

return runnerPod.Status.Phase == corev1.PodSucceeded
}

func (r *ReconcileChaosEngine) removeDefaultChaosResources(request reconcile.Request) (reconcile.Result, error) {
if err := r.removeChaosRunner(engine, request); err != nil {
return reconcile.Result{}, err
}
if err := r.removeChaosServices(engine, request); err != nil {
return reconcile.Result{}, err
}
return reconcile.Result{}, nil
}

func (r *ReconcileChaosEngine) removeChaosRunner(engine *chaosTypes.EngineInfo, request reconcile.Request) error {
optsList := []client.ListOption{
client.InNamespace(request.NamespacedName.Namespace),
client.MatchingLabels{"app": engine.Instance.Name, "chaosUID": string(engine.Instance.UID)},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use the name of chaos-runner to identify -- label with key app is a little too generic. We run the risk of deleting retained jobs as well?

}
var podList corev1.PodList
if errList := r.client.List(context.TODO(), &podList, optsList...); errList != nil {
return errList
}
for _, v := range podList.Items {
if errDel := r.client.Delete(context.TODO(), &v, []client.DeleteOption{}...); errDel != nil {
return errDel
}
}
return nil
}

func (r *ReconcileChaosEngine) checkRunnerPodForCompletion(engine *chaosTypes.EngineInfo, reqLogger logr.Logger) error {
isCompleted := r.checkRunnerPodCompleted(engine)
if isCompleted {
err := r.updateStatus(engine, litmuschaosv1alpha1.EngineStatusCompleted)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to be returning the pod status and allow updateStatus to happen at a central location inside the reconcileForComplete()

if err != nil {
return err
}
}
return nil
}

func checkEngineStatusForComplete(engine *chaosTypes.EngineInfo) bool {
return engine.Instance.Status.EngineStatus == litmuschaosv1alpha1.EngineStatusCompleted

}
func (r *ReconcileChaosEngine) reconcileForComplete(request reconcile.Request) (reconcile.Result, error) {
_, err := r.removeDefaultChaosResources(request)
if err != nil {
return reconcile.Result{}, err
}
if engine.Instance.ObjectMeta.Finalizers != nil {
engine.Instance.ObjectMeta.Finalizers = utils.RemoveString(engine.Instance.ObjectMeta.Finalizers, "chaosengine.litmuschaos.io/finalizer")
r.recorder.Eventf(engine.Instance, corev1.EventTypeNormal, "ChaosEngine Stopped", "Removing all experiment resources allocated to ChaosEngine: %v in Namespace: %v", engine.Instance.Name, engine.Instance.Namespace)
}
err = r.updateState(engine, litmuschaosv1alpha1.EngineStateStop)
if err != nil {
return reconcile.Result{}, err
}
return reconcile.Result{}, nil
}

func (r *ReconcileChaosEngine) initEngineState(engine *chaosTypes.EngineInfo) error {
if engine.Instance.Spec.EngineState == "" {
err := r.updateState(engine, litmuschaosv1alpha1.EngineStateActive)
return err
}
if err := r.initEngineStatus(engine); err != nil {
return err
}
return nil
}

func (r *ReconcileChaosEngine) initEngineStatus(engine *chaosTypes.EngineInfo) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do this function operation inside initEngineState function, like both are doing the operation at initializing time

if engine.Instance.Spec.EngineState == litmuschaosv1alpha1.EngineStateActive && engine.Instance.Status.EngineStatus != litmuschaosv1alpha1.EngineStatusCompleted {

err := r.updateStatus(engine, litmuschaosv1alpha1.EngineStatusInitialized)
if err != nil {
return err
}

err = r.addFinalizerToEngine(engine, finalizer)
if err != nil {
return err
}

}
return nil
}

func checkEngineStatusForStopped(engine *chaosTypes.EngineInfo) bool {
return (engine.Instance.Status.EngineStatus == litmuschaosv1alpha1.EngineStatusStopped && engine.Instance.Status.EngineStatus != litmuschaosv1alpha1.EngineStatusCompleted)

}
func checkEngineForCreation(engine *chaosTypes.EngineInfo) bool {
return engine.Instance.Status.EngineStatus == litmuschaosv1alpha1.EngineStatusInitialized && engine.Instance.Spec.EngineState == litmuschaosv1alpha1.EngineStateActive
}

func (r *ReconcileChaosEngine) reconcileForCreationAndRunning(engine *chaosTypes.EngineInfo, request reconcile.Request) (reconcile.Result, error) {
reqLogger := chaosTypes.Log.WithValues("Request.Namespace", request.Namespace, "Request.Name", request.Name)
reqLogger.Info("Reconciling ChaosEngine")
// Get the image for runner and monitor pod from chaosengine spec,operator env or default values.
setChaosResourceImage()

//getAnnotationCheck fetch the annotationCheck from engine spec
err := getAnnotationCheck()
if err != nil {
return reconcile.Result{}, err
}

// Fetch the app details from ChaosEngine instance. Check if app is present
// Also check, if the app is annotated for chaos & that the labels are unique
engine, err = getApplicationDetail(engine)
if err != nil {
return reconcile.Result{}, err
}

if engine.Instance.Spec.AnnotationCheck == "true" {
// Determine whether apps with matching labels have chaos annotation set to true
engine, err = resource.CheckChaosAnnotation(engine)
if err != nil {
chaosTypes.Log.Info("Annotation check failed with", "error:", err)
return reconcile.Result{}, nil
}
}
// Define an engineRunner pod which is secondary-resource #1
engineRunner, err := newRunnerPodForCR(*engine)
if err != nil {
return reconcile.Result{}, err
}

//Check if the engineRunner pod already exists, else create
engineReconcile, err := r.checkEngineRunnerPod(reqLogger, engineRunner)
if err != nil {
return reconcile.Result{}, err
}
// If monitoring is set to true,
// Define an engineMonitor pod which is secondary-resource #2 and
// Define an engineMonitor service which is secondary-resource #3
// in the same namespace as CR
reconcileResult, err := checkMonitoring(engineReconcile, reqLogger)
if err != nil {
return reconcileResult, err
}
return reconcile.Result{}, nil
}