Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions api/v1alpha1/nodeoperation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ type NodeOperationList struct {
Items []NodeOperation `json:"items"`
}

// NodeRemediationName returns the name of the first owner reference whose
// APIVersion equals this API group's GroupVersion and whose Kind is
// "NodeRemediation". It returns the empty string when the NodeOperation has
// no such owner.
func (o *NodeOperation) NodeRemediationName() string {
	for i := range o.OwnerReferences {
		ref := &o.OwnerReferences[i]
		// Skip owners that belong to another API group/version or another kind.
		if ref.APIVersion != GroupVersion.String() || ref.Kind != "NodeRemediation" {
			continue
		}
		return ref.Name
	}
	return ""
}

// init registers the NodeOperation and NodeOperationList types with
// SchemeBuilder when the package is loaded.
func init() {
SchemeBuilder.Register(&NodeOperation{}, &NodeOperationList{})
}
28 changes: 28 additions & 0 deletions api/v1alpha1/noderemediation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,23 @@ type NodeConditionMatcher struct {
Status corev1.ConditionStatus `json:"status"`
}

// NodeStatus is the result of comparing a Node's conditions with the
// condition matchers of a NodeRemediation rule (see CompareNodeCondition).
type NodeStatus string

const (
// NodeStatusUnknown is the zero value. CompareNodeCondition returns it when
// a Node condition of a matcher's type has status Unknown.
NodeStatusUnknown NodeStatus = ""
// NodeStatusOK means at least one matcher is not satisfied by the Node's
// conditions.
NodeStatusOK NodeStatus = "OK"
// NodeStatusBad means every matcher is satisfied by the Node's conditions.
NodeStatusBad NodeStatus = "Bad"
)

// NodeRemediationStatus defines the observed state of NodeRemediation
// It holds a reference named ActiveNodeOperation, the count of executed
// NodeOperations, and the NodeStatus computed from the Node's conditions.
type NodeRemediationStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file
// NOTE(review): ActiveNodeOperation presumably points at the NodeOperation
// currently in progress for this remediation — confirm against the controller.
ActiveNodeOperation corev1.ObjectReference `json:"activeNodeOperation,omitempty"`
// OperationsCount is num of NodeOperations executed by the NodeRemediation. Once the Node is remediated, this count will be reset to 0.
OperationsCount int64 `json:"operationsCount"`
// NodeStatus represents whether Node should be remediated or not.
NodeStatus NodeStatus `json:"nodeStatus"`
}

//+kubebuilder:object:root=true
Expand Down Expand Up @@ -81,3 +91,21 @@ type NodeRemediationList struct {
// init registers the NodeRemediation and NodeRemediationList types with
// SchemeBuilder when the package is loaded.
func init() {
SchemeBuilder.Register(&NodeRemediation{}, &NodeRemediationList{})
}

// CompareNodeCondition evaluates the given Node conditions against the
// condition matchers in r.Spec.Rule.Conditions and reports the NodeStatus.
//
// Matchers are checked in order; for each one the conditions are scanned in
// order:
//   - a condition with the same Type and Status satisfies the matcher;
//   - a condition with the same Type but status Unknown, seen before any
//     satisfying condition, yields NodeStatusUnknown immediately;
//   - if no condition satisfies the matcher, the result is NodeStatusOK.
//
// NodeStatusBad is returned when every matcher is satisfied, which includes
// the case where there are no matchers at all.
func (r *NodeRemediation) CompareNodeCondition(conditions []corev1.NodeCondition) NodeStatus {
	for _, want := range r.Spec.Rule.Conditions {
		satisfied := false
		for i := range conditions {
			got := &conditions[i]
			if got.Type != want.Type {
				continue
			}
			// The exact-match check must come first so that a matcher which
			// itself expects Unknown is treated as satisfied.
			if got.Status == want.Status {
				satisfied = true
				break
			}
			if got.Status == corev1.ConditionUnknown {
				return NodeStatusUnknown
			}
		}
		if !satisfied {
			return NodeStatusOK
		}
	}
	return NodeStatusBad
}
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,18 @@ spec:
type: string
type: object
x-kubernetes-map-type: atomic
nodeStatus:
description: NodeStatus represents whether Node should be remediated
or not.
type: string
operationsCount:
description: OperationsCount is num of NodeOperations executed by
the NodeRemediation. Once the Node is remediated, this count will
be reset to 0.
format: int64
type: integer
required:
- nodeStatus
- operationsCount
type: object
type: object
Expand Down
19 changes: 17 additions & 2 deletions controllers/nodeoperation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,17 +359,32 @@ func (r *NodeOperationReconciler) reconcileRunning(ctx context.Context, nodeOp *
nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseFailed
} else {
_, condition := isJobFinished(&job)

switch condition {
case "": // ongoing
case "":
return ctrl.Result{}, nil
case batchv1.JobFailed:
r.eventRecorder.Eventf(nodeOp, "Normal", "JobFinished", `Job "%s" in "%s" has failed`, job.Name, job.Namespace)
nodeOp.Status.Reason = "Job has failed"
nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseFailed
case batchv1.JobComplete:
r.eventRecorder.Eventf(nodeOp, "Normal", "JobFinished", `Job "%s" in "%s" has completed`, job.Name, job.Namespace)

if remediationName := nodeOp.NodeRemediationName(); remediationName != "" {
var remediation nodeopsv1alpha1.NodeRemediation
if err := r.Get(ctx, client.ObjectKey{Namespace: nodeOp.Namespace, Name: remediationName}, &remediation); err != nil {
return ctrl.Result{}, err
}

if remediation.Status.NodeStatus != nodeopsv1alpha1.NodeStatusOK {
r.eventRecorder.Eventf(nodeOp, corev1.EventTypeNormal, "JobCompletedButNotRemediated", `Job "%s" in "%s" has completed but the Node is not remediated yet.`, job.Name, job.Namespace)
return ctrl.Result{}, nil
}
}

nodeOp.Status.Reason = "Job has completed"
nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseCompleted
}
r.eventRecorder.Eventf(nodeOp, "Normal", "JobFinished", `Job "%s" in "%s" has finished`, job.Name, job.Namespace)
}

// untaint the Node
Expand Down
40 changes: 18 additions & 22 deletions controllers/noderemediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,19 @@ func (r *NodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return ctrl.Result{}, err
}

var node corev1.Node
if err := r.Get(ctx, types.NamespacedName{Name: remediation.Spec.NodeName}, &node); err != nil {
return ctrl.Result{}, err
}

nodeStatus := remediation.CompareNodeCondition(node.Status.Conditions)
if nodeStatus != remediation.Status.NodeStatus {
remediation.Status.NodeStatus = nodeStatus
if err := r.Status().Update(ctx, &remediation); err != nil {
return ctrl.Result{}, err
}
}

var childOps nodeopsv1alpha1.NodeOperationList
if err := r.List(ctx, &childOps, client.MatchingFields{operationRemediationOwnerKey: remediation.Name}); err != nil {
return ctrl.Result{}, err
Expand Down Expand Up @@ -108,18 +121,16 @@ func (r *NodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}

// Check node condition
var node corev1.Node
if err := r.Get(ctx, types.NamespacedName{Name: remediation.Spec.NodeName}, &node); err != nil {
return ctrl.Result{}, err
}

if !doesMatchConditions(node.Status.Conditions, remediation.Spec.Rule.Conditions) {
switch remediation.Status.NodeStatus {
case nodeopsv1alpha1.NodeStatusUnknown:
r.eventRecorder.Eventf(&remediation, corev1.EventTypeNormal, "UnknownNodeStatus", "Because at least one Node condition is unknown status, remediation process is skipped")
return ctrl.Result{}, nil
case nodeopsv1alpha1.NodeStatusOK:
// reset OperationsCount
remediation.Status.OperationsCount = 0
if err := r.Status().Update(ctx, &remediation); err != nil {
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -225,18 +236,3 @@ func (r *NodeRemediationReconciler) SetupWithManager(mgr ctrl.Manager) error {
Watches(&source.Kind{Type: &corev1.Node{}}, handler.EnqueueRequestsFromMapFunc(nodeMapFn)).
Complete(r)
}

// doesMatchConditions reports whether every matcher is satisfied by at least
// one condition that has the same Type and the same Status. It returns true
// when matchers is empty.
func doesMatchConditions(conditions []corev1.NodeCondition, matchers []nodeopsv1alpha1.NodeConditionMatcher) bool {
nextMatcher:
	for _, want := range matchers {
		for _, got := range conditions {
			if got.Type == want.Type && got.Status == want.Status {
				continue nextMatcher
			}
		}
		// No condition satisfied this matcher.
		return false
	}
	return true
}
30 changes: 26 additions & 4 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -665,21 +665,41 @@ var _ = Describe("NodeRemediation", func() {

node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
Type: "TestRemediation",
Status: corev1.ConditionTrue,
Status: corev1.ConditionUnknown,
Reason: "testing",
Message: "testing",
LastHeartbeatTime: metav1.NewTime(time.Now()),
LastTransitionTime: metav1.NewTime(time.Now()),
})
Expect(k8sClient.Status().Update(ctx, &node)).NotTo(HaveOccurred())

Eventually(func() bool {
events := &corev1.EventList{}
Expect(k8sClient.List(ctx, events)).NotTo(HaveOccurred())

for _, event := range events.Items {
if event.InvolvedObject.GroupVersionKind().String() == "nodeops.k8s.preferred.jp/v1alpha1, Kind=NodeRemediation" &&
event.InvolvedObject.Name == remediation.Name &&
event.Type == corev1.EventTypeNormal &&
event.Reason == "UnknownNodeStatus" {
return true
}
}
return false
}, eventuallyTimeout).Should(BeTrue())

node.Status.Conditions[len(node.Status.Conditions)-1].Status = corev1.ConditionTrue
Expect(k8sClient.Status().Update(ctx, &node)).NotTo(HaveOccurred())

var nodeOp *nodeopsv1alpha1.NodeOperation
Eventually(func() bool {
nodeOpList := nodeopsv1alpha1.NodeOperationList{}
Expect(k8sClient.List(ctx, &nodeOpList)).NotTo(HaveOccurred())

for _, op := range nodeOpList.Items {
for _, owner := range op.OwnerReferences {
if owner.Kind == "NodeRemediation" && owner.Name == remediation.Name {
nodeOp = &op
return true
}
}
Expand All @@ -693,10 +713,12 @@ var _ = Describe("NodeRemediation", func() {
Expect(k8sClient.List(ctx, events)).NotTo(HaveOccurred())

for _, event := range events.Items {
if event.InvolvedObject.GroupVersionKind().String() == "nodeops.k8s.preferred.jp/v1alpha1, Kind=NodeRemediation" &&
event.InvolvedObject.Name == remediation.Name &&
gvk := event.InvolvedObject.GroupVersionKind()
if gvk.Group == "nodeops.k8s.preferred.jp" &&
gvk.Kind == "NodeOperation" &&
event.InvolvedObject.Name == nodeOp.Name &&
event.Type == corev1.EventTypeNormal &&
event.Reason == "NodeIsNotRemediated" {
event.Reason == "JobCompletedButNotRemediated" {
return true
}
}
Expand Down