Skip to content

Commit

Permalink
Merge pull request kubernetes-sigs#74 from lreciomelero/fix/Extract_m…
Browse files Browse the repository at this point in the history
…hc_from_templates

extracted machinehealthchecks from templates
  • Loading branch information
lreciomelero committed Mar 23, 2023
2 parents 81966f4 + bfea24a commit 0b266ef
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 57 deletions.
18 changes: 0 additions & 18 deletions pkg/cluster/internal/create/actions/cluster/templates/aws.eks.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -200,21 +200,3 @@ spec:
{{- end }}
{{- end }}
{{- end }}
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineHealthCheck
metadata:
name: "{{ $.Descriptor.ClusterID }}-worker-node-unhealthy"
spec:
clusterName: "{{ $.Descriptor.ClusterID }}"
nodeStartupTimeout: 300s
selector:
matchLabels:
keos.stratio.com/machine-role: "{{ $.Descriptor.ClusterID }}-worker-node"
unhealthyConditions:
- type: Ready
status: Unknown
timeout: 60s
- type: Ready
status: 'False'
timeout: 60s
36 changes: 0 additions & 36 deletions pkg/cluster/internal/create/actions/cluster/templates/gcp.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -277,39 +277,3 @@ spec:
{{- end }}
{{- end }}
{{- end }}
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineHealthCheck
metadata:
name: "{{ $.Descriptor.ClusterID }}-controlplane-node-unhealthy"
spec:
clusterName: "{{ $.Descriptor.ClusterID }}"
nodeStartupTimeout: 300s
selector:
matchLabels:
keos.stratio.com/machine-role: "{{ $.Descriptor.ClusterID }}-control-plane-node"
unhealthyConditions:
- type: Ready
status: Unknown
timeout: 60s
- type: Ready
status: 'False'
timeout: 60s
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineHealthCheck
metadata:
name: "{{ $.Descriptor.ClusterID }}-worker-node-unhealthy"
spec:
clusterName: "{{ $.Descriptor.ClusterID }}"
nodeStartupTimeout: 300s
selector:
matchLabels:
keos.stratio.com/machine-role: "{{ $.Descriptor.ClusterID }}-worker-node"
unhealthyConditions:
- type: Ready
status: Unknown
timeout: 60s
- type: Ready
status: 'False'
timeout: 60s
18 changes: 16 additions & 2 deletions pkg/cluster/internal/create/actions/createworker/createworker.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ limitations under the License.
package createworker

import (
_ "embed"
"bytes"
_ "embed"
"os"
"strings"

Expand Down Expand Up @@ -58,9 +58,12 @@ type Secrets struct {

// allowCommonEgressNetPol holds the contents of
// files/allow-all-egress_netpol.yaml, embedded into the binary at build time.
//
//go:embed files/allow-all-egress_netpol.yaml
var allowCommonEgressNetPol string

// denyallEgressIMDSGNetPol holds the contents of
// files/deny-all-egress-imds_gnetpol.yaml, embedded into the binary at build
// time. In common with the keos installer.
//
//go:embed files/deny-all-egress-imds_gnetpol.yaml
var denyallEgressIMDSGNetPol string

// allowCAPAEgressIMDSGNetPol holds the contents of
// files/allow-capa-egress-imds_gnetpol.yaml, embedded into the binary at build
// time.
//
//go:embed files/allow-capa-egress-imds_gnetpol.yaml
var allowCAPAEgressIMDSGNetPol string

Expand Down Expand Up @@ -317,6 +320,17 @@ func (a *action) Execute(ctx *actions.ActionContext) error {
return errors.Wrap(err, "failed to create the worker Cluster")
}
}
ctx.Status.End(true) // End Preparing nodes in workload cluster

ctx.Status.Start("Enabling workload cluster's self-healing 🏥")
defer ctx.Status.End(false)

err = enableSelfHealing(node, *descriptorFile, capiClustersNamespace)
if err := cmd.SetStdout(&raw).Run(); err != nil {
return errors.Wrap(err, "failed to enable workload cluster's self-healing")
}

ctx.Status.End(true) // End Enabling workload cluster's self-healing

ctx.Status.Start("Installing CAPx in workload cluster 🎖️")
defer ctx.Status.End(false)
Expand Down Expand Up @@ -407,7 +421,7 @@ func (a *action) Execute(ctx *actions.ActionContext) error {
}
}
}

ctx.Status.End(true) // End Installing Network Policy Engine in workload cluster

if descriptorFile.DeployAutoscaler {
Expand Down
62 changes: 61 additions & 1 deletion pkg/cluster/internal/create/actions/createworker/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ limitations under the License.
package createworker

import (
"bytes"
_ "embed"

"sigs.k8s.io/kind/pkg/cluster/internal/create/actions/cluster"
"sigs.k8s.io/kind/pkg/cluster/nodes"
"sigs.k8s.io/kind/pkg/errors"
"sigs.k8s.io/kind/pkg/cluster/internal/create/actions/cluster"
)

const (
Expand All @@ -36,6 +37,9 @@ const (
CalicoTemplate = "/kind/calico-helm-values.yaml"
)

// Locations, inside the node container, of the generated MachineHealthCheck
// manifests that are later applied with kubectl.
const (
	// machineHealthCheckWorkerNodePath is the manifest path for worker nodes.
	machineHealthCheckWorkerNodePath = "/kind/manifests/machinehealthcheckworkernode.yaml"
	// machineHealthCheckControlPlaneNodePath is the manifest path for
	// control-plane nodes.
	machineHealthCheckControlPlaneNodePath = "/kind/manifests/machinehealthcheckcontrolplane.yaml"
)

type PBuilder interface {
setCapx(managed bool)
setCapxEnvVars(p ProviderParams)
Expand Down Expand Up @@ -174,3 +178,59 @@ func (p *Provider) installCAPXLocal(node nodes.Node) error {

return nil
}

// enableSelfHealing generates and applies the MachineHealthCheck manifests
// for the workload cluster described by descriptorFile.
//
// A MachineHealthCheck for the worker nodes is always created. One for the
// control-plane nodes is created first, but only when the control plane is
// not managed (descriptorFile.ControlPlane.Managed is false).
//
// node is the node on which the manifests are written and kubectl is run;
// namespace is passed to "kubectl -n" when applying each manifest.
//
// It returns the first error encountered, either while writing a manifest or
// while applying it.
func enableSelfHealing(node nodes.Node, descriptorFile cluster.DescriptorFile, namespace string) error {
	type mhcTarget struct {
		machineRole  string
		manifestPath string
	}

	var targets []mhcTarget
	if !descriptorFile.ControlPlane.Managed {
		targets = append(targets, mhcTarget{
			machineRole:  "-control-plane-node",
			manifestPath: machineHealthCheckControlPlaneNodePath,
		})
	}
	targets = append(targets, mhcTarget{
		machineRole:  "-worker-node",
		manifestPath: machineHealthCheckWorkerNodePath,
	})

	for _, target := range targets {
		// Do not try to apply a manifest that could not be written: kubectl
		// would either fail on a missing file or apply a stale one.
		if err := generateMHCManifest(node, descriptorFile.ClusterID, namespace, target.manifestPath, target.machineRole); err != nil {
			return err
		}

		var raw bytes.Buffer
		cmd := node.Command("kubectl", "-n", namespace, "apply", "-f", target.manifestPath)
		if err := cmd.SetStdout(&raw).Run(); err != nil {
			return errors.Wrap(err, "failed to apply the MachineHealthCheck manifest")
		}
	}

	return nil
}

// generateMHCManifest renders a Cluster API MachineHealthCheck manifest for a
// single machine role and writes it to manifestPath on node.
//
// Parameters:
//   - node: node on which the manifest file is written.
//   - clusterID: cluster identifier; used for spec.clusterName and as the
//     prefix of the resource name and of the machine-role selector label.
//   - namespace: namespace written into metadata.namespace. When empty it
//     falls back to "cluster-<clusterID>".
//   - manifestPath: destination file on the node; its directory must exist.
//   - machineRole: role suffix (for example "-worker-node") appended to
//     clusterID in the resource name and the selector label value.
//
// The manifest is streamed to the node over stdin rather than interpolated
// into a shell command line, so no character in clusterID, namespace or
// manifestPath is ever interpreted by a shell.
func generateMHCManifest(node nodes.Node, clusterID string, namespace string, manifestPath string, machineRole string) error {
	// Keep metadata.namespace in agreement with the "-n" flag the caller uses
	// for "kubectl apply"; kubectl rejects objects whose namespace differs.
	if namespace == "" {
		namespace = "cluster-" + clusterID
	}

	// Scalars are double-quoted so that purely numeric or boolean-looking
	// identifiers are still parsed as YAML strings.
	machineHealthCheck := `apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineHealthCheck
metadata:
  name: "` + clusterID + machineRole + `-unhealthy"
  namespace: "` + namespace + `"
spec:
  clusterName: "` + clusterID + `"
  nodeStartupTimeout: 300s
  selector:
    matchLabels:
      keos.stratio.com/machine-role: "` + clusterID + machineRole + `"
  unhealthyConditions:
    - type: Ready
      status: Unknown
      timeout: 60s
    - type: Ready
      status: 'False'
      timeout: 60s
`

	cmd := node.Command("cp", "/dev/stdin", manifestPath).SetStdin(bytes.NewBufferString(machineHealthCheck))
	if err := cmd.Run(); err != nil {
		return errors.Wrap(err, "failed to write the MachineHealthCheck manifest")
	}
	return nil
}

0 comments on commit 0b266ef

Please sign in to comment.