🐛 Fix crash on delete with no bastion #2017

Merged: 2 commits, Apr 12, 2024
3 changes: 2 additions & 1 deletion Makefile
@@ -161,7 +161,8 @@ e2e-templates: $(addprefix $(E2E_NO_ARTIFACT_TEMPLATES_DIR)/, \
cluster-template.yaml \
cluster-template-flatcar.yaml \
cluster-template-k8s-upgrade.yaml \
cluster-template-flatcar-sysext.yaml)
cluster-template-flatcar-sysext.yaml \
cluster-template-no-bastion.yaml)
# Currently no templates that require CI artifacts
# $(addprefix $(E2E_TEMPLATES_DIR)/, add-templates-here.yaml) \

14 changes: 6 additions & 8 deletions controllers/openstackcluster_controller.go
@@ -291,15 +291,13 @@ func deleteBastion(scope *scope.WithLogger, cluster *clusterv1.Cluster, openStac
}

// If no instance was created we currently need to check for orphaned
// volumes. This requires resolving the instance spec.
// TODO: write volumes to status resources on creation so this is no longer required.
// volumes.
if instanceStatus == nil {
instanceSpec, err := bastionToInstanceSpec(openStackCluster, cluster)
if err != nil {
return err
}
if err := computeService.DeleteVolumes(instanceSpec); err != nil {
return fmt.Errorf("delete volumes: %w", err)
bastion := openStackCluster.Spec.Bastion
if bastion != nil && bastion.Spec != nil {
if err := computeService.DeleteVolumes(bastionName(cluster.Name), bastion.Spec.RootVolume, bastion.Spec.AdditionalBlockDevices); err != nil {
return fmt.Errorf("delete volumes: %w", err)
}
}
} else {
instanceNS, err := instanceStatus.NetworkStatus()
15 changes: 4 additions & 11 deletions controllers/openstackmachine_controller.go
@@ -300,19 +300,12 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope *scope.WithLogger, cl
}

// If no instance was created we currently need to check for orphaned
// volumes. This requires resolving the instance spec.
// TODO: write volumes to status resources on creation so this is no longer required.
if instanceStatus == nil && openStackMachine.Status.Resolved != nil {
instanceSpec, err := machineToInstanceSpec(openStackCluster, machine, openStackMachine, "")
if err != nil {
return ctrl.Result{}, err
}
if err := computeService.DeleteVolumes(instanceSpec); err != nil {
// volumes.
if instanceStatus == nil {
if err := computeService.DeleteVolumes(openStackMachine.Name, openStackMachine.Spec.RootVolume, openStackMachine.Spec.AdditionalBlockDevices); err != nil {
return ctrl.Result{}, fmt.Errorf("delete volumes: %w", err)
}
}

if instanceStatus != nil {
} else {
if err := computeService.DeleteInstance(openStackMachine, instanceStatus); err != nil {
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeleteFailedReason, clusterv1.ConditionSeverityError, "Deleting instance failed: %v", err)
return ctrl.Result{}, fmt.Errorf("delete instance: %w", err)
15 changes: 9 additions & 6 deletions pkg/cloud/services/compute/instance.go
@@ -431,11 +431,14 @@ func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *Ins
}

// DeleteVolumes deletes any cinder volumes which were created for the instance.
// Note that this must only be called when the server was not successfully
// Note that this need only be called when the server was not successfully
// created. If the server was created the volume will have been added with
// DeleteOnTermination=true, and will be automatically cleaned up with the
// server.
func (s *Service) DeleteVolumes(instanceSpec *InstanceSpec) error {
// We don't pass InstanceSpec here because we only require instance name,
// rootVolume, and additionalBlockDevices, and resolving the whole InstanceSpec
// introduces unnecessary failure modes.
func (s *Service) DeleteVolumes(instanceName string, rootVolume *infrav1.RootVolume, additionalBlockDevices []infrav1.AdditionalBlockDevice) error {
Review comment (Contributor): I actually like this; it indicates that getting volumes from the spec is the only leftover and will change soon.

/*
Attaching volumes to an instance is a two-step process:

@@ -455,13 +458,13 @@ func (s *Service) DeleteVolumes(instanceSpec *InstanceSpec) error {
DeleteOnTermination will ensure it is deleted in that case.
*/

if hasRootVolume(instanceSpec) {
if err := s.deleteVolume(instanceSpec.Name, "root"); err != nil {
if rootVolume != nil && rootVolume.SizeGiB > 0 {
if err := s.deleteVolume(instanceName, "root"); err != nil {
return err
}
}
for _, volumeSpec := range instanceSpec.AdditionalBlockDevices {
if err := s.deleteVolume(instanceSpec.Name, volumeSpec.Name); err != nil {
for _, volumeSpec := range additionalBlockDevices {
if err := s.deleteVolume(instanceName, volumeSpec.Name); err != nil {
return err
}
}
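To make the new call shape concrete, here is a minimal sketch (not part of this PR) of a helper wrapping the reworked DeleteVolumes, mirroring the controller changes above. The helper name deleteOrphanedVolumes and the import aliases (infrav1 for the CAPO API types, compute for pkg/cloud/services/compute, plus fmt) are assumptions for illustration:

```go
// Sketch only, assuming the repository's usual aliases: infrav1 for the CAPO
// API types and compute for pkg/cloud/services/compute; fmt is also imported.
func deleteOrphanedVolumes(
	computeService *compute.Service,
	instanceName string,
	rootVolume *infrav1.RootVolume,
	additionalBlockDevices []infrav1.AdditionalBlockDevice,
) error {
	// The server was never created, so DeleteOnTermination cannot clean these
	// volumes up; delete them explicitly using only the instance name and the
	// volume specs, which is the point of the signature change.
	if err := computeService.DeleteVolumes(instanceName, rootVolume, additionalBlockDevices); err != nil {
		return fmt.Errorf("delete volumes: %w", err)
	}
	return nil
}
```

In the controllers this logic is inlined rather than wrapped: it runs only when instanceStatus is nil, and the bastion path additionally checks that openStackCluster.Spec.Bastion and its Spec are non-nil before calling DeleteVolumes.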
11 changes: 11 additions & 0 deletions test/e2e/data/kustomize/no-bastion/kustomization.yaml
@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
- ../default

patches:
- path: patch-no-bastion.yaml
target:
kind: OpenStackCluster
name: ${CLUSTER_NAME}
3 changes: 3 additions & 0 deletions test/e2e/data/kustomize/no-bastion/patch-no-bastion.yaml
@@ -0,0 +1,3 @@
---
- op: remove
path: /spec/bastion
92 changes: 48 additions & 44 deletions test/e2e/shared/common.go
@@ -214,51 +214,55 @@ func (o OpenStackLogCollector) CollectMachineLog(ctx context.Context, management
return fmt.Errorf("error writing server JSON %s: %s", serverJSON, err)
}

srvUser := o.E2EContext.E2EConfig.GetVariable(SSHUserMachine)
executeCommands(
ctx,
o.E2EContext.Settings.ArtifactFolder,
o.E2EContext.Settings.Debug,
outputPath,
ip,
openStackCluster.Status.Bastion.FloatingIP,
srvUser,
[]command{
// don't do this for now, it just takes to long
// {
// title: "systemd",
// cmd: "journalctl --no-pager --output=short-precise | grep -v 'audit:\\|audit\\['",
// },
{
title: "kern",
cmd: "journalctl --no-pager --output=short-precise -k",
if openStackCluster.Status.Bastion == nil {
Logf("Skipping log collection for machine %q since no bastion is available", m.Name)
} else {
srvUser := o.E2EContext.E2EConfig.GetVariable(SSHUserMachine)
executeCommands(
ctx,
o.E2EContext.Settings.ArtifactFolder,
o.E2EContext.Settings.Debug,
outputPath,
ip,
openStackCluster.Status.Bastion.FloatingIP,
srvUser,
[]command{
// don't do this for now, it just takes to long
// {
// title: "systemd",
// cmd: "journalctl --no-pager --output=short-precise | grep -v 'audit:\\|audit\\['",
// },
{
title: "kern",
cmd: "journalctl --no-pager --output=short-precise -k",
},
{
title: "containerd-info",
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock info",
},
{
title: "containerd-containers",
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps",
},
{
title: "containerd-pods",
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock pods",
},
{
title: "cloud-final",
cmd: "journalctl --no-pager -u cloud-final",
},
{
title: "kubelet",
cmd: "journalctl --no-pager -u kubelet.service",
},
{
title: "containerd",
cmd: "journalctl --no-pager -u containerd.service",
},
},
{
title: "containerd-info",
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock info",
},
{
title: "containerd-containers",
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps",
},
{
title: "containerd-pods",
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock pods",
},
{
title: "cloud-final",
cmd: "journalctl --no-pager -u cloud-final",
},
{
title: "kubelet",
cmd: "journalctl --no-pager -u kubelet.service",
},
{
title: "containerd",
cmd: "journalctl --no-pager -u containerd.service",
},
},
)
)
}
return nil
}

1 change: 1 addition & 0 deletions test/e2e/shared/defaults.go
@@ -46,6 +46,7 @@ const (
OpenStackNodeMachineFlavor = "OPENSTACK_NODE_MACHINE_FLAVOR"
SSHUserMachine = "SSH_USER_MACHINE"
FlavorDefault = ""
FlavorNoBastion = "no-bastion"
FlavorWithoutLB = "without-lb"
FlavorMultiNetwork = "multi-network"
FlavorMultiAZ = "multi-az"
36 changes: 36 additions & 0 deletions test/e2e/suites/e2e/e2e_test.go
@@ -245,6 +245,42 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
})
})

Describe("Workload cluster (no bastion)", func() {
It("should be creatable and deletable", func() {
shared.Logf("Creating a cluster")
clusterName := fmt.Sprintf("cluster-%s", namespace.Name)
configCluster := defaultConfigCluster(clusterName, namespace.Name)
configCluster.ControlPlaneMachineCount = ptr.To(int64(1))
configCluster.WorkerMachineCount = ptr.To(int64(1))
configCluster.Flavor = shared.FlavorNoBastion
createCluster(ctx, configCluster, clusterResources)
md := clusterResources.MachineDeployments

workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
ClusterName: clusterName,
Namespace: namespace.Name,
MachineDeployment: *md[0],
})
controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
ClusterName: clusterName,
Namespace: namespace.Name,
})
Expect(workerMachines).To(HaveLen(1))
Expect(controlPlaneMachines).To(HaveLen(1))

shared.Logf("Waiting for worker nodes to be in Running phase")
statusChecks := []framework.MachineStatusCheck{framework.MachinePhaseCheck(string(clusterv1.MachinePhaseRunning))}
machineStatusInput := framework.WaitForMachineStatusCheckInput{
Getter: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
Machine: &workerMachines[0],
StatusChecks: statusChecks,
}
framework.WaitForMachineStatusCheck(ctx, machineStatusInput, e2eCtx.E2EConfig.GetIntervals(specName, "wait-machine-status")...)
})
})

Describe("Workload cluster (flatcar)", func() {
It("should be creatable and deletable", func() {
// Flatcar default user is "core"