Detect and report an ErrorPvcNotFound/ErrorDataVolumeNotFound VM status #6171

Merged — 6 commits, Oct 6, 2021
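Taken together, the changes below do three things: the template service now returns typed PvcNotFoundError/DataVolumeNotFoundError values, the VMI controller translates those into a Synchronized=False condition carrying a FailedPvcNotFound or FailedDataVolumeNotFound reason, and the VM controller surfaces those reasons as the new ErrorPvcNotFound/ErrorDataVolumeNotFound printable statuses. Below is a condensed, standalone sketch of that flow — simplified helpers only, not the controller code itself, and the fallback reason is illustrative:

// Standalone sketch of the flow this PR wires together (simplified; the real
// code is spread across template.go, vmi.go and vm.go and goes through the
// KubeVirt condition managers rather than these toy helpers).
package main

import "fmt"

// Typed errors returned by the launcher template renderer (as in template.go).
type PvcNotFoundError struct{ Reason string }

func (e PvcNotFoundError) Error() string { return e.Reason }

type DataVolumeNotFoundError struct{ Reason string }

func (e DataVolumeNotFoundError) Error() string { return e.Reason }

// Condition reasons set on the VMI Synchronized condition (as in vmi.go).
const (
	FailedPvcNotFoundReason        = "FailedPvcNotFound"
	FailedDataVolumeNotFoundReason = "FailedDataVolumeNotFound"
)

// reasonForSyncError maps a typed render/sync error to a condition reason,
// mirroring the type checks the VMI controller performs.
func reasonForSyncError(err error) string {
	switch err.(type) {
	case PvcNotFoundError:
		return FailedPvcNotFoundReason
	case DataVolumeNotFoundError:
		return FailedDataVolumeNotFoundReason
	default:
		return "" // other reasons elided in this sketch
	}
}

// printableStatus maps the reason on a Synchronized=False VMI condition to the
// VM printable status introduced by this PR (vm.go + types.go).
func printableStatus(reason string) string {
	switch reason {
	case FailedPvcNotFoundReason:
		return "ErrorPvcNotFound"
	case FailedDataVolumeNotFoundReason:
		return "ErrorDataVolumeNotFound"
	}
	return "Unknown"
}

func main() {
	err := PvcNotFoundError{Reason: "didn't find PVC my-claim"}
	reason := reasonForSyncError(err)
	fmt.Println(reason, "->", printableStatus(reason)) // FailedPvcNotFound -> ErrorPvcNotFound
}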
20 changes: 17 additions & 3 deletions pkg/virt-controller/services/template.go
@@ -134,7 +134,21 @@ type templateService struct {
launcherSubGid int64
}

type PvcNotFoundError error
type PvcNotFoundError struct {
Reason string
}

func (e PvcNotFoundError) Error() string {
return e.Reason
}

type DataVolumeNotFoundError struct {
Reason string
}

func (e DataVolumeNotFoundError) Error() string {
return e.Reason
}

func isFeatureStateEnabled(fs *v1.FeatureState) bool {
return fs != nil && fs.Enabled != nil && *fs.Enabled
@@ -514,7 +528,7 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
return nil, err
} else if !exists {
logger.Errorf("didn't find PVC %v", claimName)
return nil, PvcNotFoundError(fmt.Errorf("didn't find PVC %v", claimName))
return nil, PvcNotFoundError{Reason: fmt.Sprintf("didn't find PVC %v", claimName)}
} else if isBlock {
devicePath := filepath.Join(string(filepath.Separator), "dev", volume.Name)
device := k8sv1.VolumeDevice{
@@ -582,7 +596,7 @@ func (t *templateService) renderLaunchManifest(vmi *v1.VirtualMachineInstance, t
return nil, err
} else if !exists {
logger.Errorf("didn't find PVC associated with DataVolume: %v", claimName)
return nil, PvcNotFoundError(fmt.Errorf("didn't find PVC associated with DataVolume: %v", claimName))
return nil, PvcNotFoundError{Reason: fmt.Sprintf("didn't find PVC associated with DataVolume: %v", claimName)}
} else if isBlock {
devicePath := filepath.Join(string(filepath.Separator), "dev", volume.Name)
device := k8sv1.VolumeDevice{
3 changes: 1 addition & 2 deletions pkg/virt-controller/services/template_test.go
@@ -20,7 +20,6 @@
package services

import (
"errors"
"fmt"
"strconv"
"strings"
@@ -2126,7 +2125,7 @@ var _ = Describe("Template", func() {

_, err := svc.RenderLaunchManifest(&vmi)
Expect(err).To(HaveOccurred(), "Render manifest results in an error")
Expect(err).To(BeAssignableToTypeOf(PvcNotFoundError(errors.New(""))), "Render manifest results in an PvsNotFoundError")
Expect(err).To(BeAssignableToTypeOf(PvcNotFoundError{}), "Render manifest results in an PvsNotFoundError")
})
})

18 changes: 18 additions & 0 deletions pkg/virt-controller/watch/vm.go
@@ -1622,6 +1622,8 @@ func (c *VMController) setPrintableStatus(vm *virtv1.VirtualMachine, vmi *virtv1
{virtv1.VirtualMachineStatusMigrating, c.isVirtualMachineStatusMigrating},
{virtv1.VirtualMachineStatusPaused, c.isVirtualMachineStatusPaused},
{virtv1.VirtualMachineStatusRunning, c.isVirtualMachineStatusRunning},
{virtv1.VirtualMachineStatusPvcNotFound, c.isVirtualMachineStatusPvcNotFound},
{virtv1.VirtualMachineStatusDataVolumeNotFound, c.isVirtualMachineStatusDataVolumeNotFound},
{virtv1.VirtualMachineStatusUnschedulable, c.isVirtualMachineStatusUnschedulable},
{virtv1.VirtualMachineStatusProvisioning, c.isVirtualMachineStatusProvisioning},
{virtv1.VirtualMachineStatusErrImagePull, c.isVirtualMachineStatusErrImagePull},
@@ -1747,6 +1749,22 @@ func (c *VMController) isVirtualMachineStatusImagePullBackOff(vm *virtv1.Virtual
return syncCond != nil && syncCond.Status == k8score.ConditionFalse && syncCond.Reason == ImagePullBackOffReason
}

// isVirtualMachineStatusPvcNotFound determines whether the VM status field should be set to "FailedPvcNotFound".
func (c *VMController) isVirtualMachineStatusPvcNotFound(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool {
return controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatusAndReason(vmi,
virtv1.VirtualMachineInstanceSynchronized,
k8score.ConditionFalse,
FailedPvcNotFoundReason)
}

// isVirtualMachineStatusDataVolumeNotFound determines whether the VM status field should be set to "FailedDataVolumeNotFound".
func (c *VMController) isVirtualMachineStatusDataVolumeNotFound(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) bool {
return controller.NewVirtualMachineInstanceConditionManager().HasConditionWithStatusAndReason(vmi,
virtv1.VirtualMachineInstanceSynchronized,
k8score.ConditionFalse,
FailedDataVolumeNotFoundReason)
}

func (c *VMController) syncReadyConditionFromVMI(vm *virtv1.VirtualMachine, vmi *virtv1.VirtualMachineInstance) {
conditionManager := controller.NewVirtualMachineConditionManager()
vmiReadyCond := controller.NewVirtualMachineInstanceConditionManager().
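For context, the two entries added above join an ordered table that setPrintableStatus walks; presumably the first predicate that reports true decides the VM's printable status, so placement in the list matters. A minimal standalone sketch of that first-match-wins evaluation (types and names simplified; the real predicates take the VM and VMI):

package main

import "fmt"

// Simplified stand-in for the controller's ordered status table: the first
// predicate that returns true decides the printable status.
type statusCheck struct {
	status  string
	matches func() bool
}

func pickPrintableStatus(checks []statusCheck, fallback string) string {
	for _, c := range checks {
		if c.matches() {
			return c.status
		}
	}
	return fallback
}

func main() {
	checks := []statusCheck{
		{"ErrorPvcNotFound", func() bool { return false }},
		{"ErrorDataVolumeNotFound", func() bool { return true }},
	}
	fmt.Println(pickPrintableStatus(checks, "Unknown")) // ErrorDataVolumeNotFound
}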
53 changes: 37 additions & 16 deletions pkg/virt-controller/watch/vm_test.go
@@ -2085,25 +2085,46 @@ var _ = Describe("VirtualMachine", func() {
controller.Execute()
})

It("should set a FailedUnschedulable status when VMI has a PodScheduled=False condition with Unschedulable reason", func() {
vm, vmi := DefaultVirtualMachine(true)
vmi.Status.Phase = virtv1.Scheduling
vmi.Status.Conditions = append(vmi.Status.Conditions, virtv1.VirtualMachineInstanceCondition{
Type: virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled),
Status: k8sv1.ConditionFalse,
Reason: k8sv1.PodReasonUnschedulable,
})
table.DescribeTable("should set a failure status in accordance to VMI condition",
func(status virtv1.VirtualMachinePrintableStatus, cond v1.VirtualMachineInstanceCondition) {

addVirtualMachine(vm)
vmiFeeder.Add(vmi)
vm, vmi := DefaultVirtualMachine(true)
vmi.Status.Phase = virtv1.Scheduling
vmi.Status.Conditions = append(vmi.Status.Conditions, cond)

vmInterface.EXPECT().UpdateStatus(gomock.Any()).Times(1).Do(func(obj interface{}) {
objVM := obj.(*v1.VirtualMachine)
Expect(objVM.Status.PrintableStatus).To(Equal(v1.VirtualMachineStatusUnschedulable))
})
addVirtualMachine(vm)
vmiFeeder.Add(vmi)

controller.Execute()
})
vmInterface.EXPECT().UpdateStatus(gomock.Any()).Times(1).Do(func(obj interface{}) {
objVM := obj.(*v1.VirtualMachine)
Expect(objVM.Status.PrintableStatus).To(Equal(status))
})

controller.Execute()
},

table.Entry("FailedUnschedulable", v1.VirtualMachineStatusUnschedulable,
virtv1.VirtualMachineInstanceCondition{
Type: virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled),
Status: k8sv1.ConditionFalse,
Reason: k8sv1.PodReasonUnschedulable,
},
),
table.Entry("FailedPvcNotFound", v1.VirtualMachineStatusPvcNotFound,
virtv1.VirtualMachineInstanceCondition{
Type: virtv1.VirtualMachineInstanceSynchronized,
Status: k8sv1.ConditionFalse,
Reason: FailedPvcNotFoundReason,
},
),
table.Entry("FailedDataVolumeNotFound", v1.VirtualMachineStatusDataVolumeNotFound,
virtv1.VirtualMachineInstanceCondition{
Type: virtv1.VirtualMachineInstanceSynchronized,
Status: k8sv1.ConditionFalse,
Reason: FailedDataVolumeNotFoundReason,
},
),
)

table.DescribeTable("should set an ImagePullBackOff/ErrPullImage statuses according to VMI Synchronized condition", func(reason string) {
vm, vmi := DefaultVirtualMachine(true)
26 changes: 22 additions & 4 deletions pkg/virt-controller/watch/vmi.go
@@ -98,6 +98,9 @@ const (
// FailedPvcNotFoundReason is added in an event
// when a PVC for a volume was not found.
FailedPvcNotFoundReason = "FailedPvcNotFound"
// FailedDataVolumeNotFoundReason is added in an event
// when a DataVolume for a volume was not found.
FailedDataVolumeNotFoundReason = "FailedDataVolumeNotFound"
// SuccessfulMigrationReason is added when a migration attempt completes successfully
SuccessfulMigrationReason = "SuccessfulMigration"
// FailedMigrationReason is added when a migration attempt fails
@@ -442,7 +445,8 @@ func (c *VMIController) updateStatus(vmi *virtv1.VirtualMachineInstance, pod *k8
}
}
}
if syncErr != nil && syncErr.Reason() == FailedPvcNotFoundReason {
if syncErr != nil &&
(syncErr.Reason() == FailedPvcNotFoundReason || syncErr.Reason() == FailedDataVolumeNotFoundReason) {
condition := virtv1.VirtualMachineInstanceCondition{
Type: virtv1.VirtualMachineInstanceConditionType(k8sv1.PodScheduled),
Reason: k8sv1.PodReasonUnschedulable,
@@ -982,6 +986,9 @@ func (c *VMIController) handleSyncDataVolumes(vmi *virtv1.VirtualMachineInstance
// Keep existing behavior of missing PVC = ready. This in turn triggers template render, which sets conditions and events, and fails appropriately
if _, ok := err.(services.PvcNotFoundError); ok {
continue
} else if _, ok := err.(services.DataVolumeNotFoundError); ok {
c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, FailedDataVolumeNotFoundReason, "DataVolume is referenced by VMI but doesn't exist: %v", err)
return false, false, &syncErrorImpl{fmt.Errorf("DataVolume is referenced by VMI but doesn't exist: %v", err), FailedDataVolumeNotFoundReason}
} else {
c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, FailedPvcNotFoundReason, "Error determining if volume is ready: %v", err)
return false, false, &syncErrorImpl{fmt.Errorf("Error determining if volume is ready %v", err), FailedDataVolumeImportReason}
@@ -1643,19 +1650,30 @@ func (c *VMIController) volumeReadyToAttachToNode(namespace string, volume virtv
} else if volume.PersistentVolumeClaim != nil {
name = volume.PersistentVolumeClaim.ClaimName
}

dataVolumeFunc := dataVolumeByNameFunc(c.dataVolumeInformer, dataVolumes)
Review comment (Member):

There is one other check if the volume is populated for hotplug. Is that covered too?

It may make sense to get rid of this callback function now that we have to do pre-validation to see if the DV exists at all, and just pass the DataVolume in to IsPopulated. Since that is provided by CDI, it could probably be wrapped so that one can just pass in the DV. Not a big thing, but the flow is pretty bumpy.

Reply (Contributor Author):

> There is one other check if the volume is populated for hotplug. Is that covered too?

I think it is, since volumeReadyToAttachToNode is invoked for every DataVolume, be it hotplugged or not.

> It may make sense to get rid of this callback function now that we have to do pre-validation to see if the DV exists at all, and just pass the DataVolume in to IsPopulated.

I'm not sure I entirely follow. Did you mean to change the code such that the dataVolumeFunc that is passed into IsPopulated will directly return the correct DataVolume object? If so, then this is not too far from what's already being done.
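Not part of this PR — a minimal sketch of the wrapper the reviewer is hinting at, assuming the file's existing k8sv1/cdiv1 imports and the IsPopulated(pvc, lookupFunc) signature used in the diff. Once the DataVolume has already been fetched for the pre-validation step, the lookup can be a trivial closure:

// Hypothetical helper (not in this PR): feed the already-fetched DataVolume to
// cdiv1.IsPopulated via a closure instead of threading the informer-backed
// lookup function through every call site.
func isDataVolumePopulated(pvc *k8sv1.PersistentVolumeClaim, dv *cdiv1.DataVolume) (bool, error) {
	return cdiv1.IsPopulated(pvc, func(name, namespace string) (*cdiv1.DataVolume, error) {
		return dv, nil
	})
}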


if volume.DataVolume != nil {
// First, ensure DataVolume exists
_, err := dataVolumeFunc(name, namespace)
if err != nil {
return false, false, services.DataVolumeNotFoundError{Reason: err.Error()}
}
}

wffc := false
ready := false
// err is always nil
pvcInterface, pvcExists, _ := c.pvcInformer.GetStore().GetByKey(fmt.Sprintf("%s/%s", namespace, name))
if pvcExists {
var err error
pvc := pvcInterface.(*k8sv1.PersistentVolumeClaim)
ready, err = cdiv1.IsPopulated(pvc, dataVolumeByNameFunc(c.dataVolumeInformer, dataVolumes))
ready, err = cdiv1.IsPopulated(pvc, dataVolumeFunc)
if err != nil {
return false, false, err
}
if !ready {
waitsForFirstConsumer, err := cdiv1.IsWaitForFirstConsumerBeforePopulating(pvc, dataVolumeByNameFunc(c.dataVolumeInformer, dataVolumes))
waitsForFirstConsumer, err := cdiv1.IsWaitForFirstConsumerBeforePopulating(pvc, dataVolumeFunc)
if err != nil {
return false, false, err
}
@@ -1664,7 +1682,7 @@ func (c *VMIController) volumeReadyToAttachToNode(namespace string, volume virtv
}
}
} else {
return false, false, services.PvcNotFoundError(fmt.Errorf("didn't find PVC %v", name))
return false, false, services.PvcNotFoundError{Reason: fmt.Sprintf("didn't find PVC %v", name)}
}
return ready, wffc, nil
}
121 changes: 67 additions & 54 deletions pkg/virt-controller/watch/vmi_test.go
@@ -873,69 +873,82 @@ var _ = Describe("VirtualMachineInstance watcher", func() {
Expect(mockQueue.GetRateLimitedEnqueueCount()).To(Equal(0))
testutils.ExpectEvent(recorder, SuccessfulCreatePodReason)
})
It("should create PodScheduled and Synchronized conditions exactly once each for repeated FailedPvcNotFoundReason sync error", func() {

expectConditions := func(vmi *v1.VirtualMachineInstance) {
// PodScheduled and Synchronized (as well as Ready)
Expect(len(vmi.Status.Conditions)).To(Equal(3), "there should be exactly 3 conditions")

getType := func(c v1.VirtualMachineInstanceCondition) string { return string(c.Type) }
getReason := func(c v1.VirtualMachineInstanceCondition) string { return c.Reason }
Expect(vmi.Status.Conditions).To(
And(
ContainElement(
WithTransform(getType, Equal(string(v1.VirtualMachineInstanceSynchronized))),
),
ContainElement(
And(
WithTransform(getType, Equal(string(k8sv1.PodScheduled))),
WithTransform(getReason, Equal(k8sv1.PodReasonUnschedulable)),
table.DescribeTable("should create PodScheduled and Synchronized conditions exactly once each for repeated FailedPvcNotFoundReason/FailedDataVolumeNotFoundReason sync errors",
func(syncReason string, volumeSource v1.VolumeSource) {

expectConditions := func(vmi *v1.VirtualMachineInstance) {
// PodScheduled and Synchronized (as well as Ready)
Expect(len(vmi.Status.Conditions)).To(Equal(3), "there should be exactly 3 conditions")

getType := func(c v1.VirtualMachineInstanceCondition) string { return string(c.Type) }
getReason := func(c v1.VirtualMachineInstanceCondition) string { return c.Reason }
Expect(vmi.Status.Conditions).To(
And(
ContainElement(
And(
WithTransform(getType, Equal(string(v1.VirtualMachineInstanceSynchronized))),
WithTransform(getReason, Equal(syncReason)),
),
),
ContainElement(
And(
WithTransform(getType, Equal(string(k8sv1.PodScheduled))),
WithTransform(getReason, Equal(k8sv1.PodReasonUnschedulable)),
),
),
),
),
)
)

testutils.ExpectEvent(recorder, FailedPvcNotFoundReason)
}
testutils.ExpectEvent(recorder, syncReason)
}

vmi := NewPendingVirtualMachine("testvmi")
setReadyCondition(vmi, k8sv1.ConditionFalse, v1.PodNotExistsReason)
vmi := NewPendingVirtualMachine("testvmi")
setReadyCondition(vmi, k8sv1.ConditionFalse, v1.PodNotExistsReason)

vmi.Spec.Volumes = []v1.Volume{
{
Name: "test",
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
PersistentVolumeClaimVolumeSource: k8sv1.PersistentVolumeClaimVolumeSource{
ClaimName: "something",
},
},
vmi.Spec.Volumes = []v1.Volume{
{
Name: "test",
VolumeSource: volumeSource,
},
},
}
addVirtualMachine(vmi)
update := vmiInterface.EXPECT().Update(gomock.Any())
update.Do(func(vmi *v1.VirtualMachineInstance) {
expectConditions(vmi)
vmiInformer.GetStore().Update(vmi)
key := kvcontroller.VirtualMachineInstanceKey(vmi)
controller.vmiExpectations.LowerExpectations(key, 1, 0)
update.Return(vmi, nil)
})
}
addVirtualMachine(vmi)
update := vmiInterface.EXPECT().Update(gomock.Any())
update.Do(func(vmi *v1.VirtualMachineInstance) {
expectConditions(vmi)
vmiInformer.GetStore().Update(vmi)
key := kvcontroller.VirtualMachineInstanceKey(vmi)
controller.vmiExpectations.LowerExpectations(key, 1, 0)
update.Return(vmi, nil)
})

controller.Execute()
Expect(controller.Queue.Len()).To(Equal(0))
Expect(mockQueue.GetRateLimitedEnqueueCount()).To(Equal(1))
controller.Execute()
Expect(controller.Queue.Len()).To(Equal(0))
Expect(mockQueue.GetRateLimitedEnqueueCount()).To(Equal(1))

// make sure that during next iteration we do not add the same condition again
vmiInterface.EXPECT().Update(gomock.Any()).Do(func(vmi *v1.VirtualMachineInstance) {
expectConditions(vmi)
}).Return(vmi, nil)
// make sure that during next iteration we do not add the same condition again
vmiInterface.EXPECT().Update(gomock.Any()).Do(func(vmi *v1.VirtualMachineInstance) {
expectConditions(vmi)
}).Return(vmi, nil)

controller.Execute()
Expect(controller.Queue.Len()).To(Equal(0))
Expect(mockQueue.GetRateLimitedEnqueueCount()).To(Equal(2))
})
controller.Execute()
Expect(controller.Queue.Len()).To(Equal(0))
Expect(mockQueue.GetRateLimitedEnqueueCount()).To(Equal(2))
},
table.Entry("when PVC does not exist", FailedPvcNotFoundReason,
v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
PersistentVolumeClaimVolumeSource: k8sv1.PersistentVolumeClaimVolumeSource{
ClaimName: "something",
},
},
}),
table.Entry("when DataVolume does not exist", FailedDataVolumeNotFoundReason,
v1.VolumeSource{
DataVolume: &v1.DataVolumeSource{
Name: "something",
},
}),
)

table.DescribeTable("should move the vmi to scheduling state if a pod exists", func(phase k8sv1.PodPhase, isReady bool) {
vmi := NewPendingVirtualMachine("testvmi")
8 changes: 6 additions & 2 deletions staging/src/kubevirt.io/client-go/api/v1/types.go
@@ -1258,7 +1258,7 @@ const (
// VirtualMachineStatusTerminating indicates that the virtual machine is in the process of deletion,
// as well as its associated resources (VirtualMachineInstance, DataVolumes, …).
VirtualMachineStatusTerminating VirtualMachinePrintableStatus = "Terminating"
// VirtualMachineStatusCrashLoopBackOff indicates that the virtual machine is currently in a crash loop waiting to be retried
// VirtualMachineStatusCrashLoopBackOff indicates that the virtual machine is currently in a crash loop waiting to be retried.
VirtualMachineStatusCrashLoopBackOff VirtualMachinePrintableStatus = "CrashLoopBackOff"
// VirtualMachineStatusMigrating indicates that the virtual machine is in the process of being migrated
// to another host.
@@ -1268,13 +1268,17 @@
VirtualMachineStatusUnknown VirtualMachinePrintableStatus = "Unknown"
// VirtualMachineStatusUnschedulable indicates that an error has occurred while scheduling the virtual machine,
// e.g. due to unsatisfiable resource requests or unsatisfiable scheduling constraints.
VirtualMachineStatusUnschedulable VirtualMachinePrintableStatus = "FailedUnschedulable"
VirtualMachineStatusUnschedulable VirtualMachinePrintableStatus = "ErrorUnschedulable"
// VirtualMachineStatusErrImagePull indicates that an error has occured while pulling an image for
// a containerDisk VM volume.
VirtualMachineStatusErrImagePull VirtualMachinePrintableStatus = "ErrImagePull"
// VirtualMachineStatusImagePullBackOff indicates that an error has occured while pulling an image for
// a containerDisk VM volume, and that kubelet is backing off before retrying.
VirtualMachineStatusImagePullBackOff VirtualMachinePrintableStatus = "ImagePullBackOff"
// VirtualMachineStatusPvcNotFound indicates that the virtual machine references a PVC volume which doesn't exist.
VirtualMachineStatusPvcNotFound VirtualMachinePrintableStatus = "ErrorPvcNotFound"
// VirtualMachineStatusDataVolumeNotFound indicates that the virtual machine references a DataVolume volume which doesn't exist.
VirtualMachineStatusDataVolumeNotFound VirtualMachinePrintableStatus = "ErrorDataVolumeNotFound"
)

// VirtualMachineStartFailure tracks VMIs which failed to transition successfully
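For API consumers, the two new values surface through vm.Status.PrintableStatus (the STATUS column of kubectl get vm). A hedged client-side sketch, assuming the kubevirt.io/client-go/api/v1 import path shown in this diff:

package main

import (
	"fmt"

	v1 "kubevirt.io/client-go/api/v1"
)

// reportStorageProblem flags the two new error statuses. Sketch only: a real
// client would obtain the VirtualMachine from the KubeVirt clientset or an informer.
func reportStorageProblem(vm *v1.VirtualMachine) {
	switch vm.Status.PrintableStatus {
	case v1.VirtualMachineStatusPvcNotFound:
		fmt.Printf("VM %s references a PVC that does not exist\n", vm.Name)
	case v1.VirtualMachineStatusDataVolumeNotFound:
		fmt.Printf("VM %s references a DataVolume that does not exist\n", vm.Name)
	}
}

func main() {
	vm := &v1.VirtualMachine{}
	vm.Name = "testvm"
	vm.Status.PrintableStatus = v1.VirtualMachineStatusPvcNotFound
	reportStorageProblem(vm)
}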