Merge pull request #39 from jsafrane/dont-degrade
Bug 1943719: Don't degrade cluster on connection error
openshift-merge-robot committed May 10, 2021
2 parents c32cbf1 + df807c6 commit 50a2cab
Showing 12 changed files with 85 additions and 63 deletions.
pkg/check/datastore.go (4 changes: 2 additions & 2 deletions)
@@ -43,7 +43,7 @@ func CheckStorageClasses(ctx *CheckContext) error {

var errs []error
for i := range scs {
- sc := &scs[i]
+ sc := scs[i]
if sc.Provisioner != "kubernetes.io/vsphere-volume" {
klog.V(4).Infof("Skipping storage class %s: not a vSphere class", sc.Name)
continue
@@ -77,7 +77,7 @@ func CheckPVs(ctx *CheckContext) error {
return err
}
for i := range pvs {
- pv := &pvs[i]
+ pv := pvs[i]
if pv.Spec.VsphereVolume == nil {
continue
}
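The two hunks above follow from the KubeClient interface change in pkg/check/interface.go below: list calls now return slices of pointers, so the checks no longer need the address of an indexed element. A minimal sketch of the old and new iteration patterns (illustrative helpers, not code from this commit; storagev1 is k8s.io/api/storage/v1):

package check // sketch only

import storagev1 "k8s.io/api/storage/v1"

// vSphereClassNamesOld: with a []storagev1.StorageClass, the loop indexes the
// slice and takes the element's address to avoid copying the struct.
func vSphereClassNamesOld(scs []storagev1.StorageClass) []string {
	var names []string
	for i := range scs {
		sc := &scs[i]
		if sc.Provisioner == "kubernetes.io/vsphere-volume" {
			names = append(names, sc.Name)
		}
	}
	return names
}

// vSphereClassNamesNew: with a []*storagev1.StorageClass, the element is
// already a pointer, so no address-of is needed.
func vSphereClassNamesNew(scs []*storagev1.StorageClass) []string {
	var names []string
	for i := range scs {
		sc := scs[i]
		if sc.Provisioner == "kubernetes.io/vsphere-volume" {
			names = append(names, sc.Name)
		}
	}
	return names
}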
pkg/check/datastore_test.go (4 changes: 2 additions & 2 deletions)
@@ -84,7 +84,7 @@ func TestCheckStorageClassesWithDatastore(t *testing.T) {
kubeClient := &fakeKubeClient{
infrastructure: infrastructure(),
nodes: defaultNodes(),
- storageClasses: []storagev1.StorageClass{
+ storageClasses: []*storagev1.StorageClass{
{
ObjectMeta: metav1.ObjectMeta{
Name: test.name,
@@ -148,7 +148,7 @@ func TestCheckPVs(t *testing.T) {
kubeClient := &fakeKubeClient{
infrastructure: infrastructure(),
nodes: defaultNodes(),
- pvs: []v1.PersistentVolume{
+ pvs: []*v1.PersistentVolume{
{
ObjectMeta: metav1.ObjectMeta{
Name: test.name,
pkg/check/framework_test.go (22 changes: 11 additions & 11 deletions)
@@ -102,9 +102,9 @@ func setupSimulator(kubeClient *fakeKubeClient, modelDir string) (ctx *CheckCont

type fakeKubeClient struct {
infrastructure *ocpv1.Infrastructure
- nodes []v1.Node
- storageClasses []storagev1.StorageClass
- pvs []v1.PersistentVolume
+ nodes []*v1.Node
+ storageClasses []*storagev1.StorageClass
+ pvs []*v1.PersistentVolume
}

var _ KubeClient = &fakeKubeClient{}
@@ -113,20 +113,20 @@ func (f *fakeKubeClient) GetInfrastructure(ctx context.Context) (*ocpv1.Infrastr
return f.infrastructure, nil
}

- func (f *fakeKubeClient) ListNodes(ctx context.Context) ([]v1.Node, error) {
+ func (f *fakeKubeClient) ListNodes(ctx context.Context) ([]*v1.Node, error) {
return f.nodes, nil
}

- func (f *fakeKubeClient) ListStorageClasses(ctx context.Context) ([]storagev1.StorageClass, error) {
+ func (f *fakeKubeClient) ListStorageClasses(ctx context.Context) ([]*storagev1.StorageClass, error) {
return f.storageClasses, nil
}

- func (f *fakeKubeClient) ListPVs(ctx context.Context) ([]v1.PersistentVolume, error) {
+ func (f *fakeKubeClient) ListPVs(ctx context.Context) ([]*v1.PersistentVolume, error) {
return f.pvs, nil
}

- func node(name string, modifiers ...func(*v1.Node)) v1.Node {
- n := v1.Node{
+ func node(name string, modifiers ...func(*v1.Node)) *v1.Node {
+ n := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
@@ -135,7 +135,7 @@ func node(name string, modifiers ...func(*v1.Node)) v1.Node {
},
}
for _, modifier := range modifiers {
- modifier(&n)
+ modifier(n)
}
return n
}
@@ -146,8 +146,8 @@ func withProviderID(id string) func(*v1.Node) {
}
}

- func defaultNodes() []v1.Node {
- nodes := []v1.Node{}
+ func defaultNodes() []*v1.Node {
+ nodes := []*v1.Node{}
for _, vm := range defaultVMs {
node := node(vm.name, withProviderID("vsphere://"+vm.uuid))
nodes = append(nodes, node)
pkg/check/interface.go (6 changes: 3 additions & 3 deletions)
@@ -45,11 +45,11 @@ type KubeClient interface {
// GetInfrastructure returns current Infrastructure instance.
GetInfrastructure(ctx context.Context) (*ocpv1.Infrastructure, error)
// ListNodes returns list of all nodes in the cluster.
- ListNodes(ctx context.Context) ([]v1.Node, error)
+ ListNodes(ctx context.Context) ([]*v1.Node, error)
// ListStorageClasses returns list of all storage classes in the cluster.
- ListStorageClasses(ctx context.Context) ([]storagev1.StorageClass, error)
+ ListStorageClasses(ctx context.Context) ([]*storagev1.StorageClass, error)
// ListPVs returns list of all PVs in the cluster.
- ListPVs(ctx context.Context) ([]v1.PersistentVolume, error)
+ ListPVs(ctx context.Context) ([]*v1.PersistentVolume, error)
}

type CheckContext struct {
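A minimal sketch of a consumer of the updated interface (countVSpherePVs is a hypothetical helper, not part of this commit):

package check // sketch only

import "context"

// countVSpherePVs counts PVs backed by a vSphere volume; ListPVs now returns
// []*v1.PersistentVolume, so elements are used directly without address-of.
func countVSpherePVs(ctx context.Context, kc KubeClient) (int, error) {
	pvs, err := kc.ListPVs(ctx)
	if err != nil {
		return 0, err
	}
	count := 0
	for _, pv := range pvs {
		if pv.Spec.VsphereVolume != nil {
			count++
		}
	}
	return count, nil
}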
pkg/check/node_esxi_version_test.go (4 changes: 2 additions & 2 deletions)
@@ -63,11 +63,11 @@ func TestCollectNodeESXiVersion(t *testing.T) {
}

for _, node := range kubeClient.nodes {
- vm, err := getVM(ctx, &node)
+ vm, err := getVM(ctx, node)
if err != nil {
t.Errorf("Error getting vm for node %s: %s", node.Name, err)
}
- err = check.CheckNode(ctx, &node, vm)
+ err = check.CheckNode(ctx, node, vm)
if err != nil {
t.Errorf("Unexpected error on node %s: %s", node.Name, err)
}
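Passing the pointer element straight through also sidesteps taking the address of a range variable, which (before Go 1.22) is reused across iterations. A short sketch under those assumptions, with hypothetical helper names:

package check // sketch only, not from this commit

import v1 "k8s.io/api/core/v1"

// collectOld ranges over a value slice; a per-iteration copy is needed before
// taking an address, because &node would alias the reused loop variable.
func collectOld(nodes []v1.Node) []*v1.Node {
	var out []*v1.Node
	for _, node := range nodes {
		node := node
		out = append(out, &node)
	}
	return out
}

// collectNew ranges over a pointer slice; each element is already a distinct
// pointer and can be appended or passed to CheckNode directly.
func collectNew(nodes []*v1.Node) []*v1.Node {
	var out []*v1.Node
	for _, node := range nodes {
		out = append(out, node)
	}
	return out
}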
pkg/check/node_hw_version_test.go (6 changes: 3 additions & 3 deletions)
@@ -51,7 +51,7 @@ vsphere_node_hw_version_total{hw_version="vmx-15"} 1
defer cleanup()

// Set HW version of the first VM. Leave the other VMs with the default version (vmx-13).
- node := &kubeClient.nodes[0]
+ node := kubeClient.nodes[0]
err = customizeVM(ctx, node, &types.VirtualMachineConfigSpec{
ExtraConfig: []types.BaseOptionValue{
&types.OptionValue{
@@ -72,11 +72,11 @@ vsphere_node_hw_version_total{hw_version="vmx-15"} 1
}

for _, node := range kubeClient.nodes {
- vm, err := getVM(ctx, &node)
+ vm, err := getVM(ctx, node)
if err != nil {
t.Errorf("Error getting vm for node %s: %s", node.Name, err)
}
- err = check.CheckNode(ctx, &node, vm)
+ err = check.CheckNode(ctx, node, vm)
if err != nil {
t.Errorf("Unexpected error on node %s: %s", node.Name, err)
}
pkg/check/node_provider_test.go (6 changes: 3 additions & 3 deletions)
@@ -9,7 +9,7 @@ import (
func TestCheckNodeProviderID(t *testing.T) {
tests := []struct {
name string
- node v1.Node
+ node *v1.Node
expectError bool
}{
{
@@ -34,7 +34,7 @@ func TestCheckNodeProviderID(t *testing.T) {
}

kubeClient := &fakeKubeClient{
- nodes: []v1.Node{test.node},
+ nodes: []*v1.Node{test.node},
}
ctx, cleanup, err := setupSimulator(kubeClient, defaultModel)
if err != nil {
@@ -43,7 +43,7 @@ func TestCheckNodeProviderID(t *testing.T) {
defer cleanup()

// Act
- err = check.CheckNode(ctx, &test.node, nil)
+ err = check.CheckNode(ctx, test.node, nil)

// Assert
if err != nil && !test.expectError {
pkg/check/node_uuid_test.go (4 changes: 2 additions & 2 deletions)
@@ -43,7 +43,7 @@ func TestCheckNodeDiskUUID(t *testing.T) {
defer cleanup()

// Set VM disk.enableUUID
- node := &kubeClient.nodes[0]
+ node := kubeClient.nodes[0]
err = customizeVM(ctx, node, &types.VirtualMachineConfigSpec{
ExtraConfig: []types.BaseOptionValue{
&types.OptionValue{
@@ -60,7 +60,7 @@ func TestCheckNodeDiskUUID(t *testing.T) {
}

// Act
- err = check.CheckNode(ctx, &kubeClient.nodes[0], vm)
+ err = check.CheckNode(ctx, kubeClient.nodes[0], vm)

// Assert
if err != nil && !test.expectError {
pkg/operator/kubeclient.go (26 changes: 7 additions & 19 deletions)
@@ -7,7 +7,7 @@ import (
"github.com/openshift/vsphere-problem-detector/pkg/check"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)

var _ check.KubeClient = &vSphereProblemDetectorController{}
@@ -18,26 +18,14 @@ func (c *vSphereProblemDetectorController) GetInfrastructure(ctx context.Context
return c.infraLister.Get(infrastructureName)
}

- func (c *vSphereProblemDetectorController) ListNodes(ctx context.Context) ([]v1.Node, error) {
- list, err := c.kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
- if err != nil {
- return nil, err
- }
- return list.Items, nil
+ func (c *vSphereProblemDetectorController) ListNodes(ctx context.Context) ([]*v1.Node, error) {
+ return c.nodeLister.List(labels.Everything())
}

- func (c *vSphereProblemDetectorController) ListStorageClasses(ctx context.Context) ([]storagev1.StorageClass, error) {
- list, err := c.kubeClient.StorageV1().StorageClasses().List(ctx, metav1.ListOptions{})
- if err != nil {
- return nil, err
- }
- return list.Items, nil
+ func (c *vSphereProblemDetectorController) ListStorageClasses(ctx context.Context) ([]*storagev1.StorageClass, error) {
+ return c.scLister.List(labels.Everything())
}

- func (c *vSphereProblemDetectorController) ListPVs(ctx context.Context) ([]v1.PersistentVolume, error) {
- list, err := c.kubeClient.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{})
- if err != nil {
- return nil, err
- }
- return list.Items, nil
+ func (c *vSphereProblemDetectorController) ListPVs(ctx context.Context) ([]*v1.PersistentVolume, error) {
+ return c.pvLister.List(labels.Everything())
}
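The rewritten methods serve List calls from informer listers instead of querying the API server on every check. The controller's nodeLister, scLister, and pvLister fields are assumed to be populated by shared informers elsewhere; the wiring below is only a sketch of how such listers are typically obtained, not the operator's actual setup code:

package operator // sketch only

import (
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	corelisters "k8s.io/client-go/listers/core/v1"
	storagelisters "k8s.io/client-go/listers/storage/v1"
)

// listerSet is a hypothetical holder for the listers used above; their List
// methods return []*v1.Node, []*storagev1.StorageClass and
// []*v1.PersistentVolume, matching the updated KubeClient interface.
type listerSet struct {
	nodeLister corelisters.NodeLister
	scLister   storagelisters.StorageClassLister
	pvLister   corelisters.PersistentVolumeLister
}

// newListerSet builds listers backed by shared informer caches.
func newListerSet(client kubernetes.Interface, stopCh <-chan struct{}) listerSet {
	factory := informers.NewSharedInformerFactory(client, 0)
	ls := listerSet{
		nodeLister: factory.Core().V1().Nodes().Lister(),
		scLister:   factory.Storage().V1().StorageClasses().Lister(),
		pvLister:   factory.Core().V1().PersistentVolumes().Lister(),
	}
	factory.Start(stopCh)
	factory.WaitForCacheSync(stopCh)
	return ls
}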
pkg/operator/metrics.go (9 changes: 9 additions & 0 deletions)
@@ -49,11 +49,20 @@ var (
},
[]string{checkNameLabel, nodeNameLabel},
)

+ syncErrrorMetric = metrics.NewGauge(
+ &metrics.GaugeOpts{
+ Name: "vsphere_sync_errors",
+ Help: "Indicates failing vSphere problem detector sync error. Value 1 means that the last sync failed.",
+ StabilityLevel: metrics.ALPHA,
+ },
+ )
)

func init() {
legacyregistry.MustRegister(clusterCheckTotalMetric)
legacyregistry.MustRegister(clusterCheckErrrorMetric)
legacyregistry.MustRegister(nodeCheckTotalMetric)
legacyregistry.MustRegister(nodeCheckErrrorMetric)
+ legacyregistry.MustRegister(syncErrrorMetric)
}
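The excerpt does not show where the new gauge is set; a plausible sketch of a sync handler driving it (syncOnce and runChecks are hypothetical names, not the operator's actual sync code):

// Sketch only: report a failed sync through vsphere_sync_errors so a transient
// vSphere connection error surfaces as a metric instead of degrading the
// cluster, per the intent of this change.
func (c *vSphereProblemDetectorController) syncOnce(ctx context.Context) error {
	if err := c.runChecks(ctx); err != nil {
		syncErrrorMetric.Set(1)
		return err
	}
	syncErrrorMetric.Set(0)
	return nil
}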
