Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1919336: Check if datastore belongs to a datastore cluster #28

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
87 changes: 81 additions & 6 deletions pkg/check/datastore.go
Expand Up @@ -6,22 +6,27 @@ import (
"os/exec"
"strings"

"github.com/vmware/govmomi/property"

configv1 "github.com/openshift/api/config/v1"
"github.com/vmware/govmomi/pbm"
"github.com/vmware/govmomi/pbm/types"
"github.com/vmware/govmomi/view"
"github.com/vmware/govmomi/vim25/mo"
vim "github.com/vmware/govmomi/vim25/types"
"k8s.io/klog/v2"
)

const (
dsParameter = "datastore"
storagePolicyParameter = "storagepolicyname"

// Maximum length of <cluster-id>-dynamic-pvc-<uuid> for volume names.
// Kubernetes uses 90, https://github.com/kubernetes/kubernetes/blob/93d288e2a47fa6d497b50d37c8b3a04e91da4228/pkg/volume/vsphere_volume/vsphere_volume_util.go#L100
// Using 63 to work around https://bugzilla.redhat.com/show_bug.cgi?id=1926943
maxVolumeName = 63
maxVolumeName = 63
dataCenterType = "Datacenter"
DatastoreInfoProperty = "info"
SummaryProperty = "summary"
)

// CheckStorageClasses tests that datastore name in all StorageClasses in the cluster is short enough.
jsafrane marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -47,7 +52,7 @@ func CheckStorageClasses(ctx *CheckContext) error {
for k, v := range sc.Parameters {
switch strings.ToLower(k) {
case dsParameter:
if err := checkDataStore(v, infra); err != nil {
if err := checkDataStore(ctx, v, infra); err != nil {
klog.V(2).Infof("CheckStorageClasses: %s: %s", sc.Name, err)
errs = append(errs, fmt.Errorf("StorageClass %s: %s", sc.Name, err))
}
Expand Down Expand Up @@ -95,7 +100,7 @@ func CheckDefaultDatastore(ctx *CheckContext) error {
}

dsName := ctx.VMConfig.Workspace.DefaultDatastore
if err := checkDataStore(dsName, infra); err != nil {
if err := checkDataStore(ctx, dsName, infra); err != nil {
return fmt.Errorf("defaultDatastore %q in vSphere configuration: %s", dsName, err)
}
return nil
Expand Down Expand Up @@ -124,7 +129,7 @@ func checkStoragePolicy(ctx *CheckContext, policyName string, infrastructure *co

var errs []error
for _, dataStore := range dataStores {
err := checkDataStore(dataStore, infrastructure)
err := checkDataStore(ctx, dataStore, infrastructure)
if err != nil {
errs = append(errs, fmt.Errorf("storage policy %s: %s", policyName, err))
}
Expand Down Expand Up @@ -231,14 +236,84 @@ func getPolicy(ctx *CheckContext, name string) ([]types.BasePbmProfile, error) {
return c.RetrieveContent(tctx, []types.PbmProfileId{{UniqueId: name}})
}

func checkDataStore(dsName string, infrastructure *configv1.Infrastructure) error {
func checkDataStore(ctx *CheckContext, dsName string, infrastructure *configv1.Infrastructure) error {
clusterID := infrastructure.Status.InfrastructureName
volumeName := generateVolumeName(clusterID, "pvc-00000000-0000-0000-0000-000000000000", maxVolumeName)
fullVolumeName := fmt.Sprintf("[%s] 00000000-0000-0000-0000-000000000000/%s.vmdk", dsName, volumeName)
klog.V(4).Infof("Checking data store %q with potential volume Name %s", dsName, fullVolumeName)
if err := checkVolumeName(fullVolumeName); err != nil {
return fmt.Errorf("datastore %s: %s", dsName, err)
}
if err := checkForDatastoreCluster(ctx, dsName); err != nil {
return err
}
return nil
}

func checkForDatastoreCluster(ctx *CheckContext, dataStoreName string) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps for a subsequent PR: This is called for every StorageClass and loads non-trivial amount of data from vSphere. Would it be possible to call it only once per dataStorageName? I.e. with a cache somewhere in CheckStorageClasses? It's not that easy, because there is CheckDefaultDatastore too...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we could. I think it might be worth caching the information within the same run. but once all the checks are run -we can automatically expire the cache, so as they aren't reused in next run.

matchingDC, err := getDatacenter(ctx, ctx.VMConfig.Workspace.Datacenter)
if err != nil {
klog.Errorf("error getting datacenter %s: %v", ctx.VMConfig.Workspace.Datacenter, err)
return err
}
ds, err := getDataStoreByName(ctx, dataStoreName, matchingDC)
if err != nil {
klog.Errorf("error getting datastore %s: %v", dataStoreName, err)
return err
}

var dsMo mo.Datastore
pc := property.DefaultCollector(matchingDC.Client())
properties := []string{DatastoreInfoProperty, SummaryProperty}
tctx, cancel := context.WithTimeout(ctx.Context, *Timeout)
defer cancel()
err = pc.RetrieveOne(tctx, ds.Reference(), properties, &dsMo)
if err != nil {
klog.Errorf("error getting properties of datastore %s: %v", dataStoreName, err)
return nil
}

// list datastore cluster
m := view.NewManager(ctx.VMClient)
kind := []string{"StoragePod"}
tctx, cancel = context.WithTimeout(ctx.Context, *Timeout)
defer cancel()
v, err := m.CreateContainerView(tctx, ctx.VMClient.ServiceContent.RootFolder, kind, true)
if err != nil {
klog.Errorf("error listing datastore cluster: %+v", err)
return nil
}
defer func() {
v.Destroy(tctx)
}()

var content []mo.StoragePod
tctx, cancel = context.WithTimeout(ctx.Context, *Timeout)
defer cancel()
err = v.Retrieve(tctx, kind, []string{SummaryProperty, "childEntity"}, &content)
if err != nil {
klog.Errorf("error retrieving datastore cluster properties: %+v", err)
jsafrane marked this conversation as resolved.
Show resolved Hide resolved
// it is possible that we do not actually have permission to fetch datastore clusters
// in which case rather than throwing an error - we will silently return nil, so as
// we don't trigger unnecessary alerts.
return nil
}

for _, ds := range content {
for _, child := range ds.Folder.ChildEntity {
tDS, err := getDatastore(ctx, child)
if err != nil {
// we may not have permissions to fetch unrelated datastores in OCP
// and hence we are going to ignore the error.
klog.Errorf("fetching datastore %s failed: %v", child.String(), err)
gnufied marked this conversation as resolved.
Show resolved Hide resolved
continue
}
if tDS.Summary.Url == dsMo.Summary.Url {
return fmt.Errorf("datastore %s is part of %s datastore cluster", tDS.Summary.Name, ds.Summary.Name)
}
}
}
klog.V(4).Infof("Checked datastore %s for SRDS - no problems found", dataStoreName)
return nil
}

Expand Down
43 changes: 41 additions & 2 deletions pkg/check/datastore_test.go
Expand Up @@ -17,9 +17,14 @@ var (
}{
{
name: "short datastore",
datastore: "short",
datastore: "LocalDS_1",
expectError: false,
},
{
name: "non-existant datastore",
datastore: "foobar", // this datastore does not exist and hence should result in error
expectError: true,
},
{
name: "long datastore",
datastore: "01234567890123456789012345678901234567890123456789", // 269 characters in the escaped path
Expand All @@ -30,6 +35,16 @@ var (
datastore: "0-1-2-3-4-5-6-7-8-9", // 265 characters in the escaped path
expectError: true,
},
{
name: "datastore which is part of a datastore cluster",
datastore: "/DC0/datastore/DC0_POD0/LocalDS_2",
expectError: true,
},
{
name: "datastore which is not part of a datastore cluster",
datastore: "/DC0/datastore/LocalDS_1",
expectError: false,
},
}
)

Expand Down Expand Up @@ -103,7 +118,31 @@ func TestCheckStorageClassesWithDatastore(t *testing.T) {
}

func TestCheckPVs(t *testing.T) {
for _, test := range datastoreTests {
var (
pvWithDatastoreNames = []struct {
name string
datastore string
expectError bool
}{
{
name: "short datastore",
datastore: "LocalDS_1",
expectError: false,
},
{
name: "long datastore",
datastore: "01234567890123456789012345678901234567890123456789", // 269 characters in the escaped path
expectError: true,
},
{
name: "short datastore with too many dashes",
datastore: "0-1-2-3-4-5-6-7-8-9", // 265 characters in the escaped path
expectError: true,
},
}
)

for _, test := range pvWithDatastoreNames {
t.Run(test.name, func(t *testing.T) {
// Stage
kubeClient := &fakeKubeClient{
Expand Down
13 changes: 3 additions & 10 deletions pkg/check/folder.go
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"

"github.com/vmware/govmomi/find"
"github.com/vmware/govmomi/object"
"github.com/vmware/govmomi/vim25/types"
"k8s.io/klog/v2"
Expand All @@ -17,18 +16,12 @@ import (
// The check tries to list "kubevols/". It tolerates when it's missing,
// it will be created by OCP on the first provisioning.
func CheckFolderPermissions(ctx *CheckContext) error {
tctx, cancel := context.WithTimeout(ctx.Context, *Timeout)
defer cancel()
finder := find.NewFinder(ctx.VMClient, false)
dc, err := finder.Datacenter(tctx, ctx.VMConfig.Workspace.Datacenter)
dc, err := getDatacenter(ctx, ctx.VMConfig.Workspace.Datacenter)
if err != nil {
return fmt.Errorf("failed to access datacenter %s: %s", ctx.VMConfig.Workspace.Datacenter, err)
return err
}

tctx, cancel = context.WithTimeout(ctx.Context, *Timeout)
defer cancel()
finder.SetDatacenter(dc)
ds, err := finder.Datastore(tctx, ctx.VMConfig.Workspace.DefaultDatastore)
ds, err := getDataStoreByName(ctx, ctx.VMConfig.Workspace.DefaultDatastore, dc)
if err != nil {
return fmt.Errorf("failed to access datastore %s: %s", ctx.VMConfig.Workspace.DefaultDatastore, err)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/check/framework_test.go
Expand Up @@ -24,7 +24,7 @@ const (
defaultDC = "DC0"
defaultVMPath = "/DC0/vm/"
defaultHost = "H0"
defaultHostId = "host-21" // Generated by vcsim
defaultHostId = "host-24" // Generated by vcsim
defaultHostPath = "/DC0/host/DC0_"
)

Expand Down
@@ -1,7 +1,12 @@
This directory contains simulated vCenter environment, generated using vcsim and govc v0.23.1.

# Run vcsim with default settings, i.e. with 1 host and 2 VMs
$ vcsim -tls=false
# Run vcsim with following settings.
$ vcsim -tls=false -dc 2 -folder 1 -ds 4 -pod 1 -nsx 2 -pool 2 -app 1 -vm 2

# this will give us a datastore cluster with no datastores inside it.
# we should move some datastores in the datastore cluster
$ govc object.mv /DC0/datastore/LocalDS_2 /DC0/datastore/DC0_POD0
$ govc object.mv /DC0/datastore/LocalDS_3 /DC0/datastore/DC0_POD0

# Save simulated objects in a directory.
$ govc object.save
Expand Down
Expand Up @@ -7,8 +7,8 @@
<propertyCollector type="PropertyCollector">propertyCollector</propertyCollector>
<viewManager type="ViewManager">ViewManager</viewManager>
<about>
<name>VMware vCenter Server (govmomi simulator)</name>
<fullName>VMware vCenter Server 6.5.0 build-5973321</fullName>
<name>VMware vCenter Server</name>
<fullName>VMware vCenter Server 6.5.0 build-5973321 (govmomi simulator)</fullName>
<vendor>VMware, Inc.</vendor>
<version>6.5.0</version>
<build>5973321</build>
Expand Down
2 changes: 2 additions & 0 deletions pkg/check/testdata/default/0001-Folder-group-d1.xml
Expand Up @@ -84,6 +84,7 @@
<name>childEntity</name>
<val xmlns:XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" XMLSchema-instance:type="ArrayOfManagedObjectReference">
<ManagedObjectReference type="Datacenter">datacenter-2</ManagedObjectReference>
<ManagedObjectReference type="Folder">folder-58</ManagedObjectReference>
</val>
</propSet>
<propSet>
Expand Down Expand Up @@ -170,6 +171,7 @@
<name>childEntity</name>
<val xmlns:XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" XMLSchema-instance:type="ArrayOfManagedObjectReference">
<ManagedObjectReference type="Datacenter">datacenter-2</ManagedObjectReference>
<ManagedObjectReference type="Folder">folder-58</ManagedObjectReference>
</val>
</propSet>
</ObjectContent>
24 changes: 12 additions & 12 deletions pkg/check/testdata/default/0006-SessionManager-SessionManager.xml
Expand Up @@ -4,34 +4,34 @@
<name>sessionList</name>
<val xmlns:XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" XMLSchema-instance:type="ArrayOfUserSession">
<UserSession>
<key>acdb9fd2-8a1c-48ca-9c78-bcb24f5fa421</key>
<userName>user</userName>
<fullName>user</fullName>
<loginTime>2021-01-08T16:09:12.979769656Z</loginTime>
<lastActiveTime>2021-01-08T17:09:12.982313915+01:00</lastActiveTime>
<key>b7b1563d-7aca-41d6-bca4-774c66067b60</key>
<userName>administrator@vsphere.local</userName>
<fullName>administrator@vsphere.local</fullName>
<loginTime>2021-02-10T04:00:17.990208341Z</loginTime>
<lastActiveTime>2021-02-09T23:00:37.69791866-05:00</lastActiveTime>
<locale>en_US</locale>
<messageLocale>en_US</messageLocale>
<extensionSession>false</extensionSession>
<ipAddress>127.0.0.1</ipAddress>
<userAgent>govc/0.23.0</userAgent>
<callCount>4</callCount>
<callCount>112</callCount>
</UserSession>
</val>
</propSet>
<propSet>
<name>currentSession</name>
<val xmlns:XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" XMLSchema-instance:type="UserSession">
<key>acdb9fd2-8a1c-48ca-9c78-bcb24f5fa421</key>
<userName>user</userName>
<fullName>user</fullName>
<loginTime>2021-01-08T16:09:12.979769656Z</loginTime>
<lastActiveTime>2021-01-08T17:09:12.982313915+01:00</lastActiveTime>
<key>b7b1563d-7aca-41d6-bca4-774c66067b60</key>
<userName>administrator@vsphere.local</userName>
<fullName>administrator@vsphere.local</fullName>
<loginTime>2021-02-10T04:00:17.990208341Z</loginTime>
<lastActiveTime>2021-02-09T23:00:37.69791866-05:00</lastActiveTime>
<locale>en_US</locale>
<messageLocale>en_US</messageLocale>
<extensionSession>false</extensionSession>
<ipAddress>127.0.0.1</ipAddress>
<userAgent>govc/0.23.0</userAgent>
<callCount>4</callCount>
<callCount>112</callCount>
</val>
</propSet>
<propSet>
Expand Down
30 changes: 30 additions & 0 deletions pkg/check/testdata/default/0012-EventManager-EventManager.xml
Expand Up @@ -353,6 +353,36 @@
<formatOnVm></formatOnVm>
<fullFormat>Completed the relocation of the virtual machine</fullFormat>
</eventInfo>
<eventInfo>
<key>CustomizationFailed</key>
<description>An error occurred during customization</description>
<category>info</category>
<formatOnDatacenter></formatOnDatacenter>
<formatOnComputeResource></formatOnComputeResource>
<formatOnHost></formatOnHost>
<formatOnVm></formatOnVm>
<fullFormat>An error occurred during customization on VM {{.Vm.Name}}</fullFormat>
</eventInfo>
<eventInfo>
<key>CustomizationStartedEvent</key>
<description>Started customization</description>
<category>info</category>
<formatOnDatacenter></formatOnDatacenter>
<formatOnComputeResource></formatOnComputeResource>
<formatOnHost></formatOnHost>
<formatOnVm></formatOnVm>
<fullFormat>Started customization of VM {{.Vm.Name}}</fullFormat>
</eventInfo>
<eventInfo>
<key>CustomizationSucceeded</key>
<description>Customization succeeded</description>
<category>info</category>
<formatOnDatacenter></formatOnDatacenter>
<formatOnComputeResource></formatOnComputeResource>
<formatOnHost></formatOnHost>
<formatOnVm></formatOnVm>
<fullFormat>Customization of VM {{.Vm.Name}} succeeded</fullFormat>
</eventInfo>
<eventInfo>
<key>DrsVmMigratedEvent</key>
<description>DRS VM migrated</description>
Expand Down