vsphere creds checking #1186

Merged
3 changes: 3 additions & 0 deletions pkg/apis/hive/v1/clusterdeployment_types.go
@@ -316,6 +316,9 @@ const (

// ProvisionStoppedCondition is set when cluster provisioning is stopped
ProvisionStoppedCondition ClusterDeploymentConditionType = "ProvisionStopped"

// AuthenticationFailureClusterDeploymentCondition is true when platform credentials cannot be used because of an authentication failure
AuthenticationFailureClusterDeploymentCondition ClusterDeploymentConditionType = "AuthenticationFailure"
Contributor:
Do you think there's any opportunity for better synergy with #1109? I can't see any immediately.

)

// AllClusterDeploymentConditions is a slice containing all condition types. This can be used for dealing with
8 changes: 8 additions & 0 deletions pkg/constants/constants.go
@@ -6,6 +6,14 @@ import (
)

const (
PlatformAWS = "aws"
PlatformAzure = "azure"
PlatformBaremetal = "baremetal"
PlatformGCP = "gcp"
PlatformOpenStack = "openstack"
PlatformUnknown = "unknown"
PlatformVSphere = "vsphere"

mergedPullSecretSuffix = "merged-pull-secret"

// VeleroBackupEnvVar is the name of the environment variable used to tell the controller manager to enable velero backup integration.
95 changes: 76 additions & 19 deletions pkg/controller/clusterdeployment/clusterdeployment_controller.go
@@ -57,6 +57,9 @@ const (
defaultRequeueTime = 10 * time.Second
maxProvisions = 3

platformAuthFailureReason = "PlatformAuthError"
platformAuthSuccessReason = "PlatformAuthSuccess"

clusterImageSetNotFoundReason = "ClusterImageSetNotFound"
clusterImageSetFoundReason = "ClusterImageSetFound"

@@ -71,14 +74,7 @@ const (
deleteAfterAnnotation = "hive.openshift.io/delete-after" // contains a duration after which the cluster should be cleaned up.
tryInstallOnceAnnotation = "hive.openshift.io/try-install-once"

platformAWS = "aws"
platformAzure = "azure"
platformGCP = "gcp"
platformOpenStack = "openstack"
platformVSphere = "vsphere"
platformBaremetal = "baremetal"
platformUnknown = "unknown"
regionUnknown = "unknown"
regionUnknown = "unknown"
)

var (
@@ -163,10 +159,11 @@ func Add(mgr manager.Manager) error {
// NewReconciler returns a new reconcile.Reconciler
func NewReconciler(mgr manager.Manager, logger log.FieldLogger, rateLimiter flowcontrol.RateLimiter) reconcile.Reconciler {
r := &ReconcileClusterDeployment{
Client: controllerutils.NewClientWithMetricsOrDie(mgr, ControllerName, &rateLimiter),
scheme: mgr.GetScheme(),
logger: logger,
expectations: controllerutils.NewExpectations(logger),
Client: controllerutils.NewClientWithMetricsOrDie(mgr, ControllerName, &rateLimiter),
scheme: mgr.GetScheme(),
logger: logger,
expectations: controllerutils.NewExpectations(logger),
validateCredentialsForClusterDeployment: controllerutils.ValidateCredentialsForClusterDeployment,
}
r.remoteClusterAPIClientBuilder = func(cd *hivev1.ClusterDeployment) remoteclient.Builder {
return remoteclient.NewBuilder(r.Client, cd, ControllerName)
@@ -275,6 +272,10 @@ type ReconcileClusterDeployment struct {
// for the remote cluster's API server
remoteClusterAPIClientBuilder func(cd *hivev1.ClusterDeployment) remoteclient.Builder

// validateCredentialsForClusterDeployment is the function this controller calls to verify
// that the platform creds can authenticate (injectable so tests can stub it out)
validateCredentialsForClusterDeployment func(client.Client, *hivev1.ClusterDeployment, log.FieldLogger) (bool, error)
Contributor:
I kinda wish this could be a separate controller, but obviously we don't want to proceed with launching an install here until it's been run. I know we typically wouldn't see a condition false unless it had previously been true, but would it make sense in this case? Don't launch unless we see an AuthenticationFailure condition set to False? It would be nice to encapsulate the responsibility for setting this across cloud providers into its own controller.
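For illustration only, a minimal sketch of the gating that comment proposes, assuming a separate (hypothetical) credentials controller owned the condition; the PR as merged instead checks the creds inline and errors out when the condition is True:

// Hypothetical gate in reconcile(): only proceed toward launching an install
// once the AuthenticationFailure condition has been explicitly set to False,
// i.e. the creds have been checked and passed.
authCond := controllerutils.FindClusterDeploymentCondition(cd.Status.Conditions, hivev1.AuthenticationFailureClusterDeploymentCondition)
if authCond == nil || authCond.Status != corev1.ConditionFalse {
	// Creds not verified yet (or failing); requeue rather than provision.
	return reconcile.Result{RequeueAfter: defaultRequeueTime}, nil
}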


protectedDelete bool
}

@@ -582,6 +583,27 @@ func (r *ReconcileClusterDeployment) reconcile(request reconcile.Request, cd *hivev1.ClusterDeployment
return *result, err
}

// Sanity check the platform/cloud credentials.
validCreds, err := r.validatePlatformCreds(cd, cdLog)
if err != nil {
cdLog.WithError(err).Error("unable to validate platform credentials")
return reconcile.Result{}, err
}
// Make sure the condition is set properly.
_, err = r.setAuthenticationFailure(cd, validCreds, cdLog)
if err != nil {
cdLog.WithError(err).Error("unable to update clusterdeployment")
return reconcile.Result{}, err
}

// If the platform credentials are no good, return error and go into backoff
authCondition := controllerutils.FindClusterDeploymentCondition(cd.Status.Conditions, hivev1.AuthenticationFailureClusterDeploymentCondition)
if authCondition != nil && authCondition.Status == corev1.ConditionTrue {
authError := errors.New(authCondition.Message)
cdLog.WithError(authError).Error("cannot proceed with provision while platform credentials authentication is failing.")
return reconcile.Result{}, authError
}

imageSet, err := r.getClusterImageSet(cd, cdLog)
if err != nil {
return reconcile.Result{}, err
@@ -1272,6 +1294,36 @@ func (r *ReconcileClusterDeployment) setDNSNotReadyCondition(cd *hivev1.ClusterDeployment
return r.Status().Update(context.TODO(), cd)
}

func (r *ReconcileClusterDeployment) setAuthenticationFailure(cd *hivev1.ClusterDeployment, authSuccessful bool, cdLog log.FieldLogger) (bool, error) {

var status corev1.ConditionStatus
var reason, message string

if authSuccessful {
status = corev1.ConditionFalse
reason = platformAuthSuccessReason
message = "Platform credentials passed authentication check"
} else {
status = corev1.ConditionTrue
reason = platformAuthFailureReason
message = "Platform credentials failed authentication check"
}

conditions, changed := controllerutils.SetClusterDeploymentConditionWithChangeCheck(
cd.Status.Conditions,
hivev1.AuthenticationFailureClusterDeploymentCondition,
status,
reason,
message,
controllerutils.UpdateConditionIfReasonOrMessageChange)
if !changed {
return false, nil
}
cd.Status.Conditions = conditions

return changed, r.Status().Update(context.TODO(), cd)
}

func (r *ReconcileClusterDeployment) setInstallLaunchErrorCondition(cd *hivev1.ClusterDeployment, status corev1.ConditionStatus, reason string, message string, cdLog log.FieldLogger) error {
conditions, changed := controllerutils.SetClusterDeploymentConditionWithChangeCheck(
cd.Status.Conditions,
@@ -2113,6 +2165,11 @@ func (r *ReconcileClusterDeployment) deleteOldFailedProvisions(provs []*hivev1.ClusterProvision
}
}

// validatePlatformCreds ensures the platform/cloud credentials are at least good enough to authenticate with
func (r *ReconcileClusterDeployment) validatePlatformCreds(cd *hivev1.ClusterDeployment, logger log.FieldLogger) (bool, error) {
return r.validateCredentialsForClusterDeployment(r.Client, cd, logger)
}

// checkForFailedSync returns true if it finds that the ClusterSync has the Failed condition set
func checkForFailedSync(clusterSync *hiveintv1alpha1.ClusterSync) bool {
for _, cond := range clusterSync.Status.Conditions {
@@ -2213,19 +2270,19 @@ func (r *ReconcileClusterDeployment) addOwnershipToSecret(cd *hivev1.ClusterDeployment
func getClusterPlatform(cd *hivev1.ClusterDeployment) string {
switch {
case cd.Spec.Platform.AWS != nil:
return platformAWS
return constants.PlatformAWS
case cd.Spec.Platform.Azure != nil:
return platformAzure
return constants.PlatformAzure
case cd.Spec.Platform.GCP != nil:
return platformGCP
return constants.PlatformGCP
case cd.Spec.Platform.OpenStack != nil:
return platformOpenStack
return constants.PlatformOpenStack
case cd.Spec.Platform.VSphere != nil:
return platformVSphere
return constants.PlatformVSphere
case cd.Spec.Platform.BareMetal != nil:
return platformBaremetal
return constants.PlatformBaremetal
}
return platformUnknown
return constants.PlatformUnknown
}

// getClusterRegion returns the region of a given ClusterDeployment
pkg/controller/clusterdeployment/clusterdeployment_controller_test.go
@@ -118,15 +118,16 @@ func TestClusterDeploymentReconcile(t *testing.T) {
}

tests := []struct {
name string
existing []runtime.Object
pendingCreation bool
expectErr bool
expectedRequeueAfter time.Duration
expectPendingCreation bool
expectConsoleRouteFetch bool
validate func(client.Client, *testing.T)
reconcilerSetup func(*ReconcileClusterDeployment)
name string
existing []runtime.Object
pendingCreation bool
expectErr bool
expectedRequeueAfter time.Duration
expectPendingCreation bool
expectConsoleRouteFetch bool
validate func(client.Client, *testing.T)
reconcilerSetup func(*ReconcileClusterDeployment)
platformCredentialsValidation func(client.Client, *hivev1.ClusterDeployment, log.FieldLogger) (bool, error)
}{
{
name: "Add finalizer",
@@ -1500,6 +1501,53 @@ func TestClusterDeploymentReconcile(t *testing.T) {
assertConditionReason(t, cd, hivev1.ProvisionStoppedCondition, "InstallAttemptsLimitReached")
},
},
{
name: "auth condition when platform creds are bad",
existing: []runtime.Object{
testClusterDeployment(),
},
platformCredentialsValidation: func(client.Client, *hivev1.ClusterDeployment, log.FieldLogger) (bool, error) {
return false, nil
},
expectErr: true,
validate: func(c client.Client, t *testing.T) {
cd := getCD(c)
require.NotNil(t, cd, "could not get ClusterDeployment")

assertConditionStatus(t, cd, hivev1.AuthenticationFailureClusterDeploymentCondition, corev1.ConditionTrue)
},
},
{
name: "no ClusterProvision when platform creds are bad",
existing: []runtime.Object{
func() *hivev1.ClusterDeployment {
cd := testClusterDeployment()
cd.Status.Conditions = []hivev1.ClusterDeploymentCondition{
{
Status: corev1.ConditionTrue,
Type: hivev1.AuthenticationFailureClusterDeploymentCondition,
Reason: platformAuthFailureReason,
Message: "Platform credentials failed authentication check",
},
}
return cd
}(),
},
platformCredentialsValidation: func(client.Client, *hivev1.ClusterDeployment, log.FieldLogger) (bool, error) {
return false, nil
},
expectErr: true,
validate: func(c client.Client, t *testing.T) {
cd := getCD(c)
require.NotNil(t, cd, "could not get ClusterDeployment")

provisionList := &hivev1.ClusterProvisionList{}
err := c.List(context.TODO(), provisionList, client.InNamespace(cd.Namespace))
require.NoError(t, err, "unexpected error listing ClusterProvisions")

assert.Zero(t, len(provisionList.Items), "expected no ClusterProvision objects when platform creds are bad")
},
},
}

for _, test := range tests {
@@ -1509,13 +1557,21 @@ func TestClusterDeploymentReconcile(t *testing.T) {
controllerExpectations := controllerutils.NewExpectations(logger)
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()

mockRemoteClientBuilder := remoteclientmock.NewMockBuilder(mockCtrl)

if test.platformCredentialsValidation == nil {
test.platformCredentialsValidation = func(client.Client, *hivev1.ClusterDeployment, log.FieldLogger) (bool, error) {
return true, nil
}
}
rcd := &ReconcileClusterDeployment{
Client: fakeClient,
scheme: scheme.Scheme,
logger: logger,
expectations: controllerExpectations,
remoteClusterAPIClientBuilder: func(*hivev1.ClusterDeployment) remoteclient.Builder { return mockRemoteClientBuilder },
Client: fakeClient,
scheme: scheme.Scheme,
logger: logger,
expectations: controllerExpectations,
remoteClusterAPIClientBuilder: func(*hivev1.ClusterDeployment) remoteclient.Builder { return mockRemoteClientBuilder },
validateCredentialsForClusterDeployment: test.platformCredentialsValidation,
}

if test.reconcilerSetup != nil {
@@ -2161,6 +2217,9 @@ func TestUpdatePullSecretInfo(t *testing.T) {
scheme: scheme.Scheme,
logger: log.WithField("controller", "clusterDeployment"),
remoteClusterAPIClientBuilder: func(*hivev1.ClusterDeployment) remoteclient.Builder { return mockRemoteClientBuilder },
validateCredentialsForClusterDeployment: func(client.Client, *hivev1.ClusterDeployment, log.FieldLogger) (bool, error) {
return true, nil
},
}

_, err := rcd.Reconcile(reconcile.Request{
1 change: 1 addition & 0 deletions pkg/controller/metrics/provision_underway_collector.go
@@ -21,6 +21,7 @@ var (
hivev1.DNSNotReadyCondition,
hivev1.InstallLaunchErrorCondition,
hivev1.ProvisionFailedCondition,
hivev1.AuthenticationFailureClusterDeploymentCondition,
}
)

68 changes: 68 additions & 0 deletions pkg/controller/utils/credentials.go
@@ -0,0 +1,68 @@
package utils

import (
"context"

log "github.com/sirupsen/logrus"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"

"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/openshift/installer/pkg/types/vsphere"

hivev1 "github.com/openshift/hive/pkg/apis/hive/v1"
"github.com/openshift/hive/pkg/constants"
)

// ValidateCredentialsForClusterDeployment will attempt to verify that the platform/cloud credentials
// for the given ClusterDeployment are valid.
// Note: It simply checks that the username/password (or equivalent) can authenticate,
// not that the credentials have any specific permissions.
func ValidateCredentialsForClusterDeployment(kubeClient client.Client, cd *hivev1.ClusterDeployment, logger log.FieldLogger) (bool, error) {
secret := &corev1.Secret{}

switch getClusterPlatform(cd) {
case constants.PlatformVSphere:
secretKey := types.NamespacedName{Name: cd.Spec.Platform.VSphere.CredentialsSecretRef.Name, Namespace: cd.Namespace}
if err := kubeClient.Get(context.TODO(), secretKey, secret); err != nil {
logger.WithError(err).Error("failed to read in ClusterDeployment's platform creds")
return false, err
}
return validateVSphereCredentials(cd.Spec.Platform.VSphere.VCenter,
string(secret.Data[constants.UsernameSecretKey]),
string(secret.Data[constants.PasswordSecretKey]),
logger)
default:
// If we have no platform-specific credentials verification,
// assume the creds are valid.
return true, nil
}
}

func validateVSphereCredentials(vcenter, username, password string, logger log.FieldLogger) (bool, error) {
_, _, err := vsphere.CreateVSphereClients(context.TODO(), vcenter, username, password)
if err != nil {
logger.WithError(err).Warn("failed to validate VSphere credentials")
}
// An authentication failure is reported via the bool, not the error,
// so callers can distinguish bad creds from an inability to check them.
return err == nil, nil
}

// getClusterPlatform returns the platform of a given ClusterDeployment
func getClusterPlatform(cd *hivev1.ClusterDeployment) string {
switch {
case cd.Spec.Platform.AWS != nil:
return constants.PlatformAWS
case cd.Spec.Platform.Azure != nil:
return constants.PlatformAzure
case cd.Spec.Platform.GCP != nil:
return constants.PlatformGCP
case cd.Spec.Platform.OpenStack != nil:
return constants.PlatformOpenStack
case cd.Spec.Platform.VSphere != nil:
return constants.PlatformVSphere
case cd.Spec.Platform.BareMetal != nil:
return constants.PlatformBaremetal
}
return constants.PlatformUnknown
}
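
For reference, a minimal usage sketch of the new helper (hypothetical caller; kubeClient, cd, and logger stand for whatever client.Client, ClusterDeployment, and FieldLogger the caller already has in hand):

// Returns (false, nil) when authentication fails, and a non-nil error only
// when the check itself could not run (e.g. the credentials Secret is missing).
valid, err := controllerutils.ValidateCredentialsForClusterDeployment(kubeClient, cd, logger)
if err != nil {
	return err // could not determine validity; let the caller retry
}
if !valid {
	// bad creds: set the AuthenticationFailure condition and stop provisioning
}

Splitting the boolean from the error lets the reconcile loop distinguish "credentials are bad" (set the condition and halt the install) from "could not check" (return the error and go into backoff).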