Skip to content

Commit

Permalink
Fix Azure Validation & Finalizer assignment (#288)
Browse files Browse the repository at this point in the history
  • Loading branch information
alvaroaleman committed Jul 30, 2018
1 parent 45068da commit 8372101
Show file tree
Hide file tree
Showing 14 changed files with 124 additions and 71 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ jobs:
name: Clean up machines
command: ./test/tools/integration/cleanup_machines.sh
when: always
no_output_timeout: 25m
- run:
name: Clean up master
command: make -C test/tools/integration destroy
Expand Down
4 changes: 2 additions & 2 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ func main() {
prometheusRegistry := prometheus.NewRegistry()

// before we acquire a lock we actually warm up caches mirroring the state of the API server
machineInformerFactory := machineinformers.NewSharedInformerFactory(machineClient, time.Second*30)
kubeInformerFactory := kubeinformers.NewSharedInformerFactory(kubeClient, time.Second*30)
machineInformerFactory := machineinformers.NewSharedInformerFactory(machineClient, time.Minute*15)
kubeInformerFactory := kubeinformers.NewSharedInformerFactory(kubeClient, time.Minute*15)
kubePublicKubeInformerFactory := kubeinformers.NewFilteredSharedInformerFactory(kubeClient, time.Second*30, metav1.NamespacePublic, nil)
kubeSystemInformerFactory := kubeinformers.NewFilteredSharedInformerFactory(kubeClient, time.Second*30, metav1.NamespaceSystem, nil)
defaultKubeInformerFactory := kubeinformers.NewFilteredSharedInformerFactory(kubeClient, time.Second*30, metav1.NamespaceDefault, nil)
Expand Down
5 changes: 4 additions & 1 deletion pkg/cloudprovider/cloud/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ type Provider interface {
// Create creates a cloud instance according to the given machine
Create(machine *v1alpha1.Machine, update MachineUpdater, userdata string) (instance.Instance, error)

Delete(machine *v1alpha1.Machine, update MachineUpdater, instance instance.Instance) error
// Delete deletes the instance and all associated ressources
// This will always be called on machine deletion, the implemention must check if there is actually
// something to delete and just do nothing if there isn't
Delete(machine *v1alpha1.Machine, update MachineUpdater) error
}

// MachineUpdater defines a function to persist an update to a machine
Expand Down
12 changes: 10 additions & 2 deletions pkg/cloudprovider/provider/aws/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ func (p *provider) Create(machine *v1alpha1.Machine, update cloud.MachineUpdater
Groups: aws.StringSlice(securityGroupIDs),
})
if err != nil {
delErr := p.Delete(machine, update, awsInstance)
delErr := p.Delete(machine, update)
if delErr != nil {
return nil, awsErrorToTerminalError(err, fmt.Sprintf("failed to attach instance %s to security group %s & delete the created instance", aws.StringValue(runOut.Instances[0].InstanceId), defaultSecurityGroupName))
}
Expand All @@ -640,7 +640,15 @@ func (p *provider) Create(machine *v1alpha1.Machine, update cloud.MachineUpdater
return awsInstance, nil
}

func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater, instance instance.Instance) error {
func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater) error {
instance, err := p.Get(machine)
if err != nil {
if err == cloudprovidererrors.ErrInstanceNotFound {
return nil
}
return err
}

config, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return cloudprovidererrors.TerminalError{
Expand Down
9 changes: 9 additions & 0 deletions pkg/cloudprovider/provider/azure/create_delete_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,15 @@ func getSubnet(ctx context.Context, c *config) (network.Subnet, error) {
return subnetsClient.Get(ctx, c.ResourceGroup, c.VNetName, c.SubnetName, "")
}

func getVirtualNetwork(ctx context.Context, c *config) (network.VirtualNetwork, error) {
virtualNetworksClient, err := getVirtualNetworksClient(c)
if err != nil {
return network.VirtualNetwork{}, err
}

return virtualNetworksClient.Get(ctx, c.ResourceGroup, c.VNetName, "")
}

func createNetworkInterface(ctx context.Context, ifName string, machineUID types.UID, config *config, publicIP *network.PublicIPAddress) (network.Interface, error) {
ifClient, err := getInterfacesClient(config)
if err != nil {
Expand Down
16 changes: 13 additions & 3 deletions pkg/cloudprovider/provider/azure/get_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,23 @@ func getIPClient(c *config) (*network.PublicIPAddressesClient, error) {

func getSubnetsClient(c *config) (*network.SubnetsClient, error) {
var err error
ipClient := network.NewSubnetsClient(c.SubscriptionID)
ipClient.Authorizer, err = auth.NewClientCredentialsConfig(c.ClientID, c.ClientSecret, c.TenantID).Authorizer()
subnetClient := network.NewSubnetsClient(c.SubscriptionID)
subnetClient.Authorizer, err = auth.NewClientCredentialsConfig(c.ClientID, c.ClientSecret, c.TenantID).Authorizer()
if err != nil {
return nil, fmt.Errorf("failed to create authorizer: %s", err.Error())
}

return &ipClient, nil
return &subnetClient, nil
}

func getVirtualNetworksClient(c *config) (*network.VirtualNetworksClient, error) {
var err error
virtualNetworksClient := network.NewVirtualNetworksClient(c.SubscriptionID)
virtualNetworksClient.Authorizer, err = auth.NewClientCredentialsConfig(c.ClientID, c.ClientSecret, c.TenantID).Authorizer()
if err != nil {
return nil, fmt.Errorf("failed to create authorizer: %v", err)
}
return &virtualNetworksClient, nil
}

func getVMClient(c *config) (*compute.VirtualMachinesClient, error) {
Expand Down
43 changes: 31 additions & 12 deletions pkg/cloudprovider/provider/azure/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,17 +333,17 @@ func (p *provider) Create(machine *v1alpha1.Machine, update cloud.MachineUpdater
return nil, fmt.Errorf("failed to generate ssh key: %v", err)
}

if !kuberneteshelper.HasFinalizer(machine, finalizerPublicIP) {
if machine, err = update(machine.Name, func(updatedMachine *v1alpha1.Machine) {
updatedMachine.Finalizers = append(updatedMachine.Finalizers, finalizerPublicIP)
}); err != nil {
return nil, err
}
}
ifaceName := machine.Spec.Name + "-netiface"
publicIPName := ifaceName + "-pubip"
var publicIP *network.PublicIPAddress
if config.AssignPublicIP {
if !kuberneteshelper.HasFinalizer(machine, finalizerPublicIP) {
if machine, err = update(machine.Name, func(updatedMachine *v1alpha1.Machine) {
updatedMachine.Finalizers = append(updatedMachine.Finalizers, finalizerPublicIP)
}); err != nil {
return nil, err
}
}
publicIP, err = createPublicIPAddress(context.TODO(), publicIPName, machine.UID, config)
if err != nil {
return nil, fmt.Errorf("failed to create public IP: %v", err)
Expand Down Expand Up @@ -450,16 +450,22 @@ func (p *provider) Create(machine *v1alpha1.Machine, update cloud.MachineUpdater
return &azureVM{vm: &vm, ipAddresses: ipAddresses, status: status}, nil
}

func (p *provider) Delete(machine *v1alpha1.Machine, update cloud.MachineUpdater, instance instance.Instance) error {
func (p *provider) Delete(machine *v1alpha1.Machine, update cloud.MachineUpdater) error {
config, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return fmt.Errorf("failed to parse MachineSpec: %v", err)
}

glog.Infof("deleting VM %q", machine.Name)
if err = deleteVMsByMachineUID(context.TODO(), config, machine.UID); err != nil {
return fmt.Errorf("Is failed to remove public IP addresses of machine %q: %v", machine.Name, err)
_, err = p.Get(machine)
// If a defunct VM got created, the `Get` call returns an error - But not because the request
// failed but because the VM has an invalid config hence always delete except on err == cloudprovidererrors.ErrInstanceNotFound
if err == nil || (err != nil && err != cloudprovidererrors.ErrInstanceNotFound) {
glog.Infof("deleting VM %q", machine.Name)
if err = deleteVMsByMachineUID(context.TODO(), config, machine.UID); err != nil {
return fmt.Errorf("failed to delete instance for machine %q: %v", machine.Name, err)
}
}

if machine, err = update(machine.Name, func(updatedMachine *v1alpha1.Machine) {
updatedMachine.Finalizers = kuberneteshelper.RemoveFinalizer(updatedMachine.Finalizers, finalizerVM)
}); err != nil {
Expand Down Expand Up @@ -638,7 +644,7 @@ func (p *provider) GetCloudConfig(spec v1alpha1.MachineSpec) (config string, nam
}

func (p *provider) Validate(spec v1alpha1.MachineSpec) error {
c, _, err := p.getConfig(spec.ProviderConfig)
c, providerCfg, err := p.getConfig(spec.ProviderConfig)
if err != nil {
return fmt.Errorf("failed to parse config: %v", err)
}
Expand Down Expand Up @@ -685,5 +691,18 @@ func (p *provider) Validate(spec v1alpha1.MachineSpec) error {
return fmt.Errorf("failed to list all: %v", err.Error())
}

if _, err := getVirtualNetwork(context.TODO(), c); err != nil {
return fmt.Errorf("failed to get virtual network: %v", err)
}

if _, err := getSubnet(context.TODO(), c); err != nil {
return fmt.Errorf("failed to get subnet: %v", err)
}

_, err = getOSImageReference(providerCfg.OperatingSystem)
if err != nil {
return err
}

return nil
}
10 changes: 9 additions & 1 deletion pkg/cloudprovider/provider/digitalocean/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,15 @@ func (p *provider) Create(machine *v1alpha1.Machine, _ cloud.MachineUpdater, use
return &doInstance{droplet: droplet}, err
}

func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater, instance instance.Instance) error {
func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater) error {
instance, err := p.Get(machine)
if err != nil {
if err == cloudprovidererrors.ErrInstanceNotFound {
return nil
}
return err
}

c, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return cloudprovidererrors.TerminalError{
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/provider/fake/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,6 @@ func (p *provider) Create(_ *v1alpha1.Machine, _ cloud.MachineUpdater, _ string)
return FakeCloudProviderInstance{}, nil
}

func (p *provider) Delete(_ *v1alpha1.Machine, _ cloud.MachineUpdater, _ instance.Instance) error {
func (p *provider) Delete(_ *v1alpha1.Machine, _ cloud.MachineUpdater) error {
return nil
}
10 changes: 9 additions & 1 deletion pkg/cloudprovider/provider/hetzner/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,15 @@ func (p *provider) Create(machine *v1alpha1.Machine, _ cloud.MachineUpdater, use
return &hetznerServer{server: serverCreateRes.Server}, nil
}

func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater, instance instance.Instance) error {
func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater) error {
instance, err := p.Get(machine)
if err != nil {
if err == cloudprovidererrors.ErrInstanceNotFound {
return nil
}
return err
}

c, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return cloudprovidererrors.TerminalError{
Expand Down
10 changes: 9 additions & 1 deletion pkg/cloudprovider/provider/openstack/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,15 @@ func (p *provider) Create(machine *v1alpha1.Machine, _ cloud.MachineUpdater, use
return &osInstance{server: &server}, nil
}

func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater, instance instance.Instance) error {
func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater) error {
instance, err := p.Get(machine)
if err != nil {
if err == cloudprovidererrors.ErrInstanceNotFound {
return nil
}
return err
}

c, _, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return cloudprovidererrors.TerminalError{
Expand Down
10 changes: 9 additions & 1 deletion pkg/cloudprovider/provider/vsphere/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,14 @@ func (p *provider) Create(machine *v1alpha1.Machine, _ cloud.MachineUpdater, use
return VSphereServer{name: virtualMachine.Name(), status: instance.StatusRunning, id: virtualMachine.Reference().Value}, nil
}

func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater, _ instance.Instance) error {
func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater) error {
if _, err := p.Get(machine); err != nil {
if err == cloudprovidererrors.ErrInstanceNotFound {
return nil
}
return err
}

config, pc, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return fmt.Errorf("failed to parse config: %v", err)
Expand Down Expand Up @@ -445,6 +452,7 @@ func (p *provider) Delete(machine *v1alpha1.Machine, _ cloud.MachineUpdater, _ i
}

func (p *provider) Get(machine *v1alpha1.Machine) (instance.Instance, error) {

config, _, _, err := p.getConfig(machine.Spec.ProviderConfig)
if err != nil {
return nil, fmt.Errorf("failed to parse config: %v", err)
Expand Down
61 changes: 16 additions & 45 deletions pkg/controller/machine/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,16 +297,27 @@ func (c *Controller) getProviderInstance(prov cloud.Provider, machine *machinev1
return prov.Get(machine)
}

func (c *Controller) deleteProviderInstance(prov cloud.Provider, machine *machinev1alpha1.Machine, instance instance.Instance) error {
return prov.Delete(machine, c.updateMachine, instance)
func (c *Controller) deleteProviderInstance(prov cloud.Provider, machine *machinev1alpha1.Machine) error {
return prov.Delete(machine, c.updateMachine)
}

func (c *Controller) createProviderInstance(prov cloud.Provider, machine *machinev1alpha1.Machine, userdata string) (instance.Instance, error) {
// Ensure finalizer is there
machine, err := c.ensureDeleteFinalizerExists(machine)
if err != nil {
return nil, err
}
return prov.Create(machine, c.updateMachine, userdata)
}

func (c *Controller) validateMachine(prov cloud.Provider, machine *machinev1alpha1.Machine) error {
return prov.Validate(machine.Spec)
err := prov.Validate(machine.Spec)
if err != nil {
c.recorder.Eventf(machine, corev1.EventTypeWarning, "ValidationFailed", "Validation failed: %v", err)
return err
}
c.recorder.Event(machine, corev1.EventTypeNormal, "ValidationSucceeded", "Validation succeeded")
return nil
}

func (c *Controller) syncHandler(key string) error {
Expand Down Expand Up @@ -426,45 +437,12 @@ func (c *Controller) cleanupMachineAfterDeletion(machine *machinev1alpha1.Machin

// deleteMachineAndProviderInstance makes sure that an instance has gone in a series of steps.
func (c *Controller) deleteMachineAndProviderInstance(prov cloud.Provider, machine *machinev1alpha1.Machine) error {
// step 1: get the provider instance.
providerInstance, err := c.getProviderInstance(prov, machine)
if err != nil {
// step 1.1: failed to get instance, because of some unknown error -> return and see if we need to handle a terminal error here
if err != cloudprovidererrors.ErrInstanceNotFound {
return c.updateMachineErrorIfTerminalError(machine, machinev1alpha1.DeleteMachineError, err.Error(), err, fmt.Sprintf("failed to get instance for machine %s after the delete got triggered", machine.Name))
}
glog.V(4).Infof("Provider has no instance for %s. Considering it as deleted", machine.Name)

// step 1.2 the instance could not be found -> it's gone, so we remove the finalizer. This essentially will remove the machine object from the system
return c.cleanupMachineAfterDeletion(machine)
}

// step 2: we still have an instance on the cloud provider
// step 2.1: Check if its in deleting state - if so, the provider normally does some own cleanup. We wait until the instance is completely gone.
if instance.StatusDeleting == providerInstance.Status() {
glog.V(4).Infof("deletion of instance %s got triggered. Waiting until it fully disappears", providerInstance.ID())
return nil
}

// step 2.2: The instance exists at the provider, but its considered dead
if instance.StatusDeleted == providerInstance.Status() {
glog.V(4).Infof("Provider says the instance for %s is deleted", machine.Name)
return c.cleanupMachineAfterDeletion(machine)
}

// step 2.3: delete provider instance
if err = c.deleteProviderInstance(prov, machine, providerInstance); err != nil {
if err := c.deleteProviderInstance(prov, machine); err != nil {
message := fmt.Sprintf("%v. Please manually delete finalizers from the machine object.", err)
c.recorder.Eventf(machine, corev1.EventTypeWarning, "DeletionFailed", "Failed to delete machine: %v", err)
return c.updateMachineErrorIfTerminalError(machine, machinev1alpha1.DeleteMachineError, message, err, "failed to delete machine at cloudprovider")
}

// step 3: remove error message in case it was set
if machine, err = c.clearMachineErrorIfSet(machine); err != nil {
return fmt.Errorf("failed to update machine after removing the delete error: %v", err)
}

return nil
return c.cleanupMachineAfterDeletion(machine)
}

func (c *Controller) ensureInstanceExistsForMachine(prov cloud.Provider, machine *machinev1alpha1.Machine, userdataProvider userdata.Provider, providerConfig *providerconfig.Config) error {
Expand Down Expand Up @@ -496,10 +474,8 @@ func (c *Controller) ensureInstanceExistsForMachine(prov cloud.Provider, machine
if _, errNested := c.updateMachineError(machine, machinev1alpha1.InvalidConfigurationMachineError, err.Error()); errNested != nil {
return fmt.Errorf("failed to update machine error after failed validation: %v", errNested)
}
c.recorder.Eventf(machine, corev1.EventTypeWarning, "ValidationFailed", "Validation failed: %v", err)
return fmt.Errorf("invalid provider config: %v", err)
}
c.recorder.Event(machine, corev1.EventTypeNormal, "ValidationSucceeded", "Validation succeeded")
c.validationCacheMutex.Lock()
c.validationCache[cacheKey] = true
c.validationCacheMutex.Unlock()
Expand Down Expand Up @@ -542,11 +518,6 @@ func (c *Controller) ensureInstanceExistsForMachine(prov cloud.Provider, machine
message := fmt.Sprintf("%v. Unable to create a machine.", err)
return c.updateMachineErrorIfTerminalError(machine, machinev1alpha1.CreateMachineError, message, err, "failed to create machine at cloudprover")
}
// We created the instance, so ensure finalizer is there
machine, err = c.ensureDeleteFinalizerExists(machine)
if err != nil {
return err
}
c.recorder.Event(machine, corev1.EventTypeNormal, "Created", "Successfully created instance")
// remove error message in case it was set
if machine, err = c.clearMachineErrorIfSet(machine); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion test/tools/integration/provision_master.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ if [[ "${1:-deploy_machine_controller}" == "do-not-deploy-machine-controller" ]
fi
if ! ls machine-controller-deployed; then
docker build -t kubermatic/machine-controller:latest .
sed -i -e 's/-worker-count=5/-worker-count=20/g' machine-controller.yaml
sed -i -e 's/-worker-count=5/-worker-count=50/g' machine-controller.yaml
kubectl apply -f machine-controller.yaml
touch machine-controller-deployed
fi
Expand Down

0 comments on commit 8372101

Please sign in to comment.