Skip to content

Commit

Permalink
Merge pull request #144 from syself/feature/handle-rate-limits-hcloud
Browse files Browse the repository at this point in the history
✨ Handle rate limit errors in HCloud
  • Loading branch information
batistein committed Apr 8, 2022
2 parents 1a8228c + d15f023 commit 3504e68
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 9 deletions.
7 changes: 7 additions & 0 deletions api/v1beta1/conditions_const.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,10 @@ const (
// HCloudCredentialsInvalidReason indicates that credentials for HCloud are invalid.
HCloudCredentialsInvalidReason = "HCloudCredentialsInvalid" // #nosec
)

const (
// RateLimitExceeded reports whether the rate limit has been reached.
RateLimitExceeded clusterv1.ConditionType = "RateLimitExceeded"
// RateLimitNotReachedReason indicates that the rate limit is not reached yet.
RateLimitNotReachedReason = "RateLimitNotReached"
)
7 changes: 3 additions & 4 deletions controllers/controllers_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers_test
package controllers

import (
"sync"
Expand All @@ -24,7 +24,6 @@ import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1"
"github.com/syself/cluster-api-provider-hetzner/controllers"
hcloudclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/hcloud/client"
"github.com/syself/cluster-api-provider-hetzner/test/helpers"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -69,15 +68,15 @@ var _ = BeforeSuite(func() {

wg.Add(1)

Expect((&controllers.HetznerClusterReconciler{
Expect((&HetznerClusterReconciler{
Client: testEnv.Manager.GetClient(),
APIReader: testEnv.Manager.GetAPIReader(),
HCloudClientFactory: testEnv.HCloudClientFactory,
WatchFilterValue: "",
TargetClusterManagersWaitGroup: &wg,
}).SetupWithManager(ctx, testEnv.Manager, controller.Options{})).To(Succeed())

Expect((&controllers.HCloudMachineReconciler{
Expect((&HCloudMachineReconciler{
Client: testEnv.Manager.GetClient(),
APIReader: testEnv.Manager.GetAPIReader(),
HCloudClientFactory: testEnv.HCloudClientFactory,
Expand Down
6 changes: 6 additions & 0 deletions controllers/hcloudmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package controllers

import (
"context"
"time"

"github.com/pkg/errors"
infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1"
Expand Down Expand Up @@ -139,6 +140,11 @@ func (r *HCloudMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques
}
}()

// check whether rate limit has been reached and if so, then wait.
if wait := reconcileRateLimit(hcloudMachine); wait {
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

if !hcloudMachine.ObjectMeta.DeletionTimestamp.IsZero() {
return r.reconcileDelete(ctx, machineScope)
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/hcloudmachine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers_test
package controllers

import (
"time"
Expand Down
29 changes: 29 additions & 0 deletions controllers/hetznercluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ import (

const (
secretErrorRetryDelay = time.Second * 10
rateLimitWaitTime = 5 * time.Minute
)

// HetznerClusterReconciler reconciles a HetznerCluster object.
Expand Down Expand Up @@ -145,6 +146,11 @@ func (r *HetznerClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque
}
}()

// check whether rate limit has been reached and if so, then wait.
if wait := reconcileRateLimit(hetznerCluster); wait {
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

// Handle deleted clusters
if !hetznerCluster.DeletionTimestamp.IsZero() {
return r.reconcileDelete(ctx, clusterScope)
Expand Down Expand Up @@ -288,6 +294,29 @@ func (r *HetznerClusterReconciler) reconcileDelete(ctx context.Context, clusterS
return reconcile.Result{}, nil
}

// reconcileRateLimit checks whether a rate limit has been reached and returns whether
// the controller should wait a bit more.
func reconcileRateLimit(setter conditions.Setter) bool {
condition := conditions.Get(setter, infrav1.RateLimitExceeded)
if condition != nil && condition.Status == corev1.ConditionTrue {
if time.Now().Before(condition.LastTransitionTime.Time.Add(rateLimitWaitTime)) {
// Not yet timed out, reconcile again after timeout
// Don't give a more precise requeueAfter value to not reconcile too many
// objects at the same time
return true
}
// Wait time is over, we continue
conditions.MarkFalse(
setter,
infrav1.RateLimitExceeded,
infrav1.RateLimitNotReachedReason,
clusterv1.ConditionSeverityInfo,
"wait time is over. Try reconciling again",
)
}
return false
}

func getAndValidateHCloudToken(ctx context.Context, namespace string, hetznerCluster *infrav1.HetznerCluster, secretManager *secretutil.SecretManager) (string, *corev1.Secret, error) {
// retrieve Hetzner secret
secretNamspacedName := types.NamespacedName{Namespace: namespace, Name: hetznerCluster.Spec.HetznerSecret.Name}
Expand Down
39 changes: 38 additions & 1 deletion controllers/hetznercluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers_test
package controllers

import (
"context"
Expand All @@ -34,6 +34,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -315,6 +316,7 @@ var _ = Describe("Hetzner ClusterReconciler", func() {
}, timeout).Should(Equal(len(instance.Spec.ControlPlaneLoadBalancer.ExtraServices)))
})
})

Context("For HetznerMachines belonging to the cluster", func() {
var (
namespace string
Expand Down Expand Up @@ -885,3 +887,38 @@ var _ = Describe("HetznerCluster validation", func() {
})
})
})

var _ = Describe("reconcileRateLimit", func() {
var hetznerCluster *infrav1.HetznerCluster
BeforeEach(func() {
hetznerCluster = &infrav1.HetznerCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "rate-limit-cluster",
Namespace: "default",
},
Spec: getDefaultHetznerClusterSpec(),
}
})

It("returns wait== true if rate limit condition is set and time is not over", func() {
conditions.MarkTrue(hetznerCluster, infrav1.RateLimitExceeded)
Expect(reconcileRateLimit(hetznerCluster)).To(BeTrue())
})

It("returns wait== false if rate limit condition is set and time is over", func() {
conditions.MarkTrue(hetznerCluster, infrav1.RateLimitExceeded)
conditionList := hetznerCluster.GetConditions()
conditionList[0].LastTransitionTime = metav1.NewTime(time.Now().Add(-time.Hour))
Expect(reconcileRateLimit(hetznerCluster)).To(BeFalse())
})

It("returns wait== false if rate limit condition is set to false", func() {
conditions.MarkFalse(hetznerCluster, infrav1.RateLimitExceeded, "", clusterv1.ConditionSeverityInfo, "")
Expect(reconcileRateLimit(hetznerCluster)).To(BeFalse())
})

It("returns wait== false if rate limit condition is not set", func() {
Expect(reconcileRateLimit(hetznerCluster)).To(BeFalse())
})

})
25 changes: 24 additions & 1 deletion pkg/services/hcloud/loadbalancer/loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ func (s *Service) reconcileNetworkAttachement(ctx context.Context, lb *hcloud.Lo
ID: s.scope.HetznerCluster.Status.Network.ID,
},
}); err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function AttachServerToNetwork",
)
}
// In case lb is already attached don't raise an error
if hcloud.IsError(err, hcloud.ErrorCodeLoadBalancerAlreadyAttached) {
return nil
Expand Down Expand Up @@ -136,6 +143,14 @@ func (s *Service) reconcileLBProperties(ctx context.Context, lb *hcloud.LoadBala
Name: s.scope.HetznerCluster.Spec.ControlPlaneLoadBalancer.Type,
},
}); err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function ChangeLoadBalancerType",
)
return errors.Wrap(err, "rate limit exceeded while changing lb type")
}
multierr = append(multierr, errors.Wrap(err, "failed to change load balancer type"))
}
record.Eventf(s.scope.HetznerCluster, "ChangeLoadBalancerType", "Changed load balancer type")
Expand All @@ -146,7 +161,15 @@ func (s *Service) reconcileLBProperties(ctx context.Context, lb *hcloud.LoadBala
if _, err := s.scope.HCloudClient.ChangeLoadBalancerAlgorithm(ctx, lb, hcloud.LoadBalancerChangeAlgorithmOpts{
Type: hcloud.LoadBalancerAlgorithmType(s.scope.HetznerCluster.Spec.ControlPlaneLoadBalancer.Algorithm),
}); err != nil {
multierr = append(multierr, errors.Wrap(err, "failed to change load balancer type"))
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function ChangeLoadBalancerAlgorithm",
)
return errors.Wrap(err, "rate limit exceeded while changing lb algorithm")
}
multierr = append(multierr, errors.Wrap(err, "failed to change load balancer algorithm"))
}
record.Eventf(s.scope.HetznerCluster, "ChangeLoadBalancerAlgorithm", "Changed load balancer algorithm")
}
Expand Down
23 changes: 22 additions & 1 deletion pkg/services/hcloud/network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ func (s *Service) createNetwork(ctx context.Context, spec *infrav1.HCloudNetwork

resp, err := s.scope.HCloudClient.CreateNetwork(ctx, opts)
if err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function CreateNetwork",
)
}
record.Warnf(
s.scope.HetznerCluster,
"NetworkCreatedFailed",
Expand All @@ -121,6 +128,13 @@ func (s *Service) Delete(ctx context.Context) error {
return nil
}
if err := s.scope.HCloudClient.DeleteNetwork(ctx, &hcloud.Network{ID: s.scope.HetznerCluster.Status.Network.ID}); err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function DeleteNetwork",
)
}
// If resource has been deleted already then do nothing
if hcloud.IsError(err, hcloud.ErrorCodeNotFound) {
s.scope.V(1).Info("deleting network failed - not found", "id", s.scope.HetznerCluster.Status.Network.ID)
Expand Down Expand Up @@ -148,7 +162,14 @@ func (s *Service) findNetwork(ctx context.Context) (*hcloud.Network, error) {
opts.LabelSelector = utils.LabelsToLabelSelector(s.labels())
networks, err := s.scope.HCloudClient.ListNetworks(ctx, opts)
if err != nil {
return nil, err
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function ListNetworks",
)
}
return nil, errors.Wrap(err, "failed to list networks")
}

if len(networks) > 1 {
Expand Down
38 changes: 37 additions & 1 deletion pkg/services/hcloud/placementgroup/placementgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/syself/cluster-api-provider-hetzner/pkg/scope"
"github.com/syself/cluster-api-provider-hetzner/pkg/utils"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/record"
ctrl "sigs.k8s.io/controller-runtime"
)
Expand Down Expand Up @@ -89,13 +90,29 @@ func (s *Service) Reconcile(ctx context.Context) (err error) {
Type: hcloud.PlacementGroupType(placementGroupSpecMap[pgName].Type),
Labels: map[string]string{clusterTagKey: string(infrav1.ResourceLifecycleOwned)},
}); err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function CreatePlacementGroup",
)
return err
}
multierr = append(multierr, err)
}
}

// Delete
for _, pgName := range toDelete {
if err := s.scope.HCloudClient.DeletePlacementGroup(ctx, placementGroupStatusMap[pgName].ID); err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function DeletePlacementGroup",
)
return err
}
multierr = append(multierr, err)
}
}
Expand Down Expand Up @@ -123,6 +140,14 @@ func (s *Service) Delete(ctx context.Context) (err error) {
var multierr []error
for _, pg := range s.scope.HetznerCluster.Status.HCloudPlacementGroup {
if err := s.scope.HCloudClient.DeletePlacementGroup(ctx, pg.ID); err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function DeletePlacementGroup",
)
return err
}
if !hcloud.IsError(err, hcloud.ErrorCodeNotFound) {
multierr = append(multierr, err)
}
Expand All @@ -145,7 +170,18 @@ func (s *Service) findPlacementGroups(ctx context.Context) ([]*hcloud.PlacementG
opts := hcloud.PlacementGroupListOpts{}
opts.LabelSelector = utils.LabelsToLabelSelector(labels)

return s.scope.HCloudClient.ListPlacementGroups(ctx, opts)
placementGroups, err := s.scope.HCloudClient.ListPlacementGroups(ctx, opts)
if err != nil {
if hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
conditions.MarkTrue(s.scope.HetznerCluster, infrav1.RateLimitExceeded)
record.Event(s.scope.HetznerCluster,
"RateLimitExceeded",
"exceeded rate limit with calling hcloud function ListPlacementGroups",
)
}
return nil, errors.Wrap(err, "failed to list placement groups")
}
return placementGroups, nil
}

// gets the information of the Hetzner load balancer object and returns it in our status object.
Expand Down

0 comments on commit 3504e68

Please sign in to comment.