Refactor service load balancer to support different strategies
This change adds the "power of two random choices" load balancing
strategy to the service's load balancer.

This change does not actually start using the new strategy, and it is
backwards incompatible; it is intended to start a design review. To
fully add support, many tests will need to be updated, so this change
is meant to highlight the core changes needed to support the new
strategy.

Currently the only load balancing strategy available within traefik is
(weighted) round robin. While this is a good strategy in general, it can
be problematic if one or more of the backends is getting overloaded.

There are many load balancing strategies, but the
"power-of-two-random-choices" algorithm has some good properties that
make it a good general use algorithm.

Specifically, some of the benefits include:
 - reducing the total number of requests to slower backends
 - constant time backend picking (in the general case)
 - reduced "herd behaviour" compared to e.g. "least connections" load
   balancing.

The algorithm works by taking two backends at random, and choosing the
backend that has the fewest in-flight requests. In order to do this, we
have to track the number of in-flight requests each backend is currently
processing.
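
To make the mechanism concrete, here is a minimal sketch of the
selection step (illustrative only: the helper name pickP2C is made up,
but namedHandler and its inflight counter are the ones added in the
diff below):

    package loadbalancer

    import "math/rand"

    // pickP2C samples two backends at random and returns the one with
    // fewer in-flight requests. backends must be non-empty.
    func pickP2C(backends []*namedHandler, rnd *rand.Rand) *namedHandler {
        if len(backends) == 1 {
            return backends[0]
        }
        // Pick two distinct indices; the second pick is shifted so a
        // backend is never compared against itself.
        i := rnd.Intn(len(backends))
        j := rnd.Intn(len(backends) - 1)
        if j >= i {
            j++
        }
        if backends[j].inflight.Load() < backends[i].inflight.Load() {
            return backends[j]
        }
        return backends[i]
    }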

The aim of this change is to demonstrate that this new load balancing
strategy can be added with minimal changes, and reusing a lot of the
existing load balancing code by factoring out the explicit strategy into
an interface.
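
Concretely, the strategy abstraction is the small unexported interface
added in the diff below (comments abbreviated here):

    type strategy interface {
        // nextServer returns the next server to serve a request; it is
        // called under the handlersMu lock. status holds the currently
        // healthy child services.
        nextServer(status map[string]struct{}) *namedHandler

        // add registers a handler with the strategy; it is called under
        // the handlersMu lock.
        add(h *namedHandler)

        setUp(name string, up bool)

        name() string
        len() int
    }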

In order to do this, the wrr package was removed, and the existing
LoadBalancer was moved to the parent directory: the loadbalancer
package.

There are many strategies that can be used for load balancing, many of
which require "extrinsic" information, such as the CPU load on the
backends. This change is not meant to open the door for adding such
strategies, but attempts to add an effective load balancing strategy
with low cost to the codebase.
committed Mar 20, 2024
1 parent c1ef742 commit c92a1ed
Showing 6 changed files with 559 additions and 143 deletions.
@@ -1,13 +1,13 @@
package wrr
package loadbalancer

import (
"container/heap"
"context"
"errors"
"hash/fnv"
"net/http"
"strconv"
"sync"
"sync/atomic"

"github.com/rs/zerolog/log"
"github.com/traefik/traefik/v3/pkg/config/dynamic"
@@ -18,6 +18,13 @@ type namedHandler struct {
name string
weight float64
deadline float64
inflight atomic.Int64
}

func (h *namedHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
h.inflight.Add(1)
defer h.inflight.Add(-1)
h.Handler.ServeHTTP(w, req)
}

type stickyCookie struct {
@@ -41,36 +48,74 @@ func convertSameSite(sameSite string) http.SameSite {
}
}

// Balancer is a WeightedRoundRobin load balancer based on Earliest Deadline First (EDF).
// (https://en.wikipedia.org/wiki/Earliest_deadline_first_scheduling)
// Each pick from the schedule has the earliest deadline entry selected.
// Entries have deadlines set at currentDeadline + 1 / weight,
// providing weighted round-robin behavior with floating point weights and an O(log n) pick time.
// strategy is an interface that can be used to implement different load balancing strategies
// for the Balancer.
type strategy interface {
// nextServer returns the next server to serve a request, this is called under the handlersMu lock.
// The status param is a map of the currently healthy child services.
nextServer(status map[string]struct{}) *namedHandler

// add adds a handler to the balancing algorithm, this is called under the handlersMu lock.
add(h *namedHandler)

setUp(name string, up bool)

name() string
len() int
}

type Balancer struct {
stickyCookie *stickyCookie
wantsHealthCheck bool

handlersMu sync.RWMutex
// References all the handlers by name and also by the hashed value of the name.
handlerMap map[string]*namedHandler
handlers []*namedHandler
curDeadline float64
// status is a record of which child services of the Balancer are healthy, keyed
// by name of child service. A service is initially added to the map when it is
// created via Add, and it is later removed or added to the map as needed,
// through the SetStatus method.
// References all the handlers by name and also by the hashed value of the
// name.
handlerMap map[string]*namedHandler
// status is a record of which child services of the Balancer are healthy,
// keyed by name of child service. A service is initially added to the map
// when it is created via Add, and it is later removed or added to the map
// as needed, through the SetStatus method.
status map[string]struct{}

// strategy references the load balancing strategy to be used. The add and
// nextServer methods must be called under the handlersMu lock.
strategy strategy

// updaters is the list of hooks that are run (to update the Balancer
// parent(s)), whenever the Balancer status changes.
updaters []func(bool)
}

// New creates a new load balancer.
func New(sticky *dynamic.Sticky, wantHealthCheck bool) *Balancer {
// NewWRR creates a WeightedRoundRobin load balancer based on Earliest Deadline
// First (EDF).
// (https://en.wikipedia.org/wiki/Earliest_deadline_first_scheduling)
// Each pick from the schedule has the earliest deadline entry selected.
// Entries have deadlines set at currentDeadline + 1 / weight,
// providing weighted round-robin behavior with floating point weights and an
// O(log n) pick time.
func NewWRR(sticky *dynamic.Sticky, wantHealthCheck bool) *Balancer {
return newBalancer(sticky, wantHealthCheck, newStrategyWRR())
}

// NewP2C creates a load balancer applying the "power of two random choices"
// algorithm: it takes two of the available backends at random and selects the
// backend with the fewest in-flight requests. Picking is constant time, and it
// has more beneficial "herd" behavior than the "least connections" algorithm.
func NewP2C(sticky *dynamic.Sticky, wantHealthCheck bool) *Balancer {
return newBalancer(sticky, wantHealthCheck, newStrategyP2C())
}

func newBalancer(sticky *dynamic.Sticky, wantHealthCheck bool, strategy strategy) *Balancer {

balancer := &Balancer{
status: make(map[string]struct{}),
handlerMap: make(map[string]*namedHandler),
wantsHealthCheck: wantHealthCheck,
strategy: strategy,
}
if sticky != nil && sticky.Cookie != nil {
balancer.stickyCookie = &stickyCookie{
@@ -85,37 +130,6 @@ func New(sticky *dynamic.Sticky, wantHealthCheck bool) *Balancer {
return balancer
}

// Len implements heap.Interface/sort.Interface.
func (b *Balancer) Len() int { return len(b.handlers) }

// Less implements heap.Interface/sort.Interface.
func (b *Balancer) Less(i, j int) bool {
return b.handlers[i].deadline < b.handlers[j].deadline
}

// Swap implements heap.Interface/sort.Interface.
func (b *Balancer) Swap(i, j int) {
b.handlers[i], b.handlers[j] = b.handlers[j], b.handlers[i]
}

// Push implements heap.Interface for pushing an item into the heap.
func (b *Balancer) Push(x interface{}) {
h, ok := x.(*namedHandler)
if !ok {
return
}

b.handlers = append(b.handlers, h)
}

// Pop implements heap.Interface for popping an item from the heap.
// It panics if b.Len() < 1.
func (b *Balancer) Pop() interface{} {
h := b.handlers[len(b.handlers)-1]
b.handlers = b.handlers[0 : len(b.handlers)-1]
return h
}

// SetStatus sets on the balancer that its given child is now of the given
// status. balancerName is only needed for logging purposes.
func (b *Balancer) SetStatus(ctx context.Context, childName string, up bool) {
@@ -137,6 +151,8 @@ func (b *Balancer) SetStatus(ctx context.Context, childName string, up bool) {
delete(b.status, childName)
}

b.strategy.setUp(childName, up)

upAfter := len(b.status) > 0
status = "DOWN"
if upAfter {
@@ -174,26 +190,13 @@ func (b *Balancer) nextServer() (*namedHandler, error) {
b.handlersMu.Lock()
defer b.handlersMu.Unlock()

if len(b.handlers) == 0 || len(b.status) == 0 {
if b.strategy.len() == 0 || len(b.status) == 0 {
return nil, errNoAvailableServer
}

var handler *namedHandler
for {
// Pick handler with closest deadline.
handler = heap.Pop(b).(*namedHandler)

// curDeadline should be handler's deadline so that new added entry would have a fair competition environment with the old ones.
b.curDeadline = handler.deadline
handler.deadline += 1 / handler.weight

heap.Push(b, handler)
if _, ok := b.status[handler.name]; ok {
break
}
}
handler := b.strategy.nextServer(b.status)

log.Debug().Msgf("Service selected by WRR: %s", handler.name)
log.Debug().Msgf("Service selected by strategy %q: %s", b.strategy.name(), handler.name)
return handler, nil
}
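
For reference, the EDF pick loop removed above presumably moves behind the
strategy interface. A simplified sketch of what a WRR strategy could look
like, using a linear scan instead of the heap (the real strategyWRR is in a
file not shown in this diff and presumably keeps the heap for O(log n)
picks); setUp, name and len are omitted:

    type wrrSketch struct {
        handlers    []*namedHandler
        curDeadline float64
    }

    func (s *wrrSketch) add(h *namedHandler) {
        // Same deadline bookkeeping as the removed Balancer.Add code below.
        h.deadline = s.curDeadline + 1/h.weight
        s.handlers = append(s.handlers, h)
    }

    func (s *wrrSketch) nextServer(status map[string]struct{}) *namedHandler {
        // The caller guarantees at least one handler and one healthy service.
        for {
            // Find the handler with the earliest deadline.
            best := s.handlers[0]
            for _, h := range s.handlers[1:] {
                if h.deadline < best.deadline {
                    best = h
                }
            }
            // Advance its deadline so newly added entries stay in fair
            // competition, then return it if it is healthy.
            s.curDeadline = best.deadline
            best.deadline += 1 / best.weight
            if _, ok := status[best.name]; ok {
                return best
            }
        }
    }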

@@ -263,8 +266,7 @@ func (b *Balancer) Add(name string, handler http.Handler, weight *int) {
h := &namedHandler{Handler: handler, name: name, weight: float64(w)}

b.handlersMu.Lock()
h.deadline = b.curDeadline + 1/h.weight
heap.Push(b, h)
b.strategy.add(h)
b.status[name] = struct{}{}
b.handlerMap[name] = h
b.handlerMap[hash(name)] = h
