Skip to content
Permalink
Browse files

Upstream: smooth weighted round-robin balancing.

For edge-case weights such as { 5, 1, 1 } we now produce the sequence
{ a, a, b, a, c, a, a } instead of the previously produced { c, b, a, a, a, a, a }.

The algorithm is as follows: on each peer selection we increase the
current_weight of each eligible peer by its weight, select the peer with the
greatest current_weight, and reduce its current_weight by the total number of
weight points distributed among the peers.

In the case of { 5, 1, 1 } weights this gives the following sequence of
current_weight values:

     a  b  c
     0  0  0  (initial state)

     5  1  1  (a selected)
    -2  1  1

     3  2  2  (a selected)
    -4  2  2

     1  3  3  (b selected)
     1 -4  3

     6 -3  4  (a selected)
    -1 -3  4

     4 -2  5  (c selected)
     4 -2 -2

     9 -1 -1  (a selected)
     2 -1 -1

     7  0  0  (a selected)
     0  0  0

To preserve the weight reduction in case of failures, the effective_weight
variable was introduced; it usually matches the peer's weight, but is
reduced temporarily on peer failures.

This change also fixes a loop with backup servers and proxy_next_upstream
http_404 (ticket #47), and the skipping of alive upstreams in some cases when
there are multiple dead ones (ticket #64).


git-svn-id: svn://svn.nginx.org/nginx/trunk@4622 73f98a42-aea0-e011-b76d-00259023448c
  • Loading branch information...
mdounin
mdounin committed May 14, 2012
1 parent 35b553d commit 27e94984486058d73157038f7950a0a36ecc6e35
Showing with 72 additions and 169 deletions.
  1. +71 −169 src/http/ngx_http_upstream_round_robin.c
  2. +1 −0 src/http/ngx_http_upstream_round_robin.h
@@ -12,8 +12,8 @@

static ngx_int_t ngx_http_upstream_cmp_servers(const void *one,
const void *two);
static ngx_uint_t
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers);
static ngx_http_upstream_rr_peer_t *ngx_http_upstream_get_peer(
ngx_http_upstream_rr_peer_data_t *rrp);

#if (NGX_HTTP_SSL)

@@ -81,7 +81,8 @@ ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
peers->peer[n].fail_timeout = server[i].fail_timeout;
peers->peer[n].down = server[i].down;
peers->peer[n].weight = server[i].down ? 0 : server[i].weight;
peers->peer[n].current_weight = peers->peer[n].weight;
peers->peer[n].effective_weight = peers->peer[n].weight;
peers->peer[n].current_weight = 0;
n++;
}
}
@@ -131,7 +132,8 @@ ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
backup->peer[n].socklen = server[i].addrs[j].socklen;
backup->peer[n].name = server[i].addrs[j].name;
backup->peer[n].weight = server[i].weight;
backup->peer[n].current_weight = server[i].weight;
backup->peer[n].effective_weight = server[i].weight;
backup->peer[n].current_weight = 0;
backup->peer[n].max_fails = server[i].max_fails;
backup->peer[n].fail_timeout = server[i].fail_timeout;
backup->peer[n].down = server[i].down;
@@ -190,7 +192,8 @@ ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
peers->peer[i].socklen = u.addrs[i].socklen;
peers->peer[i].name = u.addrs[i].name;
peers->peer[i].weight = 1;
peers->peer[i].current_weight = 1;
peers->peer[i].effective_weight = 1;
peers->peer[i].current_weight = 0;
peers->peer[i].max_fails = 1;
peers->peer[i].fail_timeout = 10;
}
@@ -306,7 +309,8 @@ ngx_http_upstream_create_round_robin_peer(ngx_http_request_t *r,
peers->peer[0].socklen = ur->socklen;
peers->peer[0].name = ur->host;
peers->peer[0].weight = 1;
peers->peer[0].current_weight = 1;
peers->peer[0].effective_weight = 1;
peers->peer[0].current_weight = 0;
peers->peer[0].max_fails = 1;
peers->peer[0].fail_timeout = 10;

@@ -338,7 +342,8 @@ ngx_http_upstream_create_round_robin_peer(ngx_http_request_t *r,
peers->peer[i].name.len = len;
peers->peer[i].name.data = p;
peers->peer[i].weight = 1;
peers->peer[i].current_weight = 1;
peers->peer[i].effective_weight = 1;
peers->peer[i].current_weight = 0;
peers->peer[i].max_fails = 1;
peers->peer[i].fail_timeout = 10;
}
@@ -378,8 +383,6 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
{
ngx_http_upstream_rr_peer_data_t *rrp = data;

time_t now;
uintptr_t m;
ngx_int_t rc;
ngx_uint_t i, n;
ngx_connection_t *c;
@@ -389,8 +392,6 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"get rr peer, try: %ui", pc->tries);

now = ngx_time();

/* ngx_lock_mutex(rrp->peers->mutex); */

if (rrp->peers->last_cached) {
@@ -423,118 +424,15 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)

/* there are several peers */

if (pc->tries == rrp->peers->number) {

/* it's a first try - get a current peer */

i = pc->tries;

for ( ;; ) {
rrp->current = ngx_http_upstream_get_peer(rrp->peers);

ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"get rr peer, current: %ui %i",
rrp->current,
rrp->peers->peer[rrp->current].current_weight);

n = rrp->current / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));

if (!(rrp->tried[n] & m)) {
peer = &rrp->peers->peer[rrp->current];

if (!peer->down) {

if (peer->max_fails == 0
|| peer->fails < peer->max_fails)
{
break;
}
peer = ngx_http_upstream_get_peer(rrp);

if (now - peer->checked > peer->fail_timeout) {
peer->checked = now;
break;
}

peer->current_weight = 0;

} else {
rrp->tried[n] |= m;
}

pc->tries--;
}

if (pc->tries == 0) {
goto failed;
}

if (--i == 0) {
ngx_log_error(NGX_LOG_ALERT, pc->log, 0,
"round robin upstream stuck on %ui tries",
pc->tries);
goto failed;
}
}

peer->current_weight--;

} else {

i = pc->tries;

for ( ;; ) {
n = rrp->current / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));

if (!(rrp->tried[n] & m)) {

peer = &rrp->peers->peer[rrp->current];

if (!peer->down) {

if (peer->max_fails == 0
|| peer->fails < peer->max_fails)
{
break;
}

if (now - peer->checked > peer->fail_timeout) {
peer->checked = now;
break;
}

peer->current_weight = 0;

} else {
rrp->tried[n] |= m;
}

pc->tries--;
}

rrp->current++;

if (rrp->current >= rrp->peers->number) {
rrp->current = 0;
}

if (pc->tries == 0) {
goto failed;
}

if (--i == 0) {
ngx_log_error(NGX_LOG_ALERT, pc->log, 0,
"round robin upstream stuck on %ui tries",
pc->tries);
goto failed;
}
}

peer->current_weight--;
if (peer == NULL) {
goto failed;
}

rrp->tried[n] |= m;
ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"get rr peer, current: %ui %i",
rrp->current, peer->current_weight);
}

pc->sockaddr = peer->sockaddr;
@@ -545,11 +443,6 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)

if (pc->tries == 1 && rrp->peers->next) {
pc->tries += rrp->peers->next->number;

n = rrp->peers->next->number / (8 * sizeof(uintptr_t)) + 1;
for (i = 0; i < n; i++) {
rrp->tried[i] = 0;
}
}

return NGX_OK;
@@ -595,56 +488,71 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
}


static ngx_uint_t
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers)
static ngx_http_upstream_rr_peer_t *
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp)
{
ngx_uint_t i, n, reset = 0;
ngx_http_upstream_rr_peer_t *peer;
time_t now;
uintptr_t m;
ngx_int_t total;
ngx_uint_t i, n;
ngx_http_upstream_rr_peer_t *peer, *best;

peer = &peers->peer[0];
now = ngx_time();

for ( ;; ) {
best = NULL;
total = 0;

for (i = 0; i < peers->number; i++) {
for (i = 0; i < rrp->peers->number; i++) {

if (peer[i].current_weight <= 0) {
continue;
}
n = i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));

n = i;

while (i < peers->number - 1) {

i++;
if (rrp->tried[n] & m) {
continue;
}

if (peer[i].current_weight <= 0) {
continue;
}
peer = &rrp->peers->peer[i];

if (peer[n].current_weight * 1000 / peer[i].current_weight
> peer[n].weight * 1000 / peer[i].weight)
{
return n;
}
if (peer->down) {
continue;
}

n = i;
}
if (peer->max_fails
&& peer->fails >= peer->max_fails
&& now - peer->checked <= peer->fail_timeout)
{
continue;
}

if (peer[i].current_weight > 0) {
n = i;
}
peer->current_weight += peer->effective_weight;
total += peer->effective_weight;

return n;
if (peer->effective_weight < peer->weight) {
peer->effective_weight++;
}

if (reset++) {
return 0;
if (best == NULL || peer->current_weight > best->current_weight) {
best = peer;
}
}

for (i = 0; i < peers->number; i++) {
peer[i].current_weight = peer[i].weight;
}
if (best == NULL) {
return NULL;
}

i = best - &rrp->peers->peer[0];

rrp->current = i;

n = i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));

rrp->tried[n] |= m;

best->current_weight -= total;
best->checked = now;

return best;
}


@@ -683,15 +591,15 @@ ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,
peer->checked = now;

if (peer->max_fails) {
peer->current_weight -= peer->weight / peer->max_fails;
peer->effective_weight -= peer->weight / peer->max_fails;
}

ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"free rr peer failed: %ui %i",
rrp->current, peer->current_weight);
rrp->current, peer->effective_weight);

if (peer->current_weight < 0) {
peer->current_weight = 0;
if (peer->effective_weight < 0) {
peer->effective_weight = 0;
}

/* ngx_unlock_mutex(rrp->peers->mutex); */
@@ -705,12 +613,6 @@ ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,
}
}

rrp->current++;

if (rrp->current >= rrp->peers->number) {
rrp->current = 0;
}

if (pc->tries) {
pc->tries--;
}
@@ -20,6 +20,7 @@ typedef struct {
ngx_str_t name;

ngx_int_t current_weight;
ngx_int_t effective_weight;
ngx_int_t weight;

ngx_uint_t fails;

5 comments on commit 27e9498

@htlhenry

This comment has been minimized.

Copy link

htlhenry replied May 24, 2016

hello, 看了大牛的nginx加权轮询算法剖析,有关那个平滑的加权轮询算法,current_weight -= total,为什么要这么做呢,有些想不明白,还望点一下

@supertim

This comment has been minimized.

Copy link

supertim replied Nov 18, 2016

不减不就一直在涨吗!

@arganzheng

This comment has been minimized.

Copy link

arganzheng replied Aug 7, 2017

有点不明白为什么对peer->fails的读写不需要加读写锁呢?这里面应该有个多线程并发问题吧?

@thetrapest

This comment has been minimized.

Copy link

thetrapest replied Mar 6, 2018

每次选择了一个点,减掉total是为了降低它的那个值,不然就一直再次选择它了

@bigbuger

This comment has been minimized.

Copy link

bigbuger replied Nov 10, 2018

Why does it work? I mean:
Why, after t rounds (where t = sum(w[i]) and w[i] is the weight of node i), has each node i been selected exactly w[i] times?

Please sign in to comment.
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.