Skip to content

Commit

Permalink
Upstream: smooth weighted round-robin balancing.
Browse files Browse the repository at this point in the history
For edge-case weights like { 5, 1, 1 } we now produce the sequence
{ a, a, b, a, c, a, a } instead of the { c, b, a, a, a, a, a } produced previously.

The algorithm is as follows: on each peer selection we increase the
current_weight of each eligible peer by its weight, select the peer with the
greatest current_weight, and reduce its current_weight by the total number of
weight points distributed among the peers.

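With weights { 5, 1, 1 } this gives the following sequence of current_weight
values (in each pair of rows, the first row shows the values after every peer
has been increased by its weight, and the second row shows them after the
selected peer has been reduced by the total of 7):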

     a  b  c
     0  0  0  (initial state)

     5  1  1  (a selected)
    -2  1  1

     3  2  2  (a selected)
    -4  2  2

     1  3  3  (b selected)
     1 -4  3

     6 -3  4  (a selected)
    -1 -3  4

     4 -2  5  (c selected)
     4 -2 -2

     9 -1 -1  (a selected)
     2 -1 -1

     7  0  0  (a selected)
     0  0  0

To preserve weight reduction in case of failures, the effective_weight
variable was introduced; it usually matches the peer's weight, but is
reduced temporarily on peer failures and then recovers gradually, one point
at a time, as the peer is considered for selection again.

This change also fixes a loop with backup servers and proxy_next_upstream
http_404 (ticket #47), and the skipping of alive upstreams in some cases when
there are multiple dead ones (ticket #64).


git-svn-id: svn://svn.nginx.org/nginx/trunk@4622 73f98a42-aea0-e011-b76d-00259023448c
  • Loading branch information
mdounin committed May 14, 2012
1 parent 35b553d commit 27e9498
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 169 deletions.
240 changes: 71 additions & 169 deletions src/http/ngx_http_upstream_round_robin.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

static ngx_int_t ngx_http_upstream_cmp_servers(const void *one,
const void *two);
static ngx_uint_t
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers);
static ngx_http_upstream_rr_peer_t *ngx_http_upstream_get_peer(
ngx_http_upstream_rr_peer_data_t *rrp);

#if (NGX_HTTP_SSL)

Expand Down Expand Up @@ -81,7 +81,8 @@ ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
peers->peer[n].fail_timeout = server[i].fail_timeout;
peers->peer[n].down = server[i].down;
peers->peer[n].weight = server[i].down ? 0 : server[i].weight;
peers->peer[n].current_weight = peers->peer[n].weight;
peers->peer[n].effective_weight = peers->peer[n].weight;
peers->peer[n].current_weight = 0;
n++;
}
}
Expand Down Expand Up @@ -131,7 +132,8 @@ ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
backup->peer[n].socklen = server[i].addrs[j].socklen;
backup->peer[n].name = server[i].addrs[j].name;
backup->peer[n].weight = server[i].weight;
backup->peer[n].current_weight = server[i].weight;
backup->peer[n].effective_weight = server[i].weight;
backup->peer[n].current_weight = 0;
backup->peer[n].max_fails = server[i].max_fails;
backup->peer[n].fail_timeout = server[i].fail_timeout;
backup->peer[n].down = server[i].down;
Expand Down Expand Up @@ -190,7 +192,8 @@ ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
peers->peer[i].socklen = u.addrs[i].socklen;
peers->peer[i].name = u.addrs[i].name;
peers->peer[i].weight = 1;
peers->peer[i].current_weight = 1;
peers->peer[i].effective_weight = 1;
peers->peer[i].current_weight = 0;
peers->peer[i].max_fails = 1;
peers->peer[i].fail_timeout = 10;
}
Expand Down Expand Up @@ -306,7 +309,8 @@ ngx_http_upstream_create_round_robin_peer(ngx_http_request_t *r,
peers->peer[0].socklen = ur->socklen;
peers->peer[0].name = ur->host;
peers->peer[0].weight = 1;
peers->peer[0].current_weight = 1;
peers->peer[0].effective_weight = 1;
peers->peer[0].current_weight = 0;
peers->peer[0].max_fails = 1;
peers->peer[0].fail_timeout = 10;

Expand Down Expand Up @@ -338,7 +342,8 @@ ngx_http_upstream_create_round_robin_peer(ngx_http_request_t *r,
peers->peer[i].name.len = len;
peers->peer[i].name.data = p;
peers->peer[i].weight = 1;
peers->peer[i].current_weight = 1;
peers->peer[i].effective_weight = 1;
peers->peer[i].current_weight = 0;
peers->peer[i].max_fails = 1;
peers->peer[i].fail_timeout = 10;
}
Expand Down Expand Up @@ -378,8 +383,6 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
{
ngx_http_upstream_rr_peer_data_t *rrp = data;

time_t now;
uintptr_t m;
ngx_int_t rc;
ngx_uint_t i, n;
ngx_connection_t *c;
Expand All @@ -389,8 +392,6 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"get rr peer, try: %ui", pc->tries);

now = ngx_time();

/* ngx_lock_mutex(rrp->peers->mutex); */

if (rrp->peers->last_cached) {
Expand Down Expand Up @@ -423,118 +424,15 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)

/* there are several peers */

if (pc->tries == rrp->peers->number) {

/* it's a first try - get a current peer */

i = pc->tries;

for ( ;; ) {
rrp->current = ngx_http_upstream_get_peer(rrp->peers);

ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"get rr peer, current: %ui %i",
rrp->current,
rrp->peers->peer[rrp->current].current_weight);

n = rrp->current / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));

if (!(rrp->tried[n] & m)) {
peer = &rrp->peers->peer[rrp->current];

if (!peer->down) {

if (peer->max_fails == 0
|| peer->fails < peer->max_fails)
{
break;
}
peer = ngx_http_upstream_get_peer(rrp);

if (now - peer->checked > peer->fail_timeout) {
peer->checked = now;
break;
}

peer->current_weight = 0;

} else {
rrp->tried[n] |= m;
}

pc->tries--;
}

if (pc->tries == 0) {
goto failed;
}

if (--i == 0) {
ngx_log_error(NGX_LOG_ALERT, pc->log, 0,
"round robin upstream stuck on %ui tries",
pc->tries);
goto failed;
}
}

peer->current_weight--;

} else {

i = pc->tries;

for ( ;; ) {
n = rrp->current / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));

if (!(rrp->tried[n] & m)) {

peer = &rrp->peers->peer[rrp->current];

if (!peer->down) {

if (peer->max_fails == 0
|| peer->fails < peer->max_fails)
{
break;
}

if (now - peer->checked > peer->fail_timeout) {
peer->checked = now;
break;
}

peer->current_weight = 0;

} else {
rrp->tried[n] |= m;
}

pc->tries--;
}

rrp->current++;

if (rrp->current >= rrp->peers->number) {
rrp->current = 0;
}

if (pc->tries == 0) {
goto failed;
}

if (--i == 0) {
ngx_log_error(NGX_LOG_ALERT, pc->log, 0,
"round robin upstream stuck on %ui tries",
pc->tries);
goto failed;
}
}

peer->current_weight--;
if (peer == NULL) {
goto failed;
}

rrp->tried[n] |= m;
ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"get rr peer, current: %ui %i",
rrp->current, peer->current_weight);
}

pc->sockaddr = peer->sockaddr;
Expand All @@ -545,11 +443,6 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)

if (pc->tries == 1 && rrp->peers->next) {
pc->tries += rrp->peers->next->number;

n = rrp->peers->next->number / (8 * sizeof(uintptr_t)) + 1;
for (i = 0; i < n; i++) {
rrp->tried[i] = 0;
}
}

return NGX_OK;
Expand Down Expand Up @@ -595,56 +488,71 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
}


static ngx_uint_t
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers)
static ngx_http_upstream_rr_peer_t *
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp)
{
ngx_uint_t i, n, reset = 0;
ngx_http_upstream_rr_peer_t *peer;
time_t now;
uintptr_t m;
ngx_int_t total;
ngx_uint_t i, n;
ngx_http_upstream_rr_peer_t *peer, *best;

peer = &peers->peer[0];
now = ngx_time();

for ( ;; ) {
best = NULL;
total = 0;

for (i = 0; i < peers->number; i++) {
for (i = 0; i < rrp->peers->number; i++) {

if (peer[i].current_weight <= 0) {
continue;
}
n = i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));

n = i;

while (i < peers->number - 1) {

i++;
if (rrp->tried[n] & m) {
continue;
}

if (peer[i].current_weight <= 0) {
continue;
}
peer = &rrp->peers->peer[i];

if (peer[n].current_weight * 1000 / peer[i].current_weight
> peer[n].weight * 1000 / peer[i].weight)
{
return n;
}
if (peer->down) {
continue;
}

n = i;
}
if (peer->max_fails
&& peer->fails >= peer->max_fails
&& now - peer->checked <= peer->fail_timeout)
{
continue;
}

if (peer[i].current_weight > 0) {
n = i;
}
peer->current_weight += peer->effective_weight;
total += peer->effective_weight;

return n;
if (peer->effective_weight < peer->weight) {
peer->effective_weight++;
}

if (reset++) {
return 0;
if (best == NULL || peer->current_weight > best->current_weight) {
best = peer;
}
}

for (i = 0; i < peers->number; i++) {
peer[i].current_weight = peer[i].weight;
}
if (best == NULL) {
return NULL;
}

i = best - &rrp->peers->peer[0];

rrp->current = i;

n = i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));

rrp->tried[n] |= m;

best->current_weight -= total;
best->checked = now;

return best;
}


Expand Down Expand Up @@ -683,15 +591,15 @@ ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,
peer->checked = now;

if (peer->max_fails) {
peer->current_weight -= peer->weight / peer->max_fails;
peer->effective_weight -= peer->weight / peer->max_fails;
}

ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
"free rr peer failed: %ui %i",
rrp->current, peer->current_weight);
rrp->current, peer->effective_weight);

if (peer->current_weight < 0) {
peer->current_weight = 0;
if (peer->effective_weight < 0) {
peer->effective_weight = 0;
}

/* ngx_unlock_mutex(rrp->peers->mutex); */
Expand All @@ -705,12 +613,6 @@ ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,
}
}

rrp->current++;

if (rrp->current >= rrp->peers->number) {
rrp->current = 0;
}

if (pc->tries) {
pc->tries--;
}
Expand Down
1 change: 1 addition & 0 deletions src/http/ngx_http_upstream_round_robin.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ typedef struct {
ngx_str_t name;

ngx_int_t current_weight;
ngx_int_t effective_weight;
ngx_int_t weight;

ngx_uint_t fails;
Expand Down

6 comments on commit 27e9498

@htlhenry
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hello, 看了大牛的nginx加权轮询算法剖析,有关那个平滑的加权轮询算法,current_weight -= total,为什么要这么做呢,有些想不明白,还望点一下

@supertim
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

不减不就一直在涨吗!

@arganzheng
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

有点不明白为什么对peer->fails的读写不需要加读写锁呢?这里面应该有个多线程并发问题吧?

@thetrapest
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

每次选择了一个点,减掉total是为了降低它的那个值,不然就一直再次选择它了

@bigbuger
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it work? I mean:
Why, after t rounds (where t = sum(w[i]) and w[i] is the weight of node i), has each node i been selected exactly w[i] times?

@lxchinesszz
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🐂

Please sign in to comment.