Skip to content

Commit

Permalink
Statistics for backend open errors
Browse files Browse the repository at this point in the history
Previously, we had zero stats on the cause of backend connection
errors, which made it close to impossible to diagnose such issues in
retrospect (only via log mining).

We now pass an optional backend vsc to vcp and record errors per
backend.

Open errors are really per vcp entry (ip + port or uds path), which
can be shared amongst backends (and even vcls), but we maintain the
counters per backend (and, consequently, per vcl) for simplicity.  It
should be noted though that errors for shared endpoints affect all
backends using them.

Ref varnishcache#2622
  • Loading branch information
nigoroll committed Jun 5, 2018
1 parent f4a6413 commit b6c6dd0
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 13 deletions.
47 changes: 46 additions & 1 deletion bin/varnishd/VSC_vbe.vsc
Expand Up @@ -78,5 +78,50 @@
:level: info
:oneliner: Backend requests sent

.. varnish_vsc_end:: vbe
..
=== Anything below is actually per VCP entry, but collected per
=== backend for simplicity

.. varnish_vsc:: fail
:type: counter
:level: info
:oneliner: Connections failed

Counter of failed opens. Detailed reasons are given in the
fail_* counters (DIAG level) and in Debug VSL.

This counter is the sum of all detailed fail_* counters.

All fail_* counters may be slightly inaccurate for efficiency.

.. varnish_vsc:: fail_eacces
:type: counter
:level: diag
:oneliner: Connections failed with EACCES or EPERM

.. varnish_vsc:: fail_eaddrnotavail
:type: counter
:level: diag
:oneliner: Connections failed with EADDRNOTAVAIL

.. varnish_vsc:: fail_econnrefused
:type: counter
:level: diag
:oneliner: Connections failed with ECONNREFUSED

.. varnish_vsc:: fail_enetunreach
:type: counter
:level: diag
:oneliner: Connections failed with ENETUNREACH

.. varnish_vsc:: fail_etimedout
:type: counter
:level: diag
:oneliner: Connections failed with ETIMEDOUT

.. varnish_vsc:: fail_other
:type: counter
:level: diag
:oneliner: Connections failed for other reasons

.. varnish_vsc_end:: vbe
3 changes: 1 addition & 2 deletions bin/varnishd/cache/cache_backend.c
Expand Up @@ -113,12 +113,11 @@ vbe_dir_getfd(struct worker *wrk, struct backend *bp, struct busyobj *bo,
bo->htc->doclose = SC_NULL;

FIND_TMO(connect_timeout, tmod, bo, bp);
pfd = VTP_Get(bp->tcp_pool, tmod, wrk, force_fresh);
pfd = VTP_Get(bp->tcp_pool, tmod, wrk, force_fresh, bp->vsc);
if (pfd == NULL) {
VSLb(bo->vsl, SLT_FetchError,
"backend %s: fail errno %d (%s)",
VRT_BACKEND_string(bp->director), errno, strerror(errno));
// XXX: Per backend stats ?
VSC_C_main->backend_fail++;
bo->htc = NULL;
return (NULL);
Expand Down
3 changes: 2 additions & 1 deletion bin/varnishd/cache/cache_backend_probe.c
Expand Up @@ -272,7 +272,8 @@ vbp_poke(struct vbp_target *vt)
t_start = t_now = VTIM_real();
t_end = t_start + vt->timeout;

s = VTP_Open(vt->tcp_pool, t_end - t_now, (const void **)&sa);
s = VTP_Open(vt->tcp_pool, t_end - t_now, (const void **)&sa,
vt->backend->vsc);
if (s < 0) {
/* Got no connection: failed */
return;
Expand Down
44 changes: 37 additions & 7 deletions bin/varnishd/cache/cache_tcp_pool.c
Expand Up @@ -45,6 +45,8 @@
#include "cache_tcp_pool.h"
#include "cache_pool.h"

#include "VSC_vbe.h"

struct conn_pool;

/*--------------------------------------------------------------------
Expand Down Expand Up @@ -373,14 +375,41 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp)
*/

/*
 * Open a connection through the pool's open method and, on failure,
 * account the error in the optional backend counters.
 *
 * Returns the value of cp->methods->open() unchanged; a negative value
 * is treated as failure. vsc may be NULL, in which case nothing is
 * counted.
 */
static int
VCP_Open(const struct conn_pool *cp, double tmo, const void **privp)
VCP_Open(const struct conn_pool *cp, double tmo, const void **privp,
struct VSC_vbe *vsc)
{
int r;

CHECK_OBJ_NOTNULL(cp, CONN_POOL_MAGIC);

r = cp->methods->open(cp, tmo, privp);

/* Success, or no counters supplied: nothing to account */
if (r >= 0 || vsc == NULL)
return (r);

/*
 * stats access unprotected: the counters below are incremented without
 * any locking in this function.
 * NOTE(review): presumably the open method leaves errno set to the
 * cause of the failure -- verify against the implementations.
 */
switch (errno) {
case EACCES:
case EPERM:
/* both permission errors share one counter */
vsc->fail_eacces++;
break;
case EADDRNOTAVAIL:
vsc->fail_eaddrnotavail++;
break;
case ECONNREFUSED:
vsc->fail_econnrefused++;
break;
case ENETUNREACH:
vsc->fail_enetunreach++;
break;
case ETIMEDOUT:
vsc->fail_etimedout++;
break;
default:
/* any errno without a dedicated counter */
vsc->fail_other++;
}
/* aggregate: bumped once for every failure classified above */
vsc->fail++;

return (r);
}

Expand Down Expand Up @@ -426,7 +455,7 @@ VCP_Close(struct pfd **pfdp)

static struct pfd *
VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk,
unsigned force_fresh)
unsigned force_fresh, struct VSC_vbe *vsc)
{
struct pfd *pfd;

Expand Down Expand Up @@ -459,7 +488,7 @@ VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk,
INIT_OBJ(pfd->waited, WAITED_MAGIC);
pfd->state = PFD_STATE_USED;
pfd->conn_pool = cp;
pfd->fd = VCP_Open(cp, tmo, &pfd->priv);
pfd->fd = VCP_Open(cp, tmo, &pfd->priv, vsc);
if (pfd->fd < 0) {
FREE_OBJ(pfd);
Lck_Lock(&cp->mtx);
Expand Down Expand Up @@ -737,9 +766,10 @@ VTP_Rel(struct tcp_pool **tpp)
*/

/*
 * Forward to VCP_Open() on the tcp pool's conn_pool (tp->cp), passing
 * the optional vsc through, and return its result.
 */
int
VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp)
VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp,
struct VSC_vbe *vsc)
{
return (VCP_Open(tp->cp, tmo, privp));
return (VCP_Open(tp->cp, tmo, privp, vsc));
}

/*--------------------------------------------------------------------
Expand Down Expand Up @@ -770,10 +800,10 @@ VTP_Close(struct pfd **pfdp)

/*
 * Forward to VCP_Get() on the tcp pool's conn_pool (tp->cp), passing
 * the optional vsc through, and return its result.
 */
struct pfd *
VTP_Get(struct tcp_pool *tp, double tmo, struct worker *wrk,
unsigned force_fresh)
unsigned force_fresh, struct VSC_vbe *vsc)
{

return VCP_Get(tp->cp, tmo, wrk, force_fresh);
return VCP_Get(tp->cp, tmo, wrk, force_fresh, vsc);
}

/*--------------------------------------------------------------------
Expand Down
9 changes: 7 additions & 2 deletions bin/varnishd/cache/cache_tcp_pool.h
Expand Up @@ -50,6 +50,8 @@ void PFD_RemoteName(const struct pfd *, char *, unsigned, char *, unsigned);
* Prototypes
*/

struct VSC_vbe;

struct tcp_pool *VTP_Ref(const struct suckaddr *ip4, const struct suckaddr *ip6,
const char *uds, const void *id);
/*
Expand All @@ -70,9 +72,11 @@ void VTP_Rel(struct tcp_pool **);
* the pool is destroyed and all cached connections closed.
*/

int VTP_Open(const struct tcp_pool *, double tmo, const void **);
int VTP_Open(const struct tcp_pool *, double tmo, const void **,
struct VSC_vbe *);
/*
* Open a new connection and return the address used.
* Errors will be accounted in the optional vsc
*/

void VTP_Close(struct pfd **);
Expand All @@ -86,9 +90,10 @@ void VTP_Recycle(const struct worker *, struct pfd **);
*/

struct pfd *VTP_Get(struct tcp_pool *, double tmo, struct worker *,
unsigned force_fresh);
unsigned force_fresh, struct VSC_vbe *);
/*
* Get a (possibly) recycled connection.
* Open errors will be accounted in the optional vsc
*/

int VTP_Wait(struct worker *, struct pfd *, double tmo);
Expand Down

0 comments on commit b6c6dd0

Please sign in to comment.