From 258dc95952389d8ba032c3dcceefd25dfb329068 Mon Sep 17 00:00:00 2001 From: Nils Goroll Date: Tue, 3 Apr 2018 11:44:38 +0200 Subject: [PATCH] Statistics for vcp (connection pools) Previously, we had zero reporting on the cause of backend connection errors, which made it close to impossible to diagnose such issues directly. We now add statistics per connection pool and Debug VSLs. Ref #2622 --- bin/varnishd/Makefile.am | 3 +- bin/varnishd/VSC_main.vsc | 10 +++ bin/varnishd/VSC_vcp.vsc | 120 +++++++++++++++++++++++++ bin/varnishd/cache/cache_tcp_pool.c | 133 ++++++++++++++++++++++------ bin/varnishd/cache/cache_tcp_pool.h | 2 +- 5 files changed, 238 insertions(+), 30 deletions(-) create mode 100644 bin/varnishd/VSC_vcp.vsc diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am index 739b16cfec1..6c94efc00f4 100644 --- a/bin/varnishd/Makefile.am +++ b/bin/varnishd/Makefile.am @@ -237,7 +237,8 @@ VSC_SRC = \ VSC_sma.vsc \ VSC_smf.vsc \ VSC_smu.vsc \ - VSC_vbe.vsc + VSC_vbe.vsc \ + VSC_vcp.vsc VSC_GEN_C = @VSC_GEN_C@ VSC_GEN_H = @VSC_GEN_H@ diff --git a/bin/varnishd/VSC_main.vsc b/bin/varnishd/VSC_main.vsc index f97370cd693..c1e8ea2eef7 100644 --- a/bin/varnishd/VSC_main.vsc +++ b/bin/varnishd/VSC_main.vsc @@ -758,6 +758,16 @@ :oneliner: ESI parse warnings (unlock) +.. varnish_vsc:: vcp_create + :level: diag + :oneliner: Connection Pool creates + + +.. varnish_vsc:: vcp_destroy + :level: diag + :oneliner: Connection Pool destroys + + .. varnish_vsc:: vmods :type: gauge :oneliner: Loaded VMODs diff --git a/bin/varnishd/VSC_vcp.vsc b/bin/varnishd/VSC_vcp.vsc new file mode 100644 index 00000000000..3088f432046 --- /dev/null +++ b/bin/varnishd/VSC_vcp.vsc @@ -0,0 +1,120 @@ +.. + This is *NOT* a RST file but the syntax has been chosen so + that it may become an RST file at some later date. + +.. varnish_vsc_begin:: vcp + :oneliner: Connection Pool Counters + :order: 65 + + + Counter for operations on connection pools. + +.. varnish_vsc:: n_conn + :type: counter + :level: diag + :oneliner: Number of cached connections + + +.. varnish_vsc:: n_kill + :type: counter + :level: diag + :oneliner: Number of connections still to be closed + + +.. varnish_vsc:: n_used + :type: counter + :level: diag + :oneliner: Number of open connections + + +.. varnish_vsc:: opened + :type: counter + :level: diag + :oneliner: Connections opened + + Connections opened successfully + + This counter may be slightly inaccurate for performance reasons. + + +.. varnish_vsc:: closed + :type: counter + :level: diag + :oneliner: Connections closed + + Connections closed + + +.. varnish_vsc:: recycle + :type: counter + :level: diag + :oneliner: Connections recycled + + Connections kept open (cached) for later reuse + + +.. varnish_vsc:: recycle_fail + :type: counter + :level: diag + :oneliner: Connection recycles (Wait_Enter) failed + + +.. varnish_vsc:: helddown + :type: counter + :level: diag + :oneliner: Connection not attempted due to holddown + + See parameters backend_local_error_holddown and + backend_remote_error_holddown. + + +.. varnish_vsc:: fail + :type: counter + :level: diag + :oneliner: Connections failed + + + Counter of failed opens. Detailed reasons are given in the + fail_* counters and in Debug VSL. + + This counter is the sum of all detailled fail_* counters. + + All fail_* counters may be slightly inaccurate for efficiency. + +.. varnish_vsc:: fail_eacces + :type: counter + :level: diag + :oneliner: Connections failed with EACCES or EPERM + + +.. varnish_vsc:: fail_eaddrnotavail + :type: counter + :level: diag + :oneliner: Connections failed with EADDRNOTAVAIL + + +.. varnish_vsc:: fail_econnrefused + :type: counter + :level: diag + :oneliner: Connections failed with ECONNREFUSED + + +.. varnish_vsc:: fail_enetunreach + :type: counter + :level: diag + :oneliner: Connections failed with ENETUNREACH + + +.. varnish_vsc:: fail_etimedout + :type: counter + :level: diag + :oneliner: Connections failed ETIMEDOUT + + +.. varnish_vsc:: fail_other + :type: counter + :level: diag + :oneliner: Connections failed for other reason + + +.. varnish_vsc_end:: vcp diff --git a/bin/varnishd/cache/cache_tcp_pool.c b/bin/varnishd/cache/cache_tcp_pool.c index 86777ae6c29..ed5083762f2 100644 --- a/bin/varnishd/cache/cache_tcp_pool.c +++ b/bin/varnishd/cache/cache_tcp_pool.c @@ -44,6 +44,7 @@ #include "cache_tcp_pool.h" #include "cache_pool.h" +#include "VSC_vcp.h" struct conn_pool; @@ -91,14 +92,14 @@ struct conn_pool { VTAILQ_ENTRY(conn_pool) list; int refcnt; struct lock mtx; + struct VSC_vcp *stats; + struct vsc_seg *vsc; + /* length: stat->n_conn */ VTAILQ_HEAD(, pfd) connlist; - int n_conn; + /* length: stat->n_kill */ VTAILQ_HEAD(, pfd) killlist; - int n_kill; - - int n_used; }; struct tcp_pool { @@ -178,12 +179,14 @@ vcp_handle(struct waited *w, enum wait_event ev, double now) case PFD_STATE_AVAIL: cp->methods->close(pfd); VTAILQ_REMOVE(&cp->connlist, pfd, list); - cp->n_conn--; + cp->stats->closed++; + cp->stats->n_conn--; FREE_OBJ(pfd); break; case PFD_STATE_CLEANUP: cp->methods->close(pfd); - cp->n_kill--; + cp->stats->closed++; + cp->stats->n_kill--; VTAILQ_REMOVE(&cp->killlist, pfd, list); memset(pfd, 0x11, sizeof *pfd); free(pfd); @@ -224,6 +227,10 @@ static void * VCP_New(struct conn_pool *cp, const void *id, void *priv, const struct cp_methods *cm) { + struct tcp_pool *tp = priv; + char abuf[VTCP_ADDRBUFSIZE]; + char pbuf[VTCP_PORTBUFSIZE]; + const struct suckaddr *addr; AN(cp); AN(cm); @@ -240,8 +247,26 @@ VCP_New(struct conn_pool *cp, const void *id, void *priv, VTAILQ_INIT(&cp->connlist); VTAILQ_INIT(&cp->killlist); + // XXX layering + CHECK_OBJ_NOTNULL(tp, TCP_POOL_MAGIC); + // XXX common cluster/seg + if (tp->uds != NULL) + cp->stats = VSC_vcp_New(NULL, &cp->vsc, "%p.%s", id, tp->uds); + else { + if (! tp->ip4 || (cache_param->prefer_ipv6 && tp->ip6)) + addr = tp->ip6; + else + addr = tp->ip4; + + AN(addr); + VTCP_name(addr, abuf, sizeof abuf, pbuf, sizeof pbuf); + cp->stats = VSC_vcp_New(NULL, &cp->vsc, "%p.%s:%s", id, + abuf, pbuf); + } + Lck_Lock(&conn_pools_mtx); VTAILQ_INSERT_HEAD(&conn_pools, cp, list); + VSC_C_main->vcp_create++; Lck_Unlock(&conn_pools_mtx); return (priv); @@ -279,32 +304,85 @@ VCP_Rel(struct conn_pool *cp) Lck_Unlock(&conn_pools_mtx); return (1); } - AZ(cp->n_used); + AZ(cp->stats->n_used); VTAILQ_REMOVE(&conn_pools, cp, list); + VSC_C_main->vcp_destroy++; Lck_Unlock(&conn_pools_mtx); Lck_Lock(&cp->mtx); VTAILQ_FOREACH_SAFE(pfd, &cp->connlist, list, pfd2) { VTAILQ_REMOVE(&cp->connlist, pfd, list); - cp->n_conn--; + cp->stats->n_conn--; assert(pfd->state == PFD_STATE_AVAIL); pfd->state = PFD_STATE_CLEANUP; (void)shutdown(pfd->fd, SHUT_WR); VTAILQ_INSERT_TAIL(&cp->killlist, pfd, list); - cp->n_kill++; + cp->stats->n_kill++; } - while (cp->n_kill) { + while (cp->stats->n_kill) { Lck_Unlock(&cp->mtx); (void)usleep(20000); Lck_Lock(&cp->mtx); } Lck_Unlock(&cp->mtx); Lck_Delete(&cp->mtx); - AZ(cp->n_conn); - AZ(cp->n_kill); + AZ(cp->stats->n_conn); + AZ(cp->stats->n_kill); + cp->stats = NULL; + VSC_vcp_Destroy(&cp->vsc); + AZ(cp->vsc); return (0); } +/*-------------------------------------------------------------------- + * Open a new connection from pool. This is a distinct function since + * probing cannot use a recycled connection. + */ + +static int +VCP_Open(struct conn_pool *cp, double tmo, const void **privp) +{ + int r; + double h; + + CHECK_OBJ_NOTNULL(cp, CONN_POOL_MAGIC); + + r = cp->methods->open(cp, tmo, privp); + + if (r >= 0) { + cp->stats->opened++; + return (r); + } + + /* stats access unprotected */ + switch (errno) { + case EACCES: + case EPERM: + cp->stats->fail_eacces++; + break; + case EADDRNOTAVAIL: + cp->stats->fail_eaddrnotavail++; + break; + case ECONNREFUSED: + cp->stats->fail_econnrefused++; + break; + case ENETUNREACH: + cp->stats->fail_enetunreach++; + break; + case ETIMEDOUT: + cp->stats->fail_etimedout++; + break; + default: + cp->stats->fail_other++; + // XXX name + VSL(SLT_Debug, 0, "VTP open error %d (%s)", + errno, strerror(errno)); + } + cp->stats->fail++; + + return (r); +} + /*-------------------------------------------------------------------- * Recycle a connection. */ @@ -327,7 +405,7 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp) assert(pfd->fd > 0); Lck_Lock(&cp->mtx); - cp->n_used--; + cp->stats->n_used--; pfd->waited->priv1 = pfd; pfd->waited->fd = pfd->fd; @@ -339,15 +417,17 @@ VCP_Recycle(const struct worker *wrk, struct pfd **pfdp) cp->methods->close(pfd); memset(pfd, 0x33, sizeof *pfd); free(pfd); - // XXX: stats + cp->stats->recycle_fail++; + cp->stats->closed++; pfd = NULL; } else { VTAILQ_INSERT_HEAD(&cp->connlist, pfd, list); + cp->stats->recycle++; i++; } if (pfd != NULL) - cp->n_conn++; + cp->stats->n_conn++; Lck_Unlock(&cp->mtx); if (i && DO_DEBUG(DBG_VTC_MODE)) { @@ -389,16 +469,17 @@ VCP_Close(struct pfd **pfdp) Lck_Lock(&cp->mtx); assert(pfd->state == PFD_STATE_USED || pfd->state == PFD_STATE_STOLEN); - cp->n_used--; + cp->stats->n_used--; if (pfd->state == PFD_STATE_STOLEN) { (void)shutdown(pfd->fd, SHUT_RDWR); VTAILQ_REMOVE(&cp->connlist, pfd, list); pfd->state = PFD_STATE_CLEANUP; VTAILQ_INSERT_HEAD(&cp->killlist, pfd, list); - cp->n_kill++; + cp->stats->n_kill++; } else { assert(pfd->state == PFD_STATE_USED); cp->methods->close(pfd); + cp->stats->closed++; memset(pfd, 0x44, sizeof *pfd); free(pfd); } @@ -428,12 +509,12 @@ VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk, assert(pfd->state == PFD_STATE_AVAIL); VTAILQ_REMOVE(&cp->connlist, pfd, list); VTAILQ_INSERT_TAIL(&cp->connlist, pfd, list); - cp->n_conn--; + cp->stats->n_conn--; VSC_C_main->backend_reuse++; pfd->state = PFD_STATE_STOLEN; pfd->cond = &wrk->cond; } - cp->n_used++; // Opening mostly works + cp->stats->n_used++; // Opening mostly works Lck_Unlock(&cp->mtx); if (pfd != NULL) @@ -444,11 +525,11 @@ VCP_Get(struct conn_pool *cp, double tmo, struct worker *wrk, INIT_OBJ(pfd->waited, WAITED_MAGIC); pfd->state = PFD_STATE_USED; pfd->conn_pool = cp; - pfd->fd = cp->methods->open(cp, tmo, &pfd->priv); + pfd->fd = VCP_Open(cp, tmo, &pfd->priv); if (pfd->fd < 0) { FREE_OBJ(pfd); Lck_Lock(&cp->mtx); - cp->n_used--; // Nope, didn't work after all. + cp->stats->n_used--; // Nope, didn't work after all. Lck_Unlock(&cp->mtx); } else VSC_C_main->backend_conn++; @@ -714,17 +795,13 @@ VTP_Rel(struct tcp_pool **tpp) } /*-------------------------------------------------------------------- - * Open a new connection from pool. This is a distinct function since - * probing cannot use a recycled connection. + * Open a new connection from pool. */ int -VTP_Open(const struct tcp_pool *tp, double tmo, const void **privp) +VTP_Open(struct tcp_pool *tp, double tmo, const void **privp) { - - if (tp->uds != NULL) - return (vus_open(tp->cp, tmo, privp)); - return (vtp_open(tp->cp, tmo, privp)); + return (VCP_Open(tp->cp, tmo, privp)); } /*-------------------------------------------------------------------- diff --git a/bin/varnishd/cache/cache_tcp_pool.h b/bin/varnishd/cache/cache_tcp_pool.h index d554a360f8f..b8100b84002 100644 --- a/bin/varnishd/cache/cache_tcp_pool.h +++ b/bin/varnishd/cache/cache_tcp_pool.h @@ -70,7 +70,7 @@ void VTP_Rel(struct tcp_pool **); * the pool is destroyed and all cached connections closed. */ -int VTP_Open(const struct tcp_pool *, double tmo, const void **); +int VTP_Open(struct tcp_pool *, double tmo, const void **); /* * Open a new connection and return the adress used. */