From 960563cee789fa5bed73e46cfe9521f06102ec37 Mon Sep 17 00:00:00 2001 From: Yash Sharma Date: Mon, 18 May 2020 23:51:51 +0530 Subject: [PATCH] Added logging for dns resolution error in the Query component as well (#2525) * Added logging for dns resolution error in the Query component as well Signed-off-by: Yash Nitpicking Signed-off-by: Yash More nitpicking Signed-off-by: Yash Removed the errorlist from lock scope for faster access Signed-off-by: Yash Added tsdb.MultiError for error handling Signed-off-by: Yash Fixed some import issues Signed-off-by: Yash Fixed linting Signed-off-by: Yash Changelog Signed-off-by: Yash Linting issues Signed-off-by: Yash More linting issues Signed-off-by: Yash fixed import error Signed-off-by: Yash Reverted some changes Signed-off-by: Yash Added some more error checks Signed-off-by: Yash Fixed some failing tests Signed-off-by: Yash Linting Signed-off-by: Yash Removed extra error param for logging Signed-off-by: Yash Changelog modification Signed-off-by: Yash Fixed unused variable Signed-off-by: Yash Logged failed dns resolution in the memcached_client Signed-off-by: Yash Changed the errorList to errs Signed-off-by: Yash Changed concrete error type to primitive one Signed-off-by: Yash Added logging of dns resolution failure in `http.go` Changed the error message in the `memcached_client.go` Signed-off-by: Yash Removed logging of errors in . Now it would return error list, which is to be handled by the receiver Signed-off-by: Yash Removed Err() method Signed-off-by: Yash Fixed import errors Signed-off-by: Yash Removed extra variables Signed-off-by: Yash Linting Signed-off-by: Yash Check to ensure that return error if list of error is non-empty Signed-off-by: Yash Modified tests of provider_test.go Signed-off-by: Yash more checks Signed-off-by: Yash * Remove print statement Signed-off-by: Yash * Instead of logging the error and then returning it, just return the error Signed-off-by: Yash * Error check for resolve method in rule.go Signed-off-by: Yash * Added more useful error message Signed-off-by: Yash * Modified errs.Err() so that it returns error if found else returns nil Signed-off-by: Yash * Linting Signed-off-by: Yash * Added missing imports Signed-off-by: Yash * Fixed some changelog issues Signed-off-by: Yash --- CHANGELOG.md | 1 + cmd/thanos/query.go | 8 ++++++-- cmd/thanos/rule.go | 3 +-- pkg/cacheutil/memcached_client.go | 7 +++++-- pkg/discovery/dns/provider.go | 10 ++++++++-- pkg/discovery/dns/provider_test.go | 21 ++++++++++++++------- pkg/http/http.go | 6 +++--- 7 files changed, 38 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c14164f1a..144a561850 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#2615](https://github.com/thanos-io/thanos/pull/2615) Rule: Fix bugs where rules were out of sync. - [#2614](https://github.com/thanos-io/thanos/pull/2614) Tracing: Disable Elastic APM Go Agent default tracer on initialization to disable the default metric gatherer - [#2548](https://github.com/thanos-io/thanos/pull/2548) Query: Fixed rare cases of double counter reset accounting when querying `rate` with deduplication enabled. +- [#2525](https://github.com/thanos-io/thanos/pull/2525) Query: Fixed logging for dns resolution error in the `Query` component. ### Added diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index e00add908e..61951ce194 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -303,7 +303,9 @@ func runQuery( } fileSDCache.Update(update) stores.Update(ctxUpdate) - dnsProvider.Resolve(ctxUpdate, append(fileSDCache.Addresses(), storeAddrs...)) + if err := dnsProvider.Resolve(ctxUpdate, append(fileSDCache.Addresses(), storeAddrs...)); err != nil { + level.Error(logger).Log("msg", "failed to resolve addresses for storeAPIs", "err", err) + } case <-ctxUpdate.Done(): return nil } @@ -318,7 +320,9 @@ func runQuery( ctx, cancel := context.WithCancel(context.Background()) g.Add(func() error { return runutil.Repeat(dnsSDInterval, ctx.Done(), func() error { - dnsProvider.Resolve(ctx, append(fileSDCache.Addresses(), storeAddrs...)) + if err := dnsProvider.Resolve(ctx, append(fileSDCache.Addresses(), storeAddrs...)); err != nil { + level.Error(logger).Log("msg", "failed to resolve addresses for storeAPIs", "err", err) + } return nil }) }, func(error) { diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 1e40acff02..2789178f89 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -748,8 +748,7 @@ func addDiscoveryGroups(g *run.Group, c *http_util.Client, interval time.Duratio g.Add(func() error { return runutil.Repeat(interval, ctx.Done(), func() error { - c.Resolve(ctx) - return nil + return c.Resolve(ctx) }) }, func(error) { cancel() diff --git a/pkg/cacheutil/memcached_client.go b/pkg/cacheutil/memcached_client.go index 99fc545786..9ee3affe0e 100644 --- a/pkg/cacheutil/memcached_client.go +++ b/pkg/cacheutil/memcached_client.go @@ -5,6 +5,7 @@ package cacheutil import ( "context" + "strings" "sync" "time" @@ -462,8 +463,10 @@ func (c *memcachedClient) resolveAddrs() error { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - c.dnsProvider.Resolve(ctx, c.config.Addresses) - + // If some of the dns resolution fails, log the error. + if err := c.dnsProvider.Resolve(ctx, c.config.Addresses); err != nil { + level.Error(c.logger).Log("msg", "failed to resolve addresses for storeAPIs", "addresses", strings.Join(c.config.Addresses, ","), "err", err) + } // Fail in case no server address is resolved. servers := c.dnsProvider.Addresses() if len(servers) == 0 { diff --git a/pkg/discovery/dns/provider.go b/pkg/discovery/dns/provider.go index 1dad2ada9a..153f82769e 100644 --- a/pkg/discovery/dns/provider.go +++ b/pkg/discovery/dns/provider.go @@ -13,6 +13,7 @@ import ( "github.com/go-kit/kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + tsdberrors "github.com/prometheus/prometheus/tsdb/errors" "github.com/thanos-io/thanos/pkg/discovery/dns/miekgdns" "github.com/thanos-io/thanos/pkg/extprom" ) @@ -107,8 +108,10 @@ func GetQTypeName(addr string) (qtype string, name string) { // Resolve stores a list of provided addresses or their DNS records if requested. // Addresses prefixed with `dns+` or `dnssrv+` will be resolved through respective DNS lookup (A/AAAA or SRV). // defaultPort is used for non-SRV records when a port is not supplied. -func (p *Provider) Resolve(ctx context.Context, addrs []string) { +func (p *Provider) Resolve(ctx context.Context, addrs []string) error { resolvedAddrs := map[string][]string{} + errs := tsdberrors.MultiError{} + for _, addr := range addrs { var resolved []string qtype, name := GetQTypeName(addr) @@ -120,9 +123,10 @@ func (p *Provider) Resolve(ctx context.Context, addrs []string) { resolved, err := p.resolver.Resolve(ctx, name, QType(qtype)) p.resolverLookupsCount.Inc() if err != nil { + // Append all the failed dns resolution in the error list. + errs.Add(err) // The DNS resolution failed. Continue without modifying the old records. p.resolverFailuresCount.Inc() - level.Error(p.logger).Log("msg", "dns resolution failed", "addr", addr, "err", err) // Use cached values. p.RLock() resolved = p.resolved[addr] @@ -143,6 +147,8 @@ func (p *Provider) Resolve(ctx context.Context, addrs []string) { p.resolverAddrs.Submit() p.resolved = resolvedAddrs + + return errs.Err() } // Addresses returns the latest addresses present in the Provider. diff --git a/pkg/discovery/dns/provider_test.go b/pkg/discovery/dns/provider_test.go index 585a7afb22..668d5a65c8 100644 --- a/pkg/discovery/dns/provider_test.go +++ b/pkg/discovery/dns/provider_test.go @@ -32,14 +32,16 @@ func TestProvider(t *testing.T) { } ctx := context.TODO() - prv.Resolve(ctx, []string{"any+x"}) + err := prv.Resolve(ctx, []string{"any+x"}) + testutil.Ok(t, err) result := prv.Addresses() sort.Strings(result) testutil.Equals(t, []string(nil), result) testutil.Equals(t, 1, promtestutil.CollectAndCount(prv.resolverAddrs)) testutil.Equals(t, float64(0), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+x"))) - prv.Resolve(ctx, []string{"any+a", "any+b", "any+c"}) + err = prv.Resolve(ctx, []string{"any+a", "any+b", "any+c"}) + testutil.Ok(t, err) result = prv.Addresses() sort.Strings(result) testutil.Equals(t, ips, result) @@ -48,7 +50,8 @@ func TestProvider(t *testing.T) { testutil.Equals(t, float64(2), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+b"))) testutil.Equals(t, float64(1), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+c"))) - prv.Resolve(ctx, []string{"any+b", "any+c"}) + err = prv.Resolve(ctx, []string{"any+b", "any+c"}) + testutil.Ok(t, err) result = prv.Addresses() sort.Strings(result) testutil.Equals(t, ips[2:], result) @@ -56,14 +59,16 @@ func TestProvider(t *testing.T) { testutil.Equals(t, float64(2), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+b"))) testutil.Equals(t, float64(1), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+c"))) - prv.Resolve(ctx, []string{"any+x"}) + err = prv.Resolve(ctx, []string{"any+x"}) + testutil.Ok(t, err) result = prv.Addresses() sort.Strings(result) testutil.Equals(t, []string(nil), result) testutil.Equals(t, 1, promtestutil.CollectAndCount(prv.resolverAddrs)) testutil.Equals(t, float64(0), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+x"))) - prv.Resolve(ctx, []string{"any+a", "any+b", "any+c"}) + err = prv.Resolve(ctx, []string{"any+a", "any+b", "any+c"}) + testutil.Ok(t, err) result = prv.Addresses() sort.Strings(result) testutil.Equals(t, ips, result) @@ -72,7 +77,8 @@ func TestProvider(t *testing.T) { testutil.Equals(t, float64(2), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+b"))) testutil.Equals(t, float64(1), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+c"))) - prv.Resolve(ctx, []string{"any+b", "example.com:90", "any+c"}) + err = prv.Resolve(ctx, []string{"any+b", "example.com:90", "any+c"}) + testutil.Ok(t, err) result = prv.Addresses() sort.Strings(result) testutil.Equals(t, append(ips[2:], "example.com:90"), result) @@ -81,7 +87,8 @@ func TestProvider(t *testing.T) { testutil.Equals(t, float64(1), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("example.com:90"))) testutil.Equals(t, float64(1), promtestutil.ToFloat64(prv.resolverAddrs.WithLabelValues("any+c"))) - prv.Resolve(ctx, []string{"any+b", "any+c"}) + err = prv.Resolve(ctx, []string{"any+b", "any+c"}) + testutil.Ok(t, err) result = prv.Addresses() sort.Strings(result) testutil.Equals(t, ips[2:], result) diff --git a/pkg/http/http.go b/pkg/http/http.go index 27fb25234b..ed0be67270 100644 --- a/pkg/http/http.go +++ b/pkg/http/http.go @@ -163,7 +163,7 @@ func (c FileSDConfig) convert() (file.SDConfig, error) { } type AddressProvider interface { - Resolve(context.Context, []string) + Resolve(context.Context, []string) error Addresses() []string } @@ -259,6 +259,6 @@ func (c *Client) Discover(ctx context.Context) { } // Resolve refreshes and resolves the list of targets. -func (c *Client) Resolve(ctx context.Context) { - c.provider.Resolve(ctx, append(c.fileSDCache.Addresses(), c.staticAddresses...)) +func (c *Client) Resolve(ctx context.Context) error { + return c.provider.Resolve(ctx, append(c.fileSDCache.Addresses(), c.staticAddresses...)) }