Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Ruler][DNS] Don't propagate no such host error if using default resolver #3257

Merged
merged 11 commits into from Oct 15, 2020
3 changes: 3 additions & 0 deletions CHANGELOG.md
Expand Up @@ -11,7 +11,10 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

## Unreleased


### Fixed
- [#3261](https://github.com/thanos-io/thanos/pull/3261) Thanos Store: Use segment files specified in meta.json file, if present. If not present, Store does the LIST operation as before.
- [#3257](https://github.com/thanos-io/thanos/pull/3257) Ruler: Prevent Ruler from crashing when using default DNS to lookup hosts that results in "No such hosts" errors.

## [v0.16.0](https://github.com/thanos-io/thanos/releases) - Release in progress

Expand Down
6 changes: 4 additions & 2 deletions cmd/thanos/query.go
Expand Up @@ -261,10 +261,11 @@ func runQuery(
}

fileSDCache := cache.New()
dnsStoreProvider := dns.NewProvider(
dnsStoreProvider := dns.NewProviderWithReturnOnErrorIfNotFound(
logger,
extprom.WrapRegistererWithPrefix("thanos_querier_store_apis_", reg),
dns.ResolverType(dnsSDResolver),
true,
)

for _, store := range strictStores {
Expand All @@ -273,10 +274,11 @@ func runQuery(
}
}

dnsRuleProvider := dns.NewProvider(
dnsRuleProvider := dns.NewProviderWithReturnOnErrorIfNotFound(
logger,
extprom.WrapRegistererWithPrefix("thanos_querier_rule_apis_", reg),
dns.ResolverType(dnsSDResolver),
true,
)

var (
Expand Down
6 changes: 4 additions & 2 deletions cmd/thanos/rule.go
Expand Up @@ -334,10 +334,11 @@ func runRule(
}
}

queryProvider := dns.NewProvider(
queryProvider := dns.NewProviderWithReturnOnErrorIfNotFound(
logger,
extprom.WrapRegistererWithPrefix("thanos_ruler_query_apis_", reg),
dns.ResolverType(dnsSDResolver),
true,
)
var queryClients []*http_util.Client
for _, cfg := range queryCfg {
Expand Down Expand Up @@ -397,10 +398,11 @@ func runRule(
level.Warn(logger).Log("msg", "no alertmanager configured")
}

amProvider := dns.NewProvider(
amProvider := dns.NewProviderWithReturnOnErrorIfNotFound(
logger,
extprom.WrapRegistererWithPrefix("thanos_ruler_alertmanagers_", reg),
dns.ResolverType(dnsSDResolver),
false,
)
var alertmgrs []*alert.Alertmanager
for _, cfg := range alertingCfg.Alertmanagers {
Expand Down
3 changes: 2 additions & 1 deletion pkg/cacheutil/memcached_client.go
Expand Up @@ -215,10 +215,11 @@ func newMemcachedClient(
config MemcachedClientConfig,
reg prometheus.Registerer,
) (*memcachedClient, error) {
dnsProvider := dns.NewProvider(
dnsProvider := dns.NewProviderWithReturnOnErrorIfNotFound(
logger,
extprom.WrapRegistererWithPrefix("thanos_memcached_", reg),
dns.GolangResolverType,
true,
)

c := &memcachedClient{
Expand Down
29 changes: 28 additions & 1 deletion pkg/discovery/dns/provider.go
Expand Up @@ -53,11 +53,38 @@ func (t ResolverType) ToResolver(logger log.Logger) ipLookupResolver {
return r
}

// Deprecated. Use NewProviderWithReturnOnErrorIfNotFound instead.
//
// NewProvider returns a new empty provider with a given resolver type.
// If empty resolver type is net.DefaultResolver.
// TODO(OGKevin): Refactor code to use new method and eventually rename NewProviderWithReturnOnErrorIfNotFound back to NewProvider.
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
func NewProvider(logger log.Logger, reg prometheus.Registerer, resolverType ResolverType) *Provider {
p := &Provider{
resolver: NewResolver(resolverType.ToResolver(logger)),
resolver: NewResolver(resolverType.ToResolver(logger), logger, true),
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
resolved: make(map[string][]string),
logger: logger,
resolverAddrs: extprom.NewTxGaugeVec(reg, prometheus.GaugeOpts{
Name: "dns_provider_results",
Help: "The number of resolved endpoints for each configured address",
}, []string{"addr"}),
resolverLookupsCount: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "dns_lookups_total",
Help: "The number of DNS lookups resolutions attempts",
}),
resolverFailuresCount: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "dns_failures_total",
Help: "The number of DNS lookup failures",
}),
}

return p
}

// NewProviderWithReturnOnErrorIfNotFound returns a new empty provider with a given resolver type.
// If empty resolver type is net.DefaultResolver.
func NewProviderWithReturnOnErrorIfNotFound(logger log.Logger, reg prometheus.Registerer, resolverType ResolverType, returnErrOnNotFound bool) *Provider {
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
p := &Provider{
resolver: NewResolver(resolverType.ToResolver(logger), logger, returnErrOnNotFound),
resolved: make(map[string][]string),
logger: logger,
resolverAddrs: extprom.NewTxGaugeVec(reg, prometheus.GaugeOpts{
Expand Down
2 changes: 1 addition & 1 deletion pkg/discovery/dns/provider_test.go
Expand Up @@ -22,7 +22,7 @@ func TestProvider(t *testing.T) {
"127.0.0.5:19095",
}

prv := NewProvider(log.NewNopLogger(), nil, "")
prv := NewProviderWithReturnOnErrorIfNotFound(log.NewNopLogger(), nil, "", true)
prv.resolver = &mockResolver{
res: map[string][]string{
"a": ips[:2],
Expand Down
26 changes: 23 additions & 3 deletions pkg/discovery/dns/resolver.go
Expand Up @@ -9,6 +9,9 @@ import (
"strconv"
"strings"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"

"github.com/pkg/errors"
)

Expand Down Expand Up @@ -38,11 +41,15 @@ type ipLookupResolver interface {

type dnsSD struct {
resolver ipLookupResolver
logger log.Logger
// https://github.com/thanos-io/thanos/issues/3186
// This flag is used to prevent components from crashing if hosts are not found.
returnErrOnNotFound bool
}

// NewResolver creates a resolver with given underlying resolver.
func NewResolver(resolver ipLookupResolver) Resolver {
return &dnsSD{resolver: resolver}
func NewResolver(resolver ipLookupResolver, logger log.Logger, returnErrOnNotFound bool) Resolver {
return &dnsSD{resolver: resolver, logger: logger, returnErrOnNotFound: returnErrOnNotFound}
}

func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string, error) {
Expand Down Expand Up @@ -71,7 +78,14 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string
}
ips, err := s.resolver.LookupIPAddr(ctx, host)
if err != nil {
return nil, errors.Wrapf(err, "lookup IP addresses %q", host)
dnsErr, ok := err.(*net.DNSError)
// https://github.com/thanos-io/thanos/issues/3186
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
// Default DNS resolver can make thanos components crash if DSN resolutions results in EAI_NONAME.
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
// the flag returnErrOnNotFound can be used to prevent such crash.
if !(!s.returnErrOnNotFound && ok && dnsErr.IsNotFound) {
return nil, errors.Wrapf(err, "lookup IP addresses %q", host)
}
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
level.Error(s.logger).Log("msg", "failed to lookup IP addresses", "host", host, "err", err)
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's delete this and if only, let's have one for both miekg and Go DNS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm I don't quite understand this comment 🤔. Can you elaborate a lillte bit more?

}
for _, ip := range ips {
res = append(res, appendScheme(scheme, net.JoinHostPort(ip.String(), port)))
Expand Down Expand Up @@ -106,6 +120,12 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string
return nil, errors.Errorf("invalid lookup scheme %q", qtype)
}

// https://github.com/thanos-io/thanos/issues/3186
// This happens when miekg is used as resolver. When the host cannot be found, nothing is returned.
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
if res == nil && err == nil {
level.Warn(s.logger).Log("msg", "IP address lookup yielded no results nor errors", "host", host)
OGKevin marked this conversation as resolved.
Show resolved Hide resolved
}

return res, nil
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/discovery/dns/resolver_test.go
Expand Up @@ -9,6 +9,8 @@ import (
"sort"
"testing"

"github.com/go-kit/kit/log"

"github.com/pkg/errors"
"github.com/thanos-io/thanos/pkg/testutil"
)
Expand Down Expand Up @@ -184,7 +186,7 @@ func TestDnsSD_Resolve(t *testing.T) {

func testDnsSd(t *testing.T, tt DNSSDTest) {
ctx := context.TODO()
dnsSD := dnsSD{tt.resolver}
dnsSD := dnsSD{tt.resolver, log.NewNopLogger(), false}

result, err := dnsSD.Resolve(ctx, tt.addr, tt.qtype)
if tt.expectedErr != nil {
Expand Down