Skip to content

Commit

Permalink
Add support for decompression of HTTP responses
Browse files Browse the repository at this point in the history
If the module configuration specifies the "compression" option
blackbox_exporter will try to decompress the response using the
specified algorithm. If the response is not compressed using that
algorithm, the probe will fail.

It validates that the "Accept-Encoding" header is either absent, or that
it specifies the same algorithm as the "compression" option. If the
"Accept-Encoding" header is present but it specifies a different
algorithm, the probe will fail.

If the compression option is *not* used, probe_http_content_length and
probe_http_uncompressed_body_length will have the same value
corresponding to the original content length. If the compression option
is used and the content can be decompressed, probe_http_content_length
will report the original content length as it currently does, and
probe_http_uncompressed_body_length will report the length of the body
after decompression as expected.

Fixes #684

Signed-off-by: Marcelo E. Magallon <marcelo.magallon@grafana.com>
  • Loading branch information
mem committed Apr 19, 2021
1 parent 5e7494a commit 2d378fe
Show file tree
Hide file tree
Showing 10 changed files with 589 additions and 2 deletions.
12 changes: 12 additions & 0 deletions CONFIGURATION.md
Expand Up @@ -49,6 +49,18 @@ The other placeholders are specified separately.
headers:
[ <string>: <string> ... ]

# The compression algorithm to use to decompress the response (gzip, br, deflate, identity).
#
# If an "Accept-Encoding" header is specified, it MUST be such that the compression algorithm
# indicated using this option is acceptable. For example, you can use `compression: gzip` and
# `Accept-Encoding: br, gzip` or `Accept-Encoding: br;q=1.0, gzip;q=0.9`. The fact that gzip is
# acceptable with a lower quality than br does not invalidate the configuration, as you might
# be testing that the server does not return br-encoded content even if it's requested. On the
# other hand, `compression: gzip` and `Accept-Encoding: br, identity` is NOT a valid
# configuration, because you are asking for gzip to NOT be returned, and trying to decompress
# whatever the server returns is likely going to fail.
[ compression: <string> | default = "" ]

# Whether or not the probe will follow any redirects.
[ no_follow_redirects: <boolean> | default = false ]

Expand Down
82 changes: 82 additions & 0 deletions config/config.go
Expand Up @@ -16,9 +16,13 @@ package config
import (
"errors"
"fmt"
"math"
"os"
"regexp"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -188,6 +192,7 @@ type HTTPProbe struct {
FailIfHeaderNotMatchesRegexp []HeaderMatch `yaml:"fail_if_header_not_matches,omitempty"`
Body string `yaml:"body,omitempty"`
HTTPClientConfig config.HTTPClientConfig `yaml:"http_client_config,inline"`
Compression string `yaml:"compression,omitempty"`
}

type HeaderMatch struct {
Expand Down Expand Up @@ -271,6 +276,16 @@ func (s *HTTPProbe) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err := s.HTTPClientConfig.Validate(); err != nil {
return err
}

for key, value := range s.Headers {
switch strings.Title(key) {
case "Accept-Encoding":
if !isCompressionAcceptEncodingValid(s.Compression, value) {
return fmt.Errorf(`invalid configuration "%s: %s", "compression: %s"`, key, value, s.Compression)
}
}
}

return nil
}

Expand Down Expand Up @@ -358,3 +373,70 @@ func (s *HeaderMatch) UnmarshalYAML(unmarshal func(interface{}) error) error {

return nil
}

// isCompressionAcceptEncodingValid validates the compression +
// Accept-Encoding combination.
//
// If there's a compression setting, and there's also an Accept-Encoding
// header, they MUST match, otherwise we end up requesting something
// that doesn't include the specified compression, and that's likely to
// fail, depending on how the server is configured. Testing that the
// server _ignores_ Accept-Encoding, e.g. by not including a particular
// compression in the header but expecting it in the response, falls out
// of the scope of the tests we perform.
//
// With that logic, this function validates that if a compression
// algorithm is specified, it's covered by the specified Accept-Encoding
// header. It doesn't need to be the most preferred encoding, but it MUST
// be included in the preferred encodings.
//
// Parameters:
//   - encoding: the configured compression algorithm; "" means that no
//     compression is expected.
//   - acceptEncoding: the raw value of the Accept-Encoding header; ""
//     means that the header is not set.
//
// It returns true if either argument is empty, or if encoding is
// accepted by the header with a quality greater than zero. The rules
// applied to the header are:
//   - Content-codings are compared case-insensitively.
//   - An entry that names the encoding explicitly always takes precedence
//     over the "*" wildcard, whatever their relative qualities are, so
//     "*, br;q=0" rejects br and "*;q=0, br" accepts it.
//   - If the encoding is listed more than once, the entry with the highest
//     quality wins.
//   - An entry without a quality has quality 1; a quality that cannot be
//     parsed is treated as 0, i.e. the entry is rejected.
func isCompressionAcceptEncodingValid(encoding, acceptEncoding string) bool {
	// unspecified compression + any encoding value is valid
	// any compression + no accept encoding is valid
	if encoding == "" || acceptEncoding == "" {
		return true
	}

	type encodingQuality struct {
		encoding string
		quality  float32
	}

	// parseQuality converts a qvalue to a number rounded to three
	// decimals. Anything that does not parse as a number yields 0.
	parseQuality := func(str string) float32 {
		q, err := strconv.ParseFloat(str, 32)
		if err != nil {
			return 0
		}
		return float32(math.Round(q*1000) / 1000)
	}

	var encodings []encodingQuality

	for _, part := range strings.Split(acceptEncoding, ",") {
		e := encodingQuality{quality: 1.0}

		idx := strings.LastIndexByte(part, ';')
		if idx == -1 {
			e.encoding = strings.TrimSpace(part)
		} else {
			e.encoding = strings.TrimSpace(part[:idx])

			q := strings.TrimSpace(part[idx+1:])
			// The parameter name is matched case-insensitively so that
			// "Q=0.5" is not mistaken for an unparsable (rejected) weight.
			if len(q) >= 2 && strings.EqualFold(q[:2], "q=") {
				q = q[2:]
			}
			e.quality = parseQuality(q)
		}

		encodings = append(encodings, e)
	}

	// Order the entries so that the first match found below is the one
	// that decides: explicitly named codings go before the wildcard, and
	// within each group higher qualities go first.
	sort.SliceStable(encodings, func(i, j int) bool {
		iWildcard := encodings[i].encoding == "*"
		jWildcard := encodings[j].encoding == "*"
		if iWildcard != jWildcard {
			return jWildcard
		}
		return encodings[j].quality < encodings[i].quality
	})

	for _, e := range encodings {
		if e.encoding == "*" || strings.EqualFold(encoding, e.encoding) {
			return e.quality > 0
		}
	}

	return false
}
91 changes: 91 additions & 0 deletions config/config_test.go
Expand Up @@ -75,6 +75,14 @@ func TestLoadBadConfigs(t *testing.T) {
input: "testdata/invalid-http-header-match-regexp.yml",
want: `error parsing config file: "Could not compile regular expression" regexp=":["`,
},
{
input: "testdata/invalid-http-compression-mismatch.yml",
want: `error parsing config file: invalid configuration "Accept-Encoding: deflate", "compression: gzip"`,
},
{
input: "testdata/invalid-http-request-compression-reject-all-encodings.yml",
want: `error parsing config file: invalid configuration "Accept-Encoding: *;q=0.0", "compression: gzip"`,
},
{
input: "testdata/invalid-tcp-query-response-regexp.yml",
want: `error parsing config file: "Could not compile regular expression" regexp=":["`,
Expand Down Expand Up @@ -111,3 +119,86 @@ func TestHideConfigSecrets(t *testing.T) {
t.Fatal("config's String method reveals authentication credentials.")
}
}

// TestIsEncodingAcceptable runs isCompressionAcceptEncodingValid against
// a table of compression / Accept-Encoding pairs, one subtest per entry,
// and reports every entry whose result differs from the expected one.
func TestIsEncodingAcceptable(t *testing.T) {
	type scenario struct {
		name        string
		compression string
		header      string
		want        bool
	}

	scenarios := []scenario{
		// No compression configured: any header is fine.
		{name: "empty compression", compression: "", header: "gzip", want: true},

		// The compression is the only coding listed.
		{name: "trivial", compression: "gzip", header: "gzip", want: true},
		{name: "trivial, quality", compression: "gzip", header: "gzip;q=1.0", want: true},

		// The position in the list does not matter.
		{name: "first", compression: "gzip", header: "gzip, compress", want: true},
		{name: "second", compression: "gzip", header: "compress, gzip", want: true},

		// Not listed at all, with and without a wildcard to fall back on.
		{name: "missing", compression: "br", header: "gzip, compress", want: false},
		{name: "*", compression: "br", header: "gzip, compress, *", want: true},
		{name: "* with quality", compression: "br", header: "gzip, compress, *;q=0.1", want: true},

		// A quality of zero is a rejection.
		{name: "rejected", compression: "br", header: "gzip, compress, br;q=0.0", want: false},
		{name: "rejected *", compression: "br", header: "gzip, compress, *;q=0.0", want: false},

		// Several weighted entries, in different orders and spacing.
		{name: "complex", compression: "br", header: "gzip;q=1.0, compress;q=0.5, br;q=0.1, *;q=0.0", want: true},
		{name: "complex out of order", compression: "br", header: "*;q=0.0, compress;q=0.5, br;q=0.1, gzip;q=1.0", want: true},
		{name: "complex with extra blanks", compression: "br", header: " gzip;q=1.0, compress; q=0.5, br;q=0.1, *; q=0.0 ", want: true},
	}

	for _, sc := range scenarios {
		sc := sc // give the closure its own copy of the table entry
		t.Run(sc.name, func(t *testing.T) {
			got := isCompressionAcceptEncodingValid(sc.compression, sc.header)
			if got == sc.want {
				return
			}
			t.Errorf("Unexpected result: input=%q acceptEncoding=%q expected=%t actual=%t", sc.compression, sc.header, sc.want, got)
		})
	}
}
8 changes: 8 additions & 0 deletions config/testdata/invalid-http-compression-mismatch.yml
@@ -0,0 +1,8 @@
modules:
http_headers:
prober: http
timeout: 5s
http:
compression: gzip
headers:
"Accept-Encoding": "deflate"
@@ -0,0 +1,10 @@
modules:
http_headers:
prober: http
timeout: 5s
http:
# this configuration is invalid because it's requesting a
# compressed encoding, but it's rejecting every possible encoding
compression: gzip
headers:
"Accept-Encoding": "*;q=0.0"
12 changes: 12 additions & 0 deletions example.yml
Expand Up @@ -52,6 +52,18 @@ modules:
method: GET
tls_config:
ca_file: "/certs/my_cert.crt"
http_gzip:
prober: http
http:
method: GET
compression: gzip
http_gzip_with_accept_encoding:
prober: http
http:
method: GET
compression: gzip
headers:
Accept-Encoding: gzip
tls_connect:
prober: tcp
timeout: 5s
Expand Down
1 change: 1 addition & 0 deletions go.mod
@@ -1,6 +1,7 @@
module github.com/prometheus/blackbox_exporter

require (
github.com/andybalholm/brotli v1.0.1
github.com/go-kit/kit v0.10.0
github.com/miekg/dns v1.1.40
github.com/pkg/errors v0.9.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -13,6 +13,8 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRF
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
Expand Down
53 changes: 51 additions & 2 deletions prober/http.go
Expand Up @@ -14,6 +14,8 @@
package prober

import (
"compress/flate"
"compress/gzip"
"context"
"crypto/tls"
"errors"
Expand All @@ -31,6 +33,7 @@ import (
"sync"
"time"

"github.com/andybalholm/brotli"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -414,6 +417,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
request.Host = value
continue
}

request.Header.Set(key, value)
}

Expand Down Expand Up @@ -470,6 +474,31 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
}
}

// Since the configuration specifies a compression algorithm, blindly treat the response body as a
// compressed payload; if we cannot decompress it it's a failure because the configuration says we
// should expect the response to be compressed in that way.
if httpConfig.Compression != "" {
dec, err := getDecompressionReader(httpConfig.Compression, resp.Body)
if err != nil {
level.Info(logger).Log("msg", "Failed to get decompressor for HTTP response body", "err", err)
success = false
} else if dec != nil {
// Since we are replacing the original resp.Body with the decoder, we need to make sure
// we close the original body. We cannot close it right away because the decompressor
// might not have read it yet.
defer func(c io.Closer) {
err := c.Close()
if err != nil {
// At this point we cannot really do anything with this error, but log
// it in case it contains useful information as to what's the problem.
level.Info(logger).Log("msg", "Error while closing response from server", "err", err)
}
}(resp.Body)

resp.Body = dec
}
}

byteCounter := &byteCounter{ReadCloser: resp.Body}

if success && (len(httpConfig.FailIfBodyMatchesRegexp) > 0 || len(httpConfig.FailIfBodyNotMatchesRegexp) > 0) {
Expand All @@ -491,8 +520,9 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
respBodyBytes = byteCounter.n

if err := byteCounter.Close(); err != nil {
// We have already read everything we could from the server. The error here might be a
// TCP error. Log it in case it contains useful information as to what's the problem.
// We have already read everything we could from the server, maybe even uncompressed the
// body. The error here might be either a decompression error or a TCP error. Log it in
// case it contains useful information as to what's the problem.
level.Info(logger).Log("msg", "Error while closing response from server", "error", err.Error())
}
}
Expand Down Expand Up @@ -595,3 +625,22 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
redirectsGauge.Set(float64(redirects))
return
}

// getDecompressionReader wraps origBody in a reader that decompresses it
// using the named algorithm.
//
// The algorithm name is matched case-insensitively. The supported values
// are "br", "deflate", "gzip" and "identity"; an empty name is treated
// like "identity". For "identity" origBody itself is returned.
//
// For "br" the returned reader's Close is a no-op, so in that case (and,
// per the comment at the call site, in general) the caller stays
// responsible for closing origBody.
//
// A non-nil error is returned if the algorithm is not supported, or if
// the gzip reader cannot be created from origBody. Whenever the error is
// non-nil the returned reader is a nil interface value.
func getDecompressionReader(algorithm string, origBody io.ReadCloser) (io.ReadCloser, error) {
	switch strings.ToLower(algorithm) {
	case "br":
		return ioutil.NopCloser(brotli.NewReader(origBody)), nil

	case "deflate":
		// NOTE(review): flate decodes a raw DEFLATE stream, while RFC 7230
		// section 4.2.2 describes the "deflate" coding as the zlib format
		// wrapping a DEFLATE stream; confirm which framing the probed
		// servers actually send.
		return flate.NewReader(origBody), nil

	case "gzip":
		// Don't return gzip.NewReader's results directly: on failure its
		// nil *gzip.Reader would be converted to a non-nil io.ReadCloser
		// holding a nil pointer, and a `reader != nil` check in the caller
		// would then pass for an unusable reader.
		reader, err := gzip.NewReader(origBody)
		if err != nil {
			return nil, err
		}
		return reader, nil

	case "identity", "":
		return origBody, nil

	default:
		return nil, errors.New("unsupported compression algorithm")
	}
}

0 comments on commit 2d378fe

Please sign in to comment.