From c31cb08f1b0cf9ad7f23a57210812499a8fa0764 Mon Sep 17 00:00:00 2001 From: mzardab Date: Mon, 10 Jan 2022 16:36:38 +0000 Subject: [PATCH 1/5] Renaming `DownsampleRange` -> `ResLevel1DownsampleRange` to specify the mapping between the downsample ranges and their respective resolutions Signed-off-by: mzardab --- cmd/thanos/downsample.go | 4 ++-- cmd/thanos/main_test.go | 8 ++++---- pkg/compact/compact.go | 8 ++++---- pkg/compact/compact_test.go | 18 +++++++++--------- pkg/compact/downsample/downsample.go | 4 ++-- test/e2e/e2ethanos/services.go | 1 - 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 5a9eaa2cf06..ef1893ea0d0 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -283,7 +283,7 @@ metaSendLoop: // Only downsample blocks once we are sure to get roughly 2 chunks out of it. // NOTE(fabxc): this must match with at which block size the compactor creates downsampled // blocks. Otherwise we may never downsample some data. - if m.MaxTime-m.MinTime < downsample.DownsampleRange0 { + if m.MaxTime-m.MinTime < downsample.ResLevel1DownsampleRange { continue } @@ -301,7 +301,7 @@ metaSendLoop: // Only downsample blocks once we are sure to get roughly 2 chunks out of it. // NOTE(fabxc): this must match with at which block size the compactor creates downsampled // blocks. Otherwise we may never downsample some data. - if m.MaxTime-m.MinTime < downsample.DownsampleRange1 { + if m.MaxTime-m.MinTime < downsample.ResLevel2DownsampleRange { continue } } diff --git a/cmd/thanos/main_test.go b/cmd/thanos/main_test.go index 18dd566aa6c..5f2f8844dec 100644 --- a/cmd/thanos/main_test.go +++ b/cmd/thanos/main_test.go @@ -121,7 +121,7 @@ func TestRegression4960_Deadlock(t *testing.T) { ctx, dir, []labels.Labels{{{Name: "a", Value: "1"}}}, - 1, 0, downsample.DownsampleRange0+1, // Pass the minimum DownsampleRange0 check. + 1, 0, downsample.ResLevel1DownsampleRange+1, // Pass the minimum ResLevel1DownsampleRange check. labels.Labels{{Name: "e1", Value: "1"}}, downsample.ResLevel0, metadata.NoneFunc) testutil.Ok(t, err) @@ -132,7 +132,7 @@ func TestRegression4960_Deadlock(t *testing.T) { ctx, dir, []labels.Labels{{{Name: "a", Value: "2"}}}, - 1, 0, downsample.DownsampleRange0+1, // Pass the minimum DownsampleRange0 check. + 1, 0, downsample.ResLevel1DownsampleRange+1, // Pass the minimum ResLevel1DownsampleRange check. labels.Labels{{Name: "e1", Value: "2"}}, downsample.ResLevel0, metadata.NoneFunc) testutil.Ok(t, err) @@ -143,7 +143,7 @@ func TestRegression4960_Deadlock(t *testing.T) { ctx, dir, []labels.Labels{{{Name: "a", Value: "2"}}}, - 1, 0, downsample.DownsampleRange0+1, // Pass the minimum DownsampleRange0 check. + 1, 0, downsample.ResLevel1DownsampleRange+1, // Pass the minimum ResLevel1DownsampleRange check. labels.Labels{{Name: "e1", Value: "2"}}, downsample.ResLevel0, metadata.NoneFunc) testutil.Ok(t, err) @@ -183,7 +183,7 @@ func TestCleanupDownsampleCacheFolder(t *testing.T) { ctx, dir, []labels.Labels{{{Name: "a", Value: "1"}}}, - 1, 0, downsample.DownsampleRange0+1, // Pass the minimum DownsampleRange0 check. + 1, 0, downsample.ResLevel1DownsampleRange+1, // Pass the minimum ResLevel1DownsampleRange check. 
labels.Labels{{Name: "e1", Value: "1"}}, downsample.ResLevel0, metadata.NoneFunc) testutil.Ok(t, err) diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index c8e9945ba72..4ed4585f24e 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -123,9 +123,9 @@ func UntilNextDownsampling(m *metadata.Meta) (time.Duration, error) { case downsample.ResLevel2: return time.Duration(0), errors.New("no downsampling") case downsample.ResLevel1: - return time.Duration(downsample.DownsampleRange1*time.Millisecond) - timeRange, nil + return time.Duration(downsample.ResLevel2DownsampleRange*time.Millisecond) - timeRange, nil case downsample.ResLevel0: - return time.Duration(downsample.DownsampleRange0*time.Millisecond) - timeRange, nil + return time.Duration(downsample.ResLevel1DownsampleRange*time.Millisecond) - timeRange, nil default: panic(errors.Errorf("invalid resolution %v", m.Thanos.Downsample.Resolution)) } @@ -637,7 +637,7 @@ func (ds *DownsampleProgressCalculator) ProgressCalculate(ctx context.Context, g continue } - if m.MaxTime-m.MinTime < downsample.DownsampleRange0 { + if m.MaxTime-m.MinTime < downsample.ResLevel1DownsampleRange { continue } groupBlocks[group.key]++ @@ -653,7 +653,7 @@ func (ds *DownsampleProgressCalculator) ProgressCalculate(ctx context.Context, g continue } - if m.MaxTime-m.MinTime < downsample.DownsampleRange1 { + if m.MaxTime-m.MinTime < downsample.ResLevel2DownsampleRange { continue } groupBlocks[group.key]++ diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index 8fc55558392..a1b39845068 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -509,10 +509,10 @@ func TestDownsampleProgressCalculate(t *testing.T) { // This test case has blocks from multiple groups and resolution levels. Only the blocks in the second group should be downsampled since the others either have time differences not in the range for their resolution, or a resolution which should not be downsampled. testName: "multi_group_test", input: []*metadata.Meta{ - createBlockMeta(6, 1, downsample.DownsampleRange0, map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{7, 8}), - createBlockMeta(7, 0, downsample.DownsampleRange1, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{8, 9}), - createBlockMeta(9, 0, downsample.DownsampleRange1, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{8, 11}), - createBlockMeta(8, 0, downsample.DownsampleRange1, map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{9, 10}), + createBlockMeta(6, 1, downsample.ResLevel1DownsampleRange, map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{7, 8}), + createBlockMeta(7, 0, downsample.ResLevel2DownsampleRange, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{8, 9}), + createBlockMeta(9, 0, downsample.ResLevel2DownsampleRange, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{8, 11}), + createBlockMeta(8, 0, downsample.ResLevel2DownsampleRange, map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{9, 10}), }, expected: map[string]float64{ keys[0]: 0.0, @@ -524,7 +524,7 @@ func TestDownsampleProgressCalculate(t *testing.T) { // This block should be downsampled. 
testName: "res_level0_test", input: []*metadata.Meta{ - createBlockMeta(9, 0, downsample.DownsampleRange0, map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{10, 11}), + createBlockMeta(9, 0, downsample.ResLevel1DownsampleRange, map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{10, 11}), }, expected: map[string]float64{ keys[0]: 1.0, @@ -534,7 +534,7 @@ func TestDownsampleProgressCalculate(t *testing.T) { // This block should be downsampled. testName: "res_level1_test", input: []*metadata.Meta{ - createBlockMeta(9, 0, downsample.DownsampleRange1, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{10, 11}), + createBlockMeta(9, 0, downsample.ResLevel2DownsampleRange, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{10, 11}), }, expected: map[string]float64{ keys[1]: 1.0, @@ -545,7 +545,7 @@ func TestDownsampleProgressCalculate(t *testing.T) { // Blocks with this resolution should not be downsampled. testName: "res_level2_test", input: []*metadata.Meta{ - createBlockMeta(10, 0, downsample.DownsampleRange1, map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{11, 12}), + createBlockMeta(10, 0, downsample.ResLevel2DownsampleRange, map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{11, 12}), }, expected: map[string]float64{ keys[2]: 0.0, @@ -555,7 +555,7 @@ func TestDownsampleProgressCalculate(t *testing.T) { // This block should be downsampled. testName: "res_level0_test_incorrect", input: []*metadata.Meta{ - createBlockMeta(9, 1, downsample.DownsampleRange0, map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{10, 11}), + createBlockMeta(9, 1, downsample.ResLevel1DownsampleRange, map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{10, 11}), }, expected: map[string]float64{ keys[0]: 0.0, @@ -566,7 +566,7 @@ func TestDownsampleProgressCalculate(t *testing.T) { // This block should be downsampled. testName: "res_level1_test", input: []*metadata.Meta{ - createBlockMeta(9, 1, downsample.DownsampleRange1, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{10, 11}), + createBlockMeta(9, 1, downsample.ResLevel2DownsampleRange, map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{10, 11}), }, expected: map[string]float64{ keys[1]: 0.0, diff --git a/pkg/compact/downsample/downsample.go b/pkg/compact/downsample/downsample.go index ea66b295717..f656a796c5a 100644 --- a/pkg/compact/downsample/downsample.go +++ b/pkg/compact/downsample/downsample.go @@ -35,8 +35,8 @@ const ( // Downsampling ranges i.e. minimum block size after which we start to downsample blocks (in seconds). const ( - DownsampleRange0 = 40 * 60 * 60 * 1000 // 40 hours. - DownsampleRange1 = 10 * 24 * 60 * 60 * 1000 // 10 days. + ResLevel1DownsampleRange = 40 * 60 * 60 * 1000 // 40 hours. + ResLevel2DownsampleRange = 10 * 24 * 60 * 60 * 1000 // 10 days. ) // Downsample downsamples the given block. It writes a new block into dir and returns its ID. 
diff --git a/test/e2e/e2ethanos/services.go b/test/e2e/e2ethanos/services.go index 480e6814616..02bb35e4670 100644 --- a/test/e2e/e2ethanos/services.go +++ b/test/e2e/e2ethanos/services.go @@ -735,7 +735,6 @@ func NewCompactor(e e2e.Environment, name string, bucketConfig client.BucketConf "--selector.relabel-config": string(relabelConfigBytes), "--wait": "", }), extArgs...)...), - Readiness: e2e.NewHTTPReadinessProbe("http", "/-/ready", 200, 200), User: strconv.Itoa(os.Getuid()), WaitReadyBackoff: &defaultBackoffConfig, }, From bd1f9ca678e9de490c7ee949ceb5308efb938f85 Mon Sep 17 00:00:00 2001 From: mzardab Date: Mon, 10 Jan 2022 16:40:11 +0000 Subject: [PATCH 2/5] Adding compactor downsample retention flag validation... ...if the following conditions are met: * Downsampling is enabled, raw retention must be > 10 days * Downsampling is enabled, 5m resolution retention must be at least 40h * Downsampling is enabled, 1h resolution must be at least 10d This is to avoid unexpected deletions of downsampled/raw data as compactor will only downsample raw blocks after there is enough data to form 2 whole blocks, it will not downsample immediately. Signed-off-by: mzardab --- cmd/thanos/compact.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 2b75a88edea..7555ed046da 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -388,12 +388,24 @@ func runCompact( } if retentionByResolution[compact.ResolutionLevelRaw].Seconds() != 0 { + // If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution) + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Seconds() < downsample.ResLevel2DownsampleRange { + return errors.New("raw resolution must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + } level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) } if retentionByResolution[compact.ResolutionLevel5m].Seconds() != 0 { + // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Seconds() < downsample.ResLevel1DownsampleRange { + return errors.New("5m resolution retention must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") + } level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) } if retentionByResolution[compact.ResolutionLevel1h].Seconds() != 0 { + // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel1h].Seconds() < downsample.ResLevel2DownsampleRange { + return errors.New("1h resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + } level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } From d7dded1d06ea9479221c04f61f3ef22eb19920ab Mon Sep 17 00:00:00 2001 From: mzardab Date: Tue, 11 Jan 2022 09:03:14 +0000 Subject: [PATCH 3/5] Added more clarifying docs on impact of retention on downsampling 
Signed-off-by: mzardab --- cmd/thanos/compact.go | 8 ++++---- docs/components/compact.md | 13 +++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 7555ed046da..4f2f73286d4 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -389,15 +389,15 @@ func runCompact( if retentionByResolution[compact.ResolutionLevelRaw].Seconds() != 0 { // If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution) - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Seconds() < downsample.ResLevel2DownsampleRange { - return errors.New("raw resolution must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Seconds() < downsample.ResLevel1DownsampleRange { + return errors.New("raw resolution must be higher than the minimum block size after which 1h resolution downsampling will occur (40 hours)") } level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) } if retentionByResolution[compact.ResolutionLevel5m].Seconds() != 0 { // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Seconds() < downsample.ResLevel1DownsampleRange { - return errors.New("5m resolution retention must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") + if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Seconds() < downsample.ResLevel2DownsampleRange { + return errors.New("5m resolution retention must be higher than the minimum block size after which 5m resolution downsampling will occur (10 days)") } level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) } diff --git a/docs/components/compact.md b/docs/components/compact.md index d5ffad27c41..363a80ee175 100644 --- a/docs/components/compact.md +++ b/docs/components/compact.md @@ -2,10 +2,9 @@ The `thanos compact` command applies the compaction procedure of the Prometheus 2.0 storage engine to block data stored in object storage. It is generally not semantically concurrency safe and must be deployed as a singleton against a bucket. -Compactor is also responsible for downsampling of data: - -* Creating 5m downsampling for blocks larger than **40 hours** (2d, 2w) -* Creating 1h downsampling for blocks larger than **10 days** (2w) +Compactor is also responsible for downsampling of data. There is a time delay before downsampling at a given resolution is possible. This is necessary because downsampled chunks will have fewer samples in them, and as chunks are fixed size, data spanning more time will be required to fill them. +* Creating 5m downsampling for blocks older than **40 hours** (2d) +* Creating 1h downsampling for blocks older than **10 days** (2w) Example: @@ -161,6 +160,12 @@ Resolution is a distance between data points on your graphs. E.g. 
* `5 minutes` - data point is every 5 minutes * `1 hour` - data point is every 1h +Compactor downsampling is done in two passes: +1) All raw resolution metrics that are older than **40 hours** are downsampled at a 5m resolution +2) All 5m resolution metrics older than **10 days** are downsampled at a 1h resolution + +> **NOTE:** If retention at each resolution is lower than minimum age for the successive downsampling pass, data will be deleted before downsampling can be completed. As a rule of thumb retention for each downsampling level should be the same, and should be greater than the maximum date range (10 days for 5m to 1h downsampling). + Keep in mind, that the initial goal of downsampling is not saving disk or object storage space. In fact, downsampling doesn't save you **any** space but instead, it adds 2 more blocks for each raw block which are only slightly smaller or relatively similar size to raw block. This is done by internal downsampling implementation which to be mathematically correct holds various aggregations. This means that downsampling can increase the size of your storage a bit (~3x), if you choose to store all resolutions (recommended and by default). The goal of downsampling is to provide an opportunity to get fast results for range queries of big time intervals like months or years. In other words, if you set `--retention.resolution-raw` less than `--retention.resolution-5m` and `--retention.resolution-1h` - you might run into a problem of not being able to "zoom in" to your historical data. From 0e173286148a60868a2446eb8d05bf7c5e6d2e41 Mon Sep 17 00:00:00 2001 From: mzardab Date: Tue, 11 Jan 2022 10:27:57 +0000 Subject: [PATCH 4/5] Updated CHANGELOG.md Signed-off-by: mzardab --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbc799f4689..6d9e41fd53d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4946](https://github.com/thanos-io/thanos/pull/4946) Store: Support tls_config configuration for the s3 minio client. - [#4974](https://github.com/thanos-io/thanos/pull/4974) Store: Support tls_config configuration for connecting with Azure storage. - [#4999](https://github.com/thanos-io/thanos/pull/4999) COS: Support `endpoint` configuration for vpc internal endpoint. +- [#5059](https://github.com/thanos-io/thanos/pull/5059) Compactor: Adding minimum retention flag validation for downsampling retention. ### Fixed - [#4918](https://github.com/thanos-io/thanos/pull/4918) Tracing: Fixing force tracing with Jaeger. 
From 11c30085776723884bdcea419415a800ed8a42b5 Mon Sep 17 00:00:00 2001 From: mzardab Date: Tue, 18 Jan 2022 10:00:07 +0000 Subject: [PATCH 5/5] Addressing PR comments Signed-off-by: mzardab --- cmd/thanos/compact.go | 8 ++------ test/e2e/e2ethanos/services.go | 1 + 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 4f2f73286d4..bad0c7f4a2a 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -390,22 +390,18 @@ func runCompact( if retentionByResolution[compact.ResolutionLevelRaw].Seconds() != 0 { // If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution) if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Seconds() < downsample.ResLevel1DownsampleRange { - return errors.New("raw resolution must be higher than the minimum block size after which 1h resolution downsampling will occur (40 hours)") + return errors.New("raw resolution must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)") } level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw]) } if retentionByResolution[compact.ResolutionLevel5m].Seconds() != 0 { // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Seconds() < downsample.ResLevel2DownsampleRange { - return errors.New("5m resolution retention must be higher than the minimum block size after which 5m resolution downsampling will occur (10 days)") + return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") } level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m]) } if retentionByResolution[compact.ResolutionLevel1h].Seconds() != 0 { - // If retention is lower than minimum downsample range, then no downsampling at this resolution will be persisted - if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel1h].Seconds() < downsample.ResLevel2DownsampleRange { - return errors.New("1h resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)") - } level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h]) } diff --git a/test/e2e/e2ethanos/services.go b/test/e2e/e2ethanos/services.go index 02bb35e4670..480e6814616 100644 --- a/test/e2e/e2ethanos/services.go +++ b/test/e2e/e2ethanos/services.go @@ -735,6 +735,7 @@ func NewCompactor(e e2e.Environment, name string, bucketConfig client.BucketConf "--selector.relabel-config": string(relabelConfigBytes), "--wait": "", }), extArgs...)...), + Readiness: e2e.NewHTTPReadinessProbe("http", "/-/ready", 200, 200), User: strconv.Itoa(os.Getuid()), WaitReadyBackoff: &defaultBackoffConfig, },
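
For readers following the back-and-forth between patches 2, 3 and 5, the retention rule the series finally settles on can be summarised in a small, self-contained sketch. This is an illustrative example only, not code taken from the patches: the function and parameter names (validateRetention, retentionRaw, retention5m) are invented here, the constants merely mirror ResLevel1DownsampleRange and ResLevel2DownsampleRange from pkg/compact/downsample, and the comparison is expressed in milliseconds for clarity.

    package main

    import (
        "errors"
        "fmt"
        "time"
    )

    // Downsampling thresholds mirroring pkg/compact/downsample (values in milliseconds).
    const (
        resLevel1DownsampleRange = 40 * 60 * 60 * 1000      // 40 hours: raw -> 5m downsampling.
        resLevel2DownsampleRange = 10 * 24 * 60 * 60 * 1000 // 10 days: 5m -> 1h downsampling.
    )

    // validateRetention sketches the rule the final patch converges on: when
    // downsampling is enabled, a non-zero retention must cover at least the block
    // range at which the next downsampling level is produced, otherwise blocks are
    // deleted before they can ever be downsampled.
    func validateRetention(disableDownsampling bool, retentionRaw, retention5m time.Duration) error {
        if disableDownsampling {
            return nil
        }
        if retentionRaw != 0 && retentionRaw.Milliseconds() < resLevel1DownsampleRange {
            return errors.New("raw retention is shorter than the 40h block range needed for 5m downsampling")
        }
        if retention5m != 0 && retention5m.Milliseconds() < resLevel2DownsampleRange {
            return errors.New("5m retention is shorter than the 10d block range needed for 1h downsampling")
        }
        return nil
    }

    func main() {
        // 30 hours of raw retention can never yield a 40h block, so 5m downsampling never happens.
        if err := validateRetention(false, 30*time.Hour, 0); err != nil {
            fmt.Println("config rejected:", err)
        }
        // 30 days of raw and 5m retention both clear their thresholds.
        if err := validateRetention(false, 30*24*time.Hour, 30*24*time.Hour); err == nil {
            fmt.Println("config accepted")
        }
    }

In flag terms this matches the documentation added in patch 3: with downsampling enabled, `--retention.resolution-raw` should be at least 40 hours and `--retention.resolution-5m` at least 10 days; keeping all three retention flags equal, as the docs suggest, sidesteps the problem entirely.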