-
Notifications
You must be signed in to change notification settings - Fork 178
Make sure blocks don't overlap to avoid outsider chunks #348
Changes from all commits
0e4be52
4ed6b9e
1e1b2e1
24b223c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,8 @@ import ( | |
|
||
"github.com/oklog/ulid" | ||
"github.com/pkg/errors" | ||
"github.com/prometheus/tsdb/chunks" | ||
"github.com/prometheus/tsdb/index" | ||
"github.com/prometheus/tsdb/labels" | ||
"github.com/prometheus/tsdb/testutil" | ||
) | ||
|
@@ -1096,3 +1098,90 @@ func TestOverlappingBlocksDetectsAllOverlaps(t *testing.T) { | |
{Min: 8, Max: 9}: {nc1[8], nc1[9]}, // 7-10, 8-9 | ||
}, OverlappingBlocks(nc1)) | ||
} | ||
|
||
// Regression test for https://github.com/prometheus/tsdb/issues/347 | ||
func TestChunkAtBlockBoundary(t *testing.T) { | ||
db, close := openTestDB(t, nil) | ||
defer close() | ||
defer db.Close() | ||
|
||
app := db.Appender() | ||
|
||
blockRange := DefaultOptions.BlockRanges[0] | ||
label := labels.FromStrings("foo", "bar") | ||
|
||
for i := int64(0); i < 3; i++ { | ||
_, err := app.Add(label, i*blockRange, 0) | ||
testutil.Ok(t, err) | ||
_, err = app.Add(label, i*blockRange+1000, 0) | ||
testutil.Ok(t, err) | ||
} | ||
|
||
err := app.Commit() | ||
testutil.Ok(t, err) | ||
|
||
_, err = db.compact() | ||
testutil.Ok(t, err) | ||
|
||
for _, block := range db.blocks { | ||
r, err := block.Index() | ||
testutil.Ok(t, err) | ||
defer r.Close() | ||
|
||
meta := block.Meta() | ||
|
||
p, err := r.Postings(index.AllPostingsKey()) | ||
testutil.Ok(t, err) | ||
|
||
var ( | ||
lset labels.Labels | ||
chks []chunks.Meta | ||
) | ||
|
||
chunkCount := 0 | ||
|
||
for p.Next() { | ||
err = r.Series(p.At(), &lset, &chks) | ||
testutil.Ok(t, err) | ||
for _, c := range chks { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we at least count chunks here? We should have 2 blocks (?) one chunk in each I think. That is the only thing I wanted to add in my extension. (wanted to assert on exact chunk time range, but just counting these would be nice as well) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can assert the number of chunks per block, yes. Not sure about checking the number of blocks, because it depends on this condition in if db.head.MaxTime()-db.head.MinTime() <= db.opts.BlockRanges[0]/2*3 {
break
} which could change and break the test for no reason. (In fact, we only have one block here because of this condition.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is a priority to test if actually, next block will include the chunk we ignore with
Can we somehow make it test resilient on change here?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair point, I'll look into it. |
||
testutil.Assert(t, meta.MinTime <= c.MinTime && c.MaxTime <= meta.MaxTime, | ||
"chunk spans beyond block boundaries: [block.MinTime=%d, block.MaxTime=%d]; [chunk.MinTime=%d, chunk.MaxTime=%d]", | ||
meta.MinTime, meta.MaxTime, c.MinTime, c.MaxTime) | ||
chunkCount++ | ||
} | ||
} | ||
testutil.Assert(t, chunkCount == 1, "expected 1 chunk in block %s, got %d", meta.ULID, chunkCount) | ||
} | ||
} | ||
|
||
func TestQuerierWithBoundaryChunks(t *testing.T) { | ||
db, close := openTestDB(t, nil) | ||
defer close() | ||
defer db.Close() | ||
|
||
app := db.Appender() | ||
|
||
blockRange := DefaultOptions.BlockRanges[0] | ||
label := labels.FromStrings("foo", "bar") | ||
|
||
for i := int64(0); i < 5; i++ { | ||
_, err := app.Add(label, i*blockRange, 0) | ||
testutil.Ok(t, err) | ||
} | ||
|
||
err := app.Commit() | ||
testutil.Ok(t, err) | ||
|
||
_, err = db.compact() | ||
testutil.Ok(t, err) | ||
|
||
testutil.Assert(t, len(db.blocks) >= 3, "invalid test, less than three blocks in DB") | ||
|
||
q, err := db.Querier(blockRange, 2*blockRange) | ||
testutil.Ok(t, err) | ||
defer q.Close() | ||
|
||
// The requested interval covers 2 blocks, so the querier should contain 2 blocks. | ||
count := len(q.(*querier).blocks) | ||
testutil.Assert(t, count == 2, "expected 2 blocks in querier, got %d", count) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bit confused. You have 2 method name
OverlapsClosedInterval
and commentReturns true if the block overlaps [mint, maxt].
but with different logic.The inner comments and logic make sense though. I think we could leave it as methods and name it just
isOverlapping
(or...halfOpen..
for block and...closed...
for chunk) and move inner comments to method comment.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These methods are good place to explain the difference in time ranges between block and chunks maybe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I can see why you find it confusing. The thing is that the argument of the
OverlapsClosedInterval()
method is always a closed interval; the difference is in the object calling the method.In the case of
block.OverlapsClosedInterval(mint, maxt)
, we compare the half-open[block.MinTime, block.MaxTime)
with the closed[mint, maxt]
; and in the case ofchunk.OverlapsClosedInterval(mint, maxt)
, we compare the closed[chunk.MinTime, chunk.MaxTime]
with the closed[mint, maxt]
.Does that make sense?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And you're right, I should expand on the difference between block and chunk intervals in those method descriptions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wait, but one is Closed, second is Half-Closed interval, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or even
Overlaps
.. basically we want to know ifmint
andmaxt
overlaps with block or chunk. And it gives us right answer (e.g: ifmint
is touchingblock.maxt
- there is NO overlap)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not 100% convinced it's clearer to just call it
Overlaps
. I wanted to highlight the fact themint
andmaxt
arguments refer to a closed interval to avoid someone using it to compare a block with another block, in which case[mint, maxt)
would be a half-open interval, whereas the method expects it to be closed.That being said, if it's the only thing keeping this PR from being merged, I'm happy to make the change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
block1.Overlaps(block2.min, block2.max)
is there anything confusing by the fact that it will check overlaps using[a, b)
because blocks are like this? I think having something likeblock1.OverlapsClosedInterval(block2.min, block2.max)
would be confusing for future readers of this code, because it is sayingclosed
but actually it is half open overlap logic. But I might be wrong here!There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But
block1.OverlapsClosedInterval(block2.min, block2.max)
would return the wrong result, because[block2.min, block2.max)
isn't a closed interval. You would getblock1.min <= block2.max && block2.min < block1.max
instead of the correct value for two[a, b)
intervals, which isblock1.min < block2.max && block2.min < block1.max
.That's why I wanted the method name to explicitly state that it requires a closed interval as its argument; it doesn't work on other types of intervals.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got you, agree (: