Skip to content

Commit

Permalink
block: Gracefully handle extra duplicated chunks produced by a TSDB issue.
Browse files Browse the repository at this point in the history
TSDB issue: prometheus-junkyard/tsdb#347

How do we handle it?
- segregate this issue into a special stat entry in the verifier.
- auto-fix broken plan block before thanos compaction.
- add a repair job to run offline batch repair for individual blocks.

NOTE: At this point we have no way of fixing the bug when someone uses local compaction ):

Fixes #354

Signed-off-by: Bartek Plotka <bwplotka@gmail.com>
  • Loading branch information
bwplotka committed Jun 14, 2018
1 parent 35d7e10 commit 1dda32e
Show file tree
Hide file tree
Showing 8 changed files with 324 additions and 102 deletions.
23 changes: 22 additions & 1 deletion cmd/thanos/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func registerBucket(m map[string]setupFunc, app *kingpin.Application, name strin
PlaceHolder("<bucket>").String()
verifyIssues := verify.Flag("issues", fmt.Sprintf("Issues to verify (and optionally repair). Possible values: %v", allIssues())).
Short('i').Default(verifier.IndexIssueID, verifier.OverlappedBlocksIssueID).Strings()
verifyIDWhitelist := verify.Flag("id-whitelist", "Block IDs to verify (and optionally repair) only. "+
"If none is specified, all blocks will be verified. Repeated field").Strings()
m[name+" verify"] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ bool) error {
bkt, closeFn, err := client.NewBucket(gcsBucket, *s3Config, reg, name)
if err != nil {
Expand Down Expand Up @@ -100,7 +102,26 @@ func registerBucket(m map[string]setupFunc, app *kingpin.Application, name strin
v = verifier.New(logger, bkt, issues)
}

return v.Verify(ctx)
// idMatcher selects which block IDs are passed on for verification.
// With no --id-whitelist values every block matches.
idMatcher := func(ulid.ULID) bool { return true }
if len(*verifyIDWhitelist) > 0 {
	// Build a set of the whitelisted IDs, keyed by the string form produced
	// by ULID.String() so lookups below use the same representation.
	whitelistIDs := make(map[string]struct{}, len(*verifyIDWhitelist))
	for _, bid := range *verifyIDWhitelist {
		// Reject malformed IDs up front instead of silently matching nothing.
		id, err := ulid.Parse(bid)
		if err != nil {
			return errors.Wrap(err, "invalid ULID found in --id-whitelist flag")
		}
		whitelistIDs[id.String()] = struct{}{}
	}

	// Only blocks whose ID is present in the whitelist set match.
	idMatcher = func(id ulid.ULID) bool {
		_, ok := whitelistIDs[id.String()]
		return ok
	}
}

return v.Verify(ctx, idMatcher)
}

ls := cmd.Command("ls", "list all blocks in the bucket")
Expand Down
21 changes: 15 additions & 6 deletions cmd/thanos/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,23 @@ func runCompact(
done := true
for _, g := range groups {
id, err := g.Compact(ctx, compactDir, comp)
if err != nil {
return errors.Wrap(err, "compaction")
if err == nil {
// If the returned ID has a zero value, the group had no blocks to be compacted.
// We keep going through the outer loop until no group has any work left.
if id != (ulid.ULID{}) {
done = false
}
continue
}
// If the returned ID has a zero value, the group had no blocks to be compacted.
// We keep going through the outer loop until no group has any work left.
if id != (ulid.ULID{}) {
done = false

if compact.IsIssue347Error(err) {
err = compact.RepairIssue347(ctx, logger, bkt, err)
if err == nil {
done = false
continue
}
}
return errors.Wrap(err, "compaction")
}
if done {
break
Expand Down
Loading

0 comments on commit 1dda32e

Please sign in to comment.