Skip to content

Commit

Permalink
Add metrics for repair progress + time since last repair. Fixes #207.
Browse files Browse the repository at this point in the history
  • Loading branch information
Radovan Zvoncek committed Oct 28, 2017
1 parent f188f92 commit e24a3a2
Showing 1 changed file with 38 additions and 4 deletions.
Expand Up @@ -33,6 +33,8 @@
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.stream.Collectors;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.Collections2;
Expand All @@ -56,6 +58,8 @@ public final class RepairRunner implements Runnable {
private JmxProxy jmxConnection;
private final AtomicReferenceArray<UUID> currentlyRunningSegments;
private final List<RingRange> parallelRanges;
private final String metricNameForMillisSinceLastRepair;
private final String metricNameForRepairProgress;

public RepairRunner(AppContext context, UUID repairRunId) throws ReaperException {
LOG.debug("Creating RepairRunner for run with ID {}", repairRunId);
Expand Down Expand Up @@ -96,6 +100,9 @@ public RepairRunner(AppContext context, UUID repairRunId) throws ReaperException
parallelRanges = getParallelRanges(
parallelRepairs,
Lists.newArrayList(Collections2.transform(repairSegments, segment -> segment.getTokenRange())));

metricNameForMillisSinceLastRepair = metricName("millisSinceLastRepair", keyspace);
metricNameForRepairProgress = metricName("repairProgress", keyspace);
}

public UUID getRepairRunId() {
Expand Down Expand Up @@ -200,14 +207,20 @@ private void endRepairRun() {
LOG.info("Repairs for repair run #{} done", repairRunId);
synchronized (this) {
RepairRun repairRun = context.storage.getRepairRun(repairRunId).get();
DateTime repairRunCompleted = DateTime.now();
context.storage.updateRepairRun(
repairRun
.with()
.runState(RepairRun.RunState.DONE)
.endTime(DateTime.now())
.endTime(repairRunCompleted)
.lastEvent("All done")
.build(repairRun.getId()));
killAndCleanupRunner();

context.metricRegistry.remove(metricNameForMillisSinceLastRepair);
context.metricRegistry.register(
metricNameForMillisSinceLastRepair,
(Gauge<Long>) () -> DateTime.now().getMillis() - repairRunCompleted.toInstant().getMillis());
}
}

Expand Down Expand Up @@ -290,9 +303,13 @@ private void startNextSegment() throws ReaperException, InterruptedException {
}

if (!repairStarted && !anythingRunningStill) {
int amountDone = context.storage.getSegmentAmountForRepairRunWithState(repairRunId, RepairSegment.State.DONE);
LOG.info("Repair amount done {}", amountDone);
if (amountDone == context.storage.getSegmentAmountForRepairRun(repairRunId)) {
int segmentsDone = context.storage.getSegmentAmountForRepairRunWithState(repairRunId, RepairSegment.State.DONE);
int segmentsTotal = context.storage.getSegmentAmountForRepairRun(repairRunId);

LOG.info("Repair amount done {}", segmentsDone);
updateRepairProgressMetric(segmentsDone, segmentsTotal);

if (segmentsDone == segmentsTotal) {
endRepairRun();
scheduleRetry = false;
}
Expand Down Expand Up @@ -321,6 +338,9 @@ private boolean repairSegment(final int rangeIndex, final UUID segmentId, RingRa
unitId = repairRun.getRepairUnitId();
intensity = repairRun.getIntensity();
validationParallelism = repairRun.getRepairParallelism();

int amountDone = context.storage.getSegmentAmountForRepairRunWithState(repairRunId, RepairSegment.State.DONE);
updateRepairProgressMetric(amountDone, repairRun.getSegmentCount());
}

RepairUnit repairUnit = context.storage.getRepairUnit(unitId).get();
Expand Down Expand Up @@ -481,4 +501,18 @@ public void killAndCleanupRunner() {
}
Thread.currentThread().interrupt();
}

/**
 * Builds the registry name for one of this runner's metrics.
 *
 * <p>Every character that is not an ASCII letter or digit is stripped from both the cluster name
 * and the keyspace name before they are handed to {@link MetricRegistry#name(Class, String...)}
 * (presumably so that they cannot introduce extra separators into the metric name).
 *
 * @param metric the short metric identifier, used as given
 * @param keyspace the keyspace name; sanitised before use
 * @return the name produced by {@code MetricRegistry.name} from this class, {@code metric}, the
 *     sanitised cluster name and the sanitised keyspace name, in that order
 */
private String metricName(String metric, String keyspace) {
  final String nonAlphanumeric = "[^A-Za-z0-9]";
  return MetricRegistry.name(
      RepairRunner.class,
      metric,
      clusterName.replaceAll(nonAlphanumeric, ""),
      keyspace.replaceAll(nonAlphanumeric, ""));
}

/**
 * Publishes the current repair progress as a gauge, replacing any gauge previously registered
 * under {@code metricNameForRepairProgress}.
 *
 * <p>The reported value is {@code segmentsDone / segmentsTotal} as a float. It is computed once,
 * here, rather than on every gauge read, since both inputs are fixed at call time. When
 * {@code segmentsTotal} is not positive the ratio is undefined (a float division by zero would
 * yield NaN or Infinity), so 0 is reported instead.
 *
 * @param segmentsDone number of segments already repaired
 * @param segmentsTotal total number of segments in the repair run
 */
private void updateRepairProgressMetric(int segmentsDone, int segmentsTotal) {
  // Guard the division: 0f / 0 is NaN, which is not a meaningful progress value.
  final float progress = segmentsTotal > 0 ? (float) segmentsDone / segmentsTotal : 0f;

  // NOTE(review): remove + register is not atomic; if several runners can share this metric
  // name and call this concurrently, register may reject a duplicate name -- confirm.
  context.metricRegistry.remove(metricNameForRepairProgress);
  context.metricRegistry.register(metricNameForRepairProgress, (Gauge<Float>) () -> progress);
}

}

0 comments on commit e24a3a2

Please sign in to comment.