Skip to content

Commit

Permalink
Add schedule triggers for incremental repairs based on % unrepaired m…
Browse files Browse the repository at this point in the history
…etrics
  • Loading branch information
adejanovski committed Oct 12, 2021
1 parent 5b61e41 commit b8a89e6
Show file tree
Hide file tree
Showing 31 changed files with 746 additions and 250 deletions.
3 changes: 2 additions & 1 deletion src/docs/content/docs/usage/schedule.md
Expand Up @@ -20,7 +20,7 @@ Click the *schedule* menu item on the left side to navigate to the Schedules pag

## Fill in the Details

Enter values for the keyspace, tables, owner and other fields and click *Add Schedule* button. The details for adding a schedule are similar to the details for [Repair](../single) form except the "Clause" field is replaced with two fields; "Start time" and "Interval in days". See the table below for further information the two fields.
Enter values for the keyspace, tables, owner and other fields and click *Add Schedule* button. The details for adding a schedule are similar to the details for [Repair](../single) form except the "Cause" field is replaced with three fields: "Start time", "Interval in days" and "Percent unrepaired threshold". See the table below for further information on these three fields.

{{< screenshot src="/img/add_schedule.png" />}}

Expand All @@ -30,6 +30,7 @@ Enter values for the keyspace, tables, owner and other fields and click *Add Sch
---|---
**Start time** | The time to trigger repairs, based in GMT.
**Interval in days** | The frequency for the schedule to be run.
**Percent unrepaired threshold** | *For incremental repair only!* Sets the percentage of unrepaired data over which a repair run will be started for this schedule. As soon as one table in the set of tables managed by this schedule gets over the threshold, the run will be triggered.

<br/>

Expand Down
2 changes: 1 addition & 1 deletion src/docs/content/faq.md
@@ -1,7 +1,7 @@
# Frequently Asked Questions


### Why use Reaper instead of noddetool + cron?
### Why use Reaper instead of nodetool + cron?

While it's possible to set up crontab to call nodetool, it requires staggering the crons to ensure overlap is kept to a minimum. Reaper is able to intelligently schedule repairs to avoid putting too much load on the cluster, avoiding impacting performance. Reaper also offers a simple UI to schedule repairs as granularly as needed.

Expand Down
3 changes: 2 additions & 1 deletion src/server/src/main/java/io/cassandrareaper/AppContext.java
Expand Up @@ -28,6 +28,7 @@
import java.util.concurrent.atomic.AtomicBoolean;

import com.codahale.metrics.MetricRegistry;
import com.datastax.driver.core.utils.UUIDs;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -39,7 +40,7 @@ public final class AppContext {

public static final String REAPER_INSTANCE_ADDRESS = Private.initialiseInstanceAddress();

public final UUID reaperInstanceId = UUID.randomUUID();
public final UUID reaperInstanceId = UUIDs.timeBased();
public final AtomicBoolean isRunning = new AtomicBoolean(true);
public final AtomicBoolean isDistributed = new AtomicBoolean(false);
public IStorage storage;
Expand Down
Expand Up @@ -442,7 +442,7 @@ public Integer getPercentRepairedCheckIntervalMinutes() {
}

@JsonProperty("percentRepairedCheckIntervalMinutes")
public void setpercentRepairedCheckIntervalMinutes(Integer percentRepairedCheckIntervalMinutes) {
public void setPercentRepairedCheckIntervalMinutes(Integer percentRepairedCheckIntervalMinutes) {
this.percentRepairedCheckIntervalMinutes = percentRepairedCheckIntervalMinutes;
}

Expand Down Expand Up @@ -544,6 +544,13 @@ public static final class AutoSchedulingConfiguration {
@JsonProperty
private Boolean adaptive;

@JsonProperty
private Boolean incremental;

@JsonProperty
private Integer percentUnrepairedThreshold;


/**
 * Returns the {@code enabled} flag of this auto-scheduling configuration.
 *
 * @return the value of the {@code enabled} field, returned as-is (no default is applied here)
 */
public Boolean isEnabled() {
return enabled;
}
Expand Down Expand Up @@ -612,6 +619,22 @@ public void setAdaptive(Boolean adaptive) {
this.adaptive = adaptive;
}

/**
 * Tells whether the {@code incremental} flag of this configuration is switched on.
 *
 * @return {@code true} only when the flag has been explicitly set to true; an unset
 *     ({@code null}) flag is reported as {@code false}
 */
public Boolean incremental() {
  // Boolean.TRUE.equals(..) is null-safe: a missing setting falls back to false.
  return Boolean.TRUE.equals(incremental);
}

/**
 * Stores the {@code incremental} flag.
 *
 * @param incremental the new value; may be {@code null}, which {@code incremental()} reports as false
 */
public void setIncremental(Boolean incremental) {
this.incremental = incremental;
}

/**
 * Returns the percent-unrepaired threshold of this configuration.
 *
 * @return the configured threshold, or {@code -1} when no threshold has been set
 */
public Integer getPercentUnrepairedThreshold() {
  if (percentUnrepairedThreshold == null) {
    // No value configured: fall back to the -1 sentinel.
    return -1;
  }
  return percentUnrepairedThreshold.intValue();
}

/**
 * Stores the percent-unrepaired threshold.
 *
 * @param percentUnrepairedThreshold the new value; {@code null} makes the getter return {@code -1}
 */
public void setPercentUnrepairedThreshold(Integer percentUnrepairedThreshold) {
this.percentUnrepairedThreshold = percentUnrepairedThreshold;
}

@Override
public String toString() {
return "AutoSchedulingConfiguration{"
Expand Down
Expand Up @@ -47,12 +47,23 @@ public class EditableRepairSchedule {
@Max(value = 1000)
protected Integer segmentCountPerNode;

@JsonProperty(value = "percent_unrepaired_threshold")
@Min(value = -1)
@Max(value = 99)
protected Integer percentUnrepairedThreshold;

protected Boolean adaptive;


/**
 * Creates an instance in which every editable attribute starts out as {@code null}.
 */
public EditableRepairSchedule() {
  // Ownership and repair tuning attributes.
  this.owner = null;
  this.intensity = null;
  this.repairParallelism = null;
  this.segmentCountPerNode = null;

  // Triggering attributes.
  this.daysBetween = null;
  this.adaptive = null;
  this.percentUnrepairedThreshold = null;
}

public String getOwner() {
Expand Down Expand Up @@ -94,4 +105,21 @@ public Integer getSegmentCountPerNode() {
/**
 * Stores the segment count per node.
 *
 * @param segmentCountPerNode the new value; may be {@code null}
 */
public void setSegmentCountPerNode(Integer segmentCountPerNode) {
this.segmentCountPerNode = segmentCountPerNode;
}

/**
 * Returns the percent-unrepaired threshold of this schedule.
 *
 * @return the stored threshold, or {@code -1} when none has been set (never {@code null})
 */
public Integer getPercentUnrepairedThreshold() {
  if (percentUnrepairedThreshold == null) {
    // Unset threshold is exposed as the -1 sentinel rather than null.
    return -1;
  }
  return percentUnrepairedThreshold.intValue();
}

/**
 * Stores the percent-unrepaired threshold.
 *
 * @param percentUnrepairedThreshold the new value; {@code null} makes the getter return {@code -1}
 */
public void setPercentUnrepairedThreshold(Integer percentUnrepairedThreshold) {
this.percentUnrepairedThreshold = percentUnrepairedThreshold;
}

/**
 * Returns the adaptive flag.
 *
 * @return the stored value, which is {@code null} until a value has been assigned
 */
public Boolean getAdaptive() {
return adaptive;
}

/**
 * Stores the adaptive flag.
 *
 * <p>The parameter is the boxed {@code Boolean} so that it matches the nullable field and
 * {@link #getAdaptive()}: an explicit {@code null} is kept as "not specified" instead of
 * being coerced to {@code false}. Callers passing a primitive {@code boolean} keep working
 * through autoboxing.
 *
 * @param adaptive the new value, or {@code null} to leave the attribute unspecified
 */
public void setAdaptive(Boolean adaptive) {
  this.adaptive = adaptive;
}

}
Expand Up @@ -36,12 +36,9 @@ public final class RepairSchedule extends EditableRepairSchedule {
private final State state;
private final DateTime nextActivation;
private final ImmutableList<UUID> runHistory;
private final RepairParallelism repairParallelism;
private final double intensity;
private final DateTime creationTime;
private final DateTime pauseTime;
private final int segmentCountPerNode;
private final boolean adaptive;
private final UUID lastRun;

private RepairSchedule(Builder builder, UUID id) {
this.id = id;
Expand All @@ -57,6 +54,8 @@ private RepairSchedule(Builder builder, UUID id) {
this.pauseTime = builder.pauseTime;
this.segmentCountPerNode = builder.segmentCountPerNode;
this.adaptive = builder.adaptive;
this.percentUnrepairedThreshold = builder.percentUnrepairedThreshold;
this.lastRun = builder.lastRun;
}

public static Builder builder(UUID repairUnitId) {
Expand Down Expand Up @@ -87,6 +86,10 @@ public ImmutableList<UUID> getRunHistory() {
return runHistory;
}

/**
 * Returns the {@code lastRun} id that was supplied through the builder.
 *
 * @return the stored UUID; may be {@code null} if the builder was never given one
 */
public UUID getLastRun() {
return lastRun;
}

/**
* Required for JDBI mapping into database. Generic collection type would be hard to map into Postgres array types.
*/
Expand All @@ -95,34 +98,14 @@ public LongCollectionSqlType getRunHistorySql() {
return new LongCollectionSqlType(list);
}

public Integer getSegmentCountPerNode() {
return segmentCountPerNode;
}

public RepairParallelism getRepairParallelism() {
return repairParallelism;
}

public Double getIntensity() {
return intensity;
}

/**
 * Returns the {@code creationTime} recorded for this schedule.
 */
public DateTime getCreationTime() {
return creationTime;
}

public String getOwner() {
return owner;
}

/**
 * Returns the {@code pauseTime} recorded for this schedule.
 */
public DateTime getPauseTime() {
return pauseTime;
}

public boolean getAdaptive() {
return adaptive;
}

/**
 * Creates a new {@link Builder} constructed from this schedule.
 *
 * @return a builder that was handed this instance as its original
 */
public Builder with() {
return new Builder(this);
}
Expand Down Expand Up @@ -152,6 +135,8 @@ public static final class Builder {
private DateTime pauseTime;
private Integer segmentCountPerNode;
private boolean adaptive = false;
private Integer percentUnrepairedThreshold;
private UUID lastRun;

private Builder(UUID repairUnitId) {
this.repairUnitId = repairUnitId;
Expand All @@ -168,9 +153,10 @@ private Builder(RepairSchedule original) {
creationTime = original.creationTime;
owner = original.owner;
pauseTime = original.pauseTime;
intensity = original.intensity;
segmentCountPerNode = original.segmentCountPerNode;
adaptive = original.adaptive;
percentUnrepairedThreshold = original.percentUnrepairedThreshold;
lastRun = original.lastRun;
}

public Builder state(State state) {
Expand Down Expand Up @@ -228,6 +214,16 @@ public Builder adaptive(boolean adaptive) {
return this;
}

/**
 * Sets the percent-unrepaired threshold to copy into the built schedule.
 *
 * @param percentUnrepairedThreshold the threshold value; may be {@code null}
 * @return this builder, to allow call chaining
 */
public Builder percentUnrepairedThreshold(Integer percentUnrepairedThreshold) {
this.percentUnrepairedThreshold = percentUnrepairedThreshold;
return this;
}

/**
 * Sets the {@code lastRun} id to copy into the built schedule.
 *
 * @param lastRun the UUID to store; may be {@code null}
 * @return this builder, to allow call chaining
 */
public Builder lastRun(UUID lastRun) {
this.lastRun = lastRun;
return this;
}

public RepairSchedule build(UUID id) {
Preconditions.checkState(null != daysBetween, "daysBetween(..) must be called before build(..)");
Preconditions.checkState(null != nextActivation, "nextActivation(..) must be called before build(..)");
Expand Down
Expand Up @@ -126,7 +126,9 @@ public Response patchRepairSchedule(
editableRepairSchedule.getRepairParallelism(),
editableRepairSchedule.getIntensity(),
editableRepairSchedule.getDaysBetween(),
editableRepairSchedule.getSegmentCountPerNode()
editableRepairSchedule.getSegmentCountPerNode(),
editableRepairSchedule.getAdaptive(),
editableRepairSchedule.getPercentUnrepairedThreshold()
);

// Attempt to update the schedule
Expand Down Expand Up @@ -167,7 +169,8 @@ public Response addRepairSchedule(
@QueryParam("repairThreadCount") Optional<Integer> repairThreadCountParam,
@QueryParam("force") Optional<String> forceParam,
@QueryParam("timeout") Optional<Integer> timeoutParam,
@QueryParam("adaptive") Optional<String> adaptiveParam) {
@QueryParam("adaptive") Optional<String> adaptiveParam,
@QueryParam("percentUnrepairedThreshold") Optional<Integer> percentUnrepairedParam) {

try {
Response possibleFailResponse = RepairRunResource.checkRequestForAddRepair(
Expand Down Expand Up @@ -264,6 +267,12 @@ public Response addRepairSchedule(
.build();
}

if (percentUnrepairedParam.orElse(-1) > 0 && !incremental) {
return Response.status(Response.Status.BAD_REQUEST)
.entity("Triggering schedules on % unrepaired threshold is only allowed for incremental repairs.")
.build();
}

// explicitly force a schedule even if the schedule conflicts
boolean force = (forceParam.isPresent() ? Boolean.parseBoolean(forceParam.get()) : false);

Expand Down Expand Up @@ -293,7 +302,8 @@ public Response addRepairSchedule(
getSegmentCount(segmentCountPerNode),
getIntensity(intensityStr),
force,
adaptive);
adaptive,
percentUnrepairedParam.orElse(-1));

} catch (ReaperException e) {
LOG.error(e.getMessage(), e);
Expand All @@ -313,7 +323,8 @@ private Response addRepairSchedule(
int segments,
Double intensity,
boolean force,
boolean adaptive) {
boolean adaptive,
int percentUnrepairedThreshold) {

Optional<RepairSchedule> conflictingRepairSchedule
= repairScheduleService.identicalRepairUnit(cluster, unitBuilder);
Expand Down Expand Up @@ -353,9 +364,13 @@ private Response addRepairSchedule(

Preconditions
.checkState(unit.getIncrementalRepair() == incremental, "%s!=%s", unit.getIncrementalRepair(), incremental);
Preconditions
.checkState((percentUnrepairedThreshold > 0 && incremental) || percentUnrepairedThreshold <= 0,
"Setting a % repaired threshold can only be done on incremental schedules");

RepairSchedule newRepairSchedule = repairScheduleService
.storeNewRepairSchedule(cluster, unit, days, next, owner, segments, parallel, intensity, force, adaptive);
.storeNewRepairSchedule(
cluster, unit, days, next, owner, segments, parallel, intensity, force, adaptive, percentUnrepairedThreshold);

return Response.created(buildRepairScheduleUri(uriInfo, newRepairSchedule)).build();
}
Expand Down Expand Up @@ -661,14 +676,18 @@ public List<PercentRepairedMetric> getPercentRepairedMetricsForSchedule(
* @param intensity - The intensity value to be used in the update
* @param scheduleDaysBetween - The days between value to be used in the update
* @param segmentCountPerNode - The segments per node value to be used in the update
* @param adaptive - Whether or not the schedule is adaptive
* @param percentUnrepairedThreshold - Threshold of unrepaired percentage that triggers a repair
*/
protected static RepairSchedule applyRepairPatchParams(
final RepairSchedule repairSchedule,
final String owner,
final RepairParallelism repairParallelism,
final Double intensity,
final Integer scheduleDaysBetween,
final Integer segmentCountPerNode
final Integer segmentCountPerNode,
final Boolean adaptive,
final Integer percentUnrepairedThreshold
) {
if (repairSchedule == null) {
return null;
Expand All @@ -683,6 +702,8 @@ protected static RepairSchedule applyRepairPatchParams(
.segmentCountPerNode(segmentCountPerNode != null
? segmentCountPerNode
: repairSchedule.getSegmentCountPerNode())
.percentUnrepairedThreshold(percentUnrepairedThreshold)
.adaptive(adaptive != null ? adaptive : false)
.build(repairSchedule.getId());
}
}
Expand Up @@ -94,6 +94,9 @@ public final class RepairScheduleStatus {
@JsonProperty("adaptive")
private boolean adaptive;

@JsonProperty("percent_unrepaired_threshold")
private int percentUnrepairedThreshold;

/**
* Default public constructor Required for Jackson JSON parsing.
*/
Expand Down Expand Up @@ -121,7 +124,8 @@ public RepairScheduleStatus(
int repairThreadCount,
UUID repairUnitId,
int segmentTimeout,
boolean adaptive) {
boolean adaptive,
int percentUnrepairedThreshold) {

this.id = id;
this.owner = owner;
Expand All @@ -144,6 +148,7 @@ public RepairScheduleStatus(
this.repairUnitId = repairUnitId;
this.segmentTimeout = segmentTimeout;
this.adaptive = adaptive;
this.percentUnrepairedThreshold = percentUnrepairedThreshold;
}

public RepairScheduleStatus(RepairSchedule repairSchedule, RepairUnit repairUnit) {
Expand All @@ -168,7 +173,8 @@ public RepairScheduleStatus(RepairSchedule repairSchedule, RepairUnit repairUnit
repairUnit.getRepairThreadCount(),
repairUnit.getId(),
repairUnit.getTimeout(),
repairSchedule.getAdaptive());
repairSchedule.getAdaptive(),
repairSchedule.getPercentUnrepairedThreshold() == null ? -1 : repairSchedule.getPercentUnrepairedThreshold());
}

public UUID getId() {
Expand Down Expand Up @@ -374,4 +380,12 @@ public boolean getAdaptive() {
/**
 * Stores the adaptive flag of this status object.
 *
 * @param adaptive the new value
 */
public void setAdaptive(boolean adaptive) {
this.adaptive = adaptive;
}

/**
 * Returns the percent-unrepaired threshold, the field mapped to the
 * {@code percent_unrepaired_threshold} JSON property.
 */
public int getPercentUnrepairedThreshold() {
return percentUnrepairedThreshold;
}

/**
 * Stores the percent-unrepaired threshold of this status object.
 *
 * @param percentUnrepairedThreshold the new value
 */
public void setPercentUnrepairedThreshold(int percentUnrepairedThreshold) {
this.percentUnrepairedThreshold = percentUnrepairedThreshold;
}
}

0 comments on commit b8a89e6

Please sign in to comment.