Skip to content

Commit

Permalink
Fix bottleneck in InternalResourceGroup::internalGenerateCpuQuota
Browse files Browse the repository at this point in the history
internalGenerateCpuQuota() is called from
InternalResourceGroupManager::refreshAndStartQueries(), which is called
every millisecond to start queries. Under a high query submission rate
internalGenerateCpuQuota() becomes a bottleneck when Math.multiplyExact()
throws an exception (which is possible with the default value of
cpuQuotaGenerationMillisPerSecond) while holding the monitor of the root
resource group. Plenty of methods in the query starting path grabs the
monitor of the root resource group, and since internalGenerateCpuQuota()
locks the root for extended amount of time (due to filling in the stack
traces when exceptions are thrown) the rate at which the coordinator
starts queries drops significantly. This change fixes that by using the
guava math methods, which don't throw exceptions on overflow.

In a performance test before the fix, given a config that allows 200
concurrent queries the coordinator was only able to run < 50 queries
in the cluster. After this fix it was able to saturate the entire cluster.
  • Loading branch information
nezihyigitbasi committed Aug 6, 2018
1 parent 85013d8 commit 2ac73e1
Showing 1 changed file with 7 additions and 18 deletions.
Expand Up @@ -60,6 +60,9 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.math.LongMath.saturatedAdd;
import static com.google.common.math.LongMath.saturatedMultiply;
import static com.google.common.math.LongMath.saturatedSubtract;
import static io.airlift.units.DataSize.Unit.BYTE;
import static java.lang.Math.min;
import static java.util.Objects.requireNonNull;
Expand Down Expand Up @@ -718,12 +721,7 @@ private void queryFinished(QueryExecution query)
if (query.getState() == QueryState.FINISHED || query.getQueryInfo().getErrorType() == USER_ERROR) {
InternalResourceGroup group = this;
while (group != null) {
try {
group.cpuUsageMillis = Math.addExact(group.cpuUsageMillis, query.getTotalCpuTime().toMillis());
}
catch (ArithmeticException e) {
group.cpuUsageMillis = Long.MAX_VALUE;
}
group.cpuUsageMillis = saturatedAdd(group.cpuUsageMillis, query.getTotalCpuTime().toMillis());
group = group.parent.orElse(null);
}
}
Expand Down Expand Up @@ -780,20 +778,11 @@ protected void internalGenerateCpuQuota(long elapsedSeconds)
{
checkState(Thread.holdsLock(root), "Must hold lock to generate cpu quota");
synchronized (root) {
long newQuota;
try {
newQuota = Math.multiplyExact(elapsedSeconds, cpuQuotaGenerationMillisPerSecond);
}
catch (ArithmeticException e) {
newQuota = Long.MAX_VALUE;
}
try {
cpuUsageMillis = Math.subtractExact(cpuUsageMillis, newQuota);
}
catch (ArithmeticException e) {
long newQuota = saturatedMultiply(elapsedSeconds, cpuQuotaGenerationMillisPerSecond);
cpuUsageMillis = saturatedSubtract(cpuUsageMillis, newQuota);
if (cpuUsageMillis < 0 || cpuUsageMillis == Long.MAX_VALUE) {
cpuUsageMillis = 0;
}
cpuUsageMillis = Math.max(0, cpuUsageMillis);
for (InternalResourceGroup group : subGroups.values()) {
group.internalGenerateCpuQuota(elapsedSeconds);
}
Expand Down

0 comments on commit 2ac73e1

Please sign in to comment.