Skip to content

Commit c0a15c5

Browse files
author
Josef Eisl
committed
[GR-6817] [GR-1260] Mitigate out-of-memory problem in linear scan.
PullRequest: graal/1282
2 parents 096f11b + 524ecfc commit c0a15c5

File tree

2 files changed

+95
-26
lines changed

2 files changed

+95
-26
lines changed

compiler/mx.compiler/mx_graal_benchmark.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,58 @@ def rules(self, out, benchmarks, bmSuiteArgs):
331331
] + super(CounterBenchmarkMixin, self).rules(out, benchmarks, bmSuiteArgs)
332332

333333

334-
class DaCapoTimingBenchmarkMixin(TimingBenchmarkMixin, CounterBenchmarkMixin):
334+
class MemUseTrackerBenchmarkMixin(DebugValueBenchmarkMixin):
335+
trackers = [
336+
# LIR stages
337+
"LIRPhaseMemUse_AllocationStage",
338+
"LIRPhaseMemUse_PostAllocationOptimizationStage",
339+
"LIRPhaseMemUse_PreAllocationOptimizationStage",
340+
# RA phases
341+
"LIRPhaseMemUse_LinearScanPhase",
342+
"LIRPhaseMemUse_GlobalLivenessAnalysisPhase",
343+
"LIRPhaseMemUse_TraceBuilderPhase",
344+
"LIRPhaseMemUse_TraceRegisterAllocationPhase",
345+
]
346+
name_re = re.compile(r"(?P<name>\w+)_Accm")
347+
348+
@staticmethod
349+
def counterArgs():
350+
return "-Dgraal.MemUseTrackers=" + ','.join(MemUseTrackerBenchmarkMixin.trackers)
351+
352+
def vmArgs(self, bmSuiteArgs):
353+
vmArgs = [MemUseTrackerBenchmarkMixin.counterArgs()] + super(MemUseTrackerBenchmarkMixin, self).vmArgs(bmSuiteArgs)
354+
return vmArgs
355+
356+
@staticmethod
357+
def filterResult(r):
358+
m = MemUseTrackerBenchmarkMixin.name_re.match(r['name'])
359+
if m:
360+
name = m.groupdict()['name']
361+
if name in MemUseTrackerBenchmarkMixin.trackers:
362+
r['name'] = name
363+
return r
364+
return None
365+
366+
def shorten_vm_flags(self, args):
367+
# not needed for tracker names
368+
filtered_args = [x for x in args if not x.startswith("-Dgraal.MemUseTrackers=")]
369+
return super(MemUseTrackerBenchmarkMixin, self).shorten_vm_flags(filtered_args)
370+
371+
def rules(self, out, benchmarks, bmSuiteArgs):
372+
return [
373+
DebugValueRule(
374+
debug_value_file=self.get_csv_filename(),
375+
benchmark=self.getBenchmarkName(),
376+
bench_suite=self.benchSuiteName(),
377+
metric_name="allocated-memory",
378+
metric_unit="B",
379+
vm_flags=self.shorten_vm_flags(self.vmArgs(bmSuiteArgs)),
380+
filter_fn=MemUseTrackerBenchmarkMixin.filterResult,
381+
),
382+
] + super(MemUseTrackerBenchmarkMixin, self).rules(out, benchmarks, bmSuiteArgs)
383+
384+
385+
class DaCapoTimingBenchmarkMixin(TimingBenchmarkMixin, CounterBenchmarkMixin, MemUseTrackerBenchmarkMixin):
335386

336387
def host_vm_config_name(self, host_vm, vm):
337388
return super(DaCapoTimingBenchmarkMixin, self).host_vm_config_name(host_vm, vm) + "-timing"

compiler/src/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/alloc/lsra/LinearScanLifetimeAnalysisPhase.java

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -160,21 +160,23 @@ void computeLocalLiveSets() {
160160
intervalInLoop = new BitMap2D(allocator.operandSize(), allocator.numLoops());
161161

162162
try {
163+
final BitSet liveGenScratch = new BitSet(liveSize);
164+
final BitSet liveKillScratch = new BitSet(liveSize);
163165
// iterate all blocks
164166
for (final AbstractBlockBase<?> block : allocator.sortedBlocks()) {
165167
try (Indent indent = debug.logAndIndent("compute local live sets for block %s", block)) {
166168

167-
final BitSet liveGen = new BitSet(liveSize);
168-
final BitSet liveKill = new BitSet(liveSize);
169+
liveGenScratch.clear();
170+
liveKillScratch.clear();
169171

170172
ArrayList<LIRInstruction> instructions = allocator.getLIR().getLIRforBlock(block);
171173
int numInst = instructions.size();
172174

173175
ValueConsumer useConsumer = (operand, mode, flags) -> {
174176
if (isVariable(operand)) {
175177
int operandNum = allocator.operandNumber(operand);
176-
if (!liveKill.get(operandNum)) {
177-
liveGen.set(operandNum);
178+
if (!liveKillScratch.get(operandNum)) {
179+
liveGenScratch.set(operandNum);
178180
if (debug.isLogEnabled()) {
179181
debug.log("liveGen for operand %d(%s)", operandNum, operand);
180182
}
@@ -185,14 +187,14 @@ void computeLocalLiveSets() {
185187
}
186188

187189
if (allocator.detailedAsserts) {
188-
verifyInput(block, liveKill, operand);
190+
verifyInput(block, liveKillScratch, operand);
189191
}
190192
};
191193
ValueConsumer stateConsumer = (operand, mode, flags) -> {
192194
if (LinearScan.isVariableOrRegister(operand)) {
193195
int operandNum = allocator.operandNumber(operand);
194-
if (!liveKill.get(operandNum)) {
195-
liveGen.set(operandNum);
196+
if (!liveKillScratch.get(operandNum)) {
197+
liveGenScratch.set(operandNum);
196198
if (debug.isLogEnabled()) {
197199
debug.log("liveGen in state for operand %d(%s)", operandNum, operand);
198200
}
@@ -202,7 +204,7 @@ void computeLocalLiveSets() {
202204
ValueConsumer defConsumer = (operand, mode, flags) -> {
203205
if (isVariable(operand)) {
204206
int varNum = allocator.operandNumber(operand);
205-
liveKill.set(varNum);
207+
liveKillScratch.set(varNum);
206208
if (debug.isLogEnabled()) {
207209
debug.log("liveKill for operand %d(%s)", varNum, operand);
208210
}
@@ -217,7 +219,7 @@ void computeLocalLiveSets() {
217219
* be processed in live sets. Process them only in debug mode so that
218220
* this can be checked
219221
*/
220-
verifyTemp(liveKill, operand);
222+
verifyTemp(liveKillScratch, operand);
221223
}
222224
};
223225

@@ -239,10 +241,11 @@ void computeLocalLiveSets() {
239241
} // end of instruction iteration
240242

241243
BlockData blockSets = allocator.getBlockData(block);
242-
blockSets.liveGen = liveGen;
243-
blockSets.liveKill = liveKill;
244-
blockSets.liveIn = new BitSet(liveSize);
245-
blockSets.liveOut = new BitSet(liveSize);
244+
blockSets.liveGen = trimClone(liveGenScratch);
245+
blockSets.liveKill = trimClone(liveKillScratch);
246+
// sticky size, will get non-sticky in computeGlobalLiveSets
247+
blockSets.liveIn = new BitSet(0);
248+
blockSets.liveOut = new BitSet(0);
246249

247250
if (debug.isLogEnabled()) {
248251
debug.log("liveGen B%d %s", block.getId(), blockSets.liveGen);
@@ -292,7 +295,7 @@ protected void computeGlobalLiveSets() {
292295
boolean changeOccurred;
293296
boolean changeOccurredInBlock;
294297
int iterationCount = 0;
295-
BitSet liveOut = new BitSet(allocator.liveSetSize()); // scratch set for calculations
298+
BitSet scratch = new BitSet(allocator.liveSetSize()); // scratch set for calculations
296299

297300
/*
298301
* Perform a backward dataflow analysis to compute liveOut and liveIn for each block.
@@ -315,22 +318,16 @@ protected void computeGlobalLiveSets() {
315318
*/
316319
int n = block.getSuccessorCount();
317320
if (n > 0) {
318-
liveOut.clear();
321+
scratch.clear();
319322
// block has successors
320323
if (n > 0) {
321324
for (AbstractBlockBase<?> successor : block.getSuccessors()) {
322-
liveOut.or(allocator.getBlockData(successor).liveIn);
325+
scratch.or(allocator.getBlockData(successor).liveIn);
323326
}
324327
}
325328

326-
if (!blockSets.liveOut.equals(liveOut)) {
327-
/*
328-
* A change occurred. Swap the old and new live out sets to avoid
329-
* copying.
330-
*/
331-
BitSet temp = blockSets.liveOut;
332-
blockSets.liveOut = liveOut;
333-
liveOut = temp;
329+
if (!blockSets.liveOut.equals(scratch)) {
330+
blockSets.liveOut = trimClone(scratch);
334331

335332
changeOccurred = true;
336333
changeOccurredInBlock = true;
@@ -344,13 +341,20 @@ protected void computeGlobalLiveSets() {
344341
*
345342
* Note: liveIn has to be computed only in first iteration or if liveOut
346343
* has changed!
344+
*
345+
* Note: liveIn set can only grow, never shrink. No need to clear it.
347346
*/
348347
BitSet liveIn = blockSets.liveIn;
349-
liveIn.clear();
348+
/*
349+
* BitSet#or will call BitSet#ensureSize (since the bit set is of length
350+
* 0 initially) and set sticky to false
351+
*/
350352
liveIn.or(blockSets.liveOut);
351353
liveIn.andNot(blockSets.liveKill);
352354
liveIn.or(blockSets.liveGen);
353355

356+
liveIn.clone(); // trimToSize()
357+
354358
if (debug.isLogEnabled()) {
355359
debug.log("block %d: livein = %s, liveout = %s", block.getId(), liveIn, blockSets.liveOut);
356360
}
@@ -384,6 +388,20 @@ protected void computeGlobalLiveSets() {
384388
}
385389
}
386390

391+
/**
392+
* Creates a trimmed copy of a bit set.
393+
*
394+
* {@link BitSet#clone()} cannot be used since it will not {@linkplain BitSet#trimToSize trim}
395+
* the array if the bit set is {@linkplain BitSet#sizeIsSticky sticky}.
396+
*/
397+
@SuppressWarnings("javadoc")
398+
private static BitSet trimClone(BitSet set) {
399+
BitSet trimmedSet = new BitSet(0); // zero-length words array, sticky
400+
trimmedSet.or(set); // words size ensured to be words-in-use of set,
401+
// also makes it non-sticky
402+
return trimmedSet;
403+
}
404+
387405
@SuppressWarnings("try")
388406
protected void reportFailure(int numBlocks) {
389407
try (DebugContext.Scope s = debug.forceLog()) {

0 commit comments

Comments
 (0)