Skip to content

Commit

Permalink
7903450: JMH: Improve -prof perfnorm accuracy with robust estimations
Browse files Browse the repository at this point in the history
  • Loading branch information
shipilev committed Jul 25, 2023
1 parent d88f901 commit 6b09724
Showing 1 changed file with 118 additions and 133 deletions.
251 changes: 118 additions & 133 deletions jmh-core/src/main/java/org/openjdk/jmh/profile/LinuxPerfNormProfiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,8 @@ public class LinuxPerfNormProfiler implements ExternalProfiler {
private final int delayMs;
private final int lengthMs;
private final boolean useDefaultStats;
private final long highPassFilter;
private final int incrementInterval;
private final boolean isIncrementable;
private final boolean doFilter;

private final Collection<String> supportedEvents = new ArrayList<>();

Expand All @@ -87,9 +86,9 @@ public LinuxPerfNormProfiler(String initLine) throws ProfilerException {
"Lower values may improve accuracy, while increasing the profiling overhead.")
.withRequiredArg().ofType(Integer.class).describedAs("ms").defaultsTo(100);

OptionSpec<Long> optHighPassFilter = parser.accepts("highPassFilter",
"Ignore event increments larger that this.")
.withRequiredArg().ofType(Long.class).describedAs("#").defaultsTo(100_000_000_000L);
OptionSpec<Boolean> optFilter = parser.accepts("filter",
"Filter problematic samples from infrastructure and perf itself.")
.withRequiredArg().ofType(Boolean.class).describedAs("bool").defaultsTo(true);

OptionSpec<Boolean> optDefaultStat = parser.accepts("useDefaultStat",
"Use \"perf stat -d -d -d\" instead of explicit counter list.")
Expand All @@ -103,7 +102,7 @@ public LinuxPerfNormProfiler(String initLine) throws ProfilerException {
delayMs = set.valueOf(optDelay);
lengthMs = set.valueOf(optLength);
incrementInterval = set.valueOf(optIncrementInterval);
highPassFilter = set.valueOf(optHighPassFilter);
doFilter = set.valueOf(optFilter);
useDefaultStats = set.valueOf(optDefaultStat);
userEvents = set.valuesOf(optEvents);
} catch (OptionException e) {
Expand All @@ -116,7 +115,9 @@ public LinuxPerfNormProfiler(String initLine) throws ProfilerException {
}

Collection<String> incremental = Utils.tryWith(PerfSupport.PERF_EXEC, "stat", "--log-fd", "2", "--field-separator", ",", "--interval-print", String.valueOf(incrementInterval), "echo", "1");
isIncrementable = incremental.isEmpty();
if (!incremental.isEmpty()) {
throw new ProfilerException("\\\"perf\\\" is too old, needs incremental mode (-I).");
}

Collection<String> candidateEvents = new ArrayList<>();
if (userEvents != null) {
Expand All @@ -126,7 +127,7 @@ public LinuxPerfNormProfiler(String initLine) throws ProfilerException {
}
}

if (supportedEvents.isEmpty()) {
if (candidateEvents.isEmpty()) {
candidateEvents.addAll(Arrays.asList(interestingEvents));
}

Expand Down Expand Up @@ -154,9 +155,7 @@ public Collection<String> addJVMInvokeOptions(BenchmarkParams params) {
} else {
cmd.addAll(Arrays.asList(PerfSupport.PERF_EXEC, "stat", "--log-fd", "2", "--field-separator", ",", "--event", Utils.join(supportedEvents, ",")));
}
if (isIncrementable) {
cmd.addAll(Arrays.asList("-I", String.valueOf(incrementInterval)));
}
cmd.addAll(Arrays.asList("-I", String.valueOf(incrementInterval)));
return cmd;
}

Expand Down Expand Up @@ -190,8 +189,17 @@ public String getDescription() {
return "Linux perf statistics, normalized by operation count";
}

private static class EventRecord {
final double time;
final double value;
public EventRecord(double time, double value) {
this.time = time;
this.value = value;
}
}

private Collection<? extends Result> process(BenchmarkResult br, File stdOut, File stdErr) {
Multiset<String> events = new HashMultiset<>();
Multimap<String, EventRecord> eventRecords = new HashMultimap<>();

try (FileReader fr = new FileReader(stdErr);
BufferedReader reader = new BufferedReader(fr)) {
Expand All @@ -211,161 +219,138 @@ private Collection<? extends Result> process(BenchmarkResult br, File stdOut, Fi
}

double readFrom = skipMs / 1000D;
double softTo = (skipMs + lenMs) / 1000D;
double readTo = (skipMs + lenMs + incrementInterval) / 1000D;

NumberFormat nf = NumberFormat.getInstance();

String line;

nextline:
while ((line = reader.readLine()) != null) {
if (line.startsWith("#")) continue;

if (isIncrementable) {
String[] split = line.split(",");

String time;
String count;
String event;

if (split.length == 3) {
// perf 3.13: time,count,event
time = split[0].trim();
count = split[1].trim();
event = split[2].trim();
} else if (split.length >= 4) {
// perf >3.13: time,count,<other>,event,<others>
time = split[0].trim();
count = split[1].trim();
event = split[3].trim();
} else {
// Malformed line, ignore
continue nextline;
}
String[] split = line.split(",");

String time;
String count;
String event;

if (split.length == 3) {
// perf 3.13: time,count,event
time = split[0].trim();
count = split[1].trim();
event = split[2].trim();
} else if (split.length >= 4) {
// perf >3.13: time,count,<other>,event,<others>
time = split[0].trim();
count = split[1].trim();
event = split[3].trim();
} else {
// Malformed line, ignore
continue;
}

double multiplier = 1D;
try {
double timeSec = nf.parse(time).doubleValue();
if (timeSec < readFrom) {
// warmup, ignore
continue nextline;
}
if (timeSec > readTo) {
// post-run, ignore
continue nextline;
}

// Handle partial events:
double intervalSec = incrementInterval / 1000D;
if (timeSec - intervalSec < readFrom) {
// Event _starts_ before the measurement window
// .............[============|============
// readFrom timeSec
// [<----------------->| // event
// incrementInterval
//
// Only count the tail after readFrom:

multiplier = (timeSec - readFrom) / intervalSec;
}
if (timeSec > softTo) {
// Event is past the measurement window
// =============].............|............
// softTo timeSec
// [<----------------->| // event
// incrementInterval
//
// Only count the head before softTo:
multiplier = 1 - (timeSec - softTo) / intervalSec;
}

// Defensive, keep multiplier in bounds:
multiplier = Math.max(1D, Math.min(0D, multiplier));
} catch (ParseException e) {
// don't care then, continue
continue nextline;
}
double timeSec = 0D;
try {
timeSec = nf.parse(time).doubleValue();
} catch (ParseException e) {
continue;
}

try {
long lValue = nf.parse(count).longValue();
if (lValue > highPassFilter) {
// anomalous value, pretend we did not see it
continue nextline;
}
events.add(event, (long) (lValue * multiplier));
} catch (ParseException e) {
// do nothing, continue
continue nextline;
if (timeSec < readFrom || timeSec > readTo) {
// Outside measurement window, ignore
continue;
}

}
} else {
int idx = line.lastIndexOf(",");
long lValue = 0L;
try {
lValue = nf.parse(count).longValue();
} catch (ParseException e) {
continue;
}

// Malformed line, ignore
if (idx == -1) continue nextline;
eventRecords.put(event, new EventRecord(timeSec, lValue));
}

String count = line.substring(0, idx).trim();
String event = line.substring(idx + 1).trim();
Map<String, Double> finalThroughputs = new HashMap<>();
for (String key : eventRecords.keys()) {
List<EventRecord> countedEvents = new ArrayList<>(eventRecords.get(key));

// If needed and possible, filter out a few head and tail iterations.
// Head iteration potentially contains the infrastructure startup.
// That iteration would only be handled for getting time, not the counter itself.
// Trailing iterations come with both infrastructure rampdown, and the final
// profiler output on shutdown. Sometimes these manifest as two separate iterations.
final int filteredCount = countedEvents.size() - 2;
if (doFilter && filteredCount > 0) {
countedEvents = countedEvents.subList(0, filteredCount);
}

double s = 0;
double minTime = Double.MAX_VALUE;
double maxTime = Double.MIN_VALUE;

try {
long lValue = nf.parse(count).longValue();
events.add(event, lValue);
} catch (ParseException e) {
// do nothing, continue
continue nextline;
for (int i = 0; i < countedEvents.size(); i++) {
EventRecord v = countedEvents.get(i);
if (i != 0) {
// Do not count the first event in the series, since time interval
// does not actually include it.
s += v.value;
}
minTime = Math.min(minTime, v.time);
maxTime = Math.max(maxTime, v.time);
}
double thr = s / (maxTime - minTime);
finalThroughputs.put(key, thr);
}

BenchmarkResultMetaData md = br.getMetadata();
if (md == null) {
return emptyResults();
}

if (!isIncrementable) {
System.out.println();
System.out.println();
System.out.println("WARNING: Your system uses old \"perf\", which cannot print data incrementally (-I).\n" +
"Therefore, perf performance data includes benchmark warmup.");
long timeMs = md.getStopTime() - md.getMeasurementTime();
if (timeMs == 0) {
return emptyResults();

}
double opsThroughput = 1000D * md.getMeasurementOps() / timeMs;
if (opsThroughput == 0) {
return emptyResults();
}

long totalOpts;
Collection<Result> results = new ArrayList<>();
for (String key : finalThroughputs.keySet()) {
results.add(new PerfResult(key, "#/op", finalThroughputs.get(key) / opsThroughput));
}

BenchmarkResultMetaData md = br.getMetadata();
if (md != null) {
if (isIncrementable) {
totalOpts = md.getMeasurementOps();
} else {
totalOpts = md.getWarmupOps() + md.getMeasurementOps();
}
Collection<Result> results = new ArrayList<>();
for (String key : events.keys()) {
results.add(new PerfResult(key, "#/op", events.count(key) * 1.0 / totalOpts));
}
// Also figure out IPC/CPI, if enough counters available:
{
Double c1 = finalThroughputs.get("cycles");
Double c2 = finalThroughputs.get("cycles:u");

// Also figure out IPC/CPI, if enough counters available:
{
long c1 = events.count("cycles");
long c2 = events.count("cycles:u");
Double i1 = finalThroughputs.get("instructions");
Double i2 = finalThroughputs.get("instructions:u");

long i1 = events.count("instructions");
long i2 = events.count("instructions:u");
Double cycles = (c1 != null) ? c1 : c2;
Double instructions = (i1 != null) ? i1 : i2;

long cycles = (c1 != 0) ? c1 : c2;
long instructions = (i1 != 0) ? i1 : i2;
if (cycles != 0 && instructions != 0) {
results.add(new PerfResult("CPI", "clks/insn", 1.0 * cycles / instructions));
results.add(new PerfResult("IPC", "insns/clk", 1.0 * instructions / cycles));
}
if (cycles != null && instructions != null &&
cycles != 0 && instructions != 0) {
results.add(new PerfResult("CPI", "clks/insn", cycles / instructions));
results.add(new PerfResult("IPC", "insns/clk", instructions / cycles));
}

return results;
} else {
return Collections.singleton(new PerfResult("N/A", "", Double.NaN));
}

return results;
} catch (IOException e) {
throw new IllegalStateException(e);
}
}

private static Set<PerfResult> emptyResults() {
return Collections.singleton(new PerfResult("N/A", "", Double.NaN));
}

static class PerfResult extends ScalarResult {
private static final long serialVersionUID = -1262685915873231436L;

Expand Down

0 comments on commit 6b09724

Please sign in to comment.