Skip to content

Commit

Permalink
[Exegesis] Naive clusterization: sub-partition by sched class id
Browse files Browse the repository at this point in the history
Currently, naive clusterization simply groups all benchmarks
by the opcode of the key instruction, but that is suboptimal in certain cases,
e.g. where we can already tell that the particular instructions
resolve into different sched classes.
  • Loading branch information
LebedevRI committed Sep 7, 2021
1 parent b3b9b29 commit e030f80
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 50 deletions.
@@ -1,9 +1,15 @@
# RUN: llvm-exegesis -mcpu=znver3 -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s

# Naive clusterization mainly groups by instruction opcode,
# but it should also partition the benchmarks of the same opcode
# by sched class. For example, a regular `xor` and a same-operand `xor`
# may have different characteristics, so grouping them together
# would be confusing/misleading.

# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
# CHECK-CLUSTERS-NEXT: {{^}}0,
# CHECK-CLUSTERS-SAME: ,1.00{{$}}
# CHECK-CLUSTERS-NEXT: {{^}}0,
# CHECK-CLUSTERS: {{^}}1,
# CHECK-CLUSTERS-SAME: ,0.20{{$}}

---
Expand Down
7 changes: 5 additions & 2 deletions llvm/tools/llvm-exegesis/lib/Analysis.cpp
Expand Up @@ -151,12 +151,15 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
OS << "\n";
}

Analysis::Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
Analysis::Analysis(const Target &Target,
std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
std::unique_ptr<MCInstrInfo> InstrInfo,
const InstructionBenchmarkClustering &Clustering,
double AnalysisInconsistencyEpsilon,
bool AnalysisDisplayUnstableOpcodes,
const std::string &ForceCpuName)
: Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
: Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)),
InstrInfo_(std::move(InstrInfo)),
AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
AnalysisInconsistencyEpsilon),
AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
Expand Down
3 changes: 2 additions & 1 deletion llvm/tools/llvm-exegesis/lib/Analysis.h
Expand Up @@ -36,7 +36,8 @@ namespace exegesis {
// A helper class to analyze benchmark results for a target.
class Analysis {
public:
Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
Analysis(const Target &Target, std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
std::unique_ptr<MCInstrInfo> InstrInfo,
const InstructionBenchmarkClustering &Clustering,
double AnalysisInconsistencyEpsilon,
bool AnalysisDisplayUnstableOpcodes,
Expand Down
94 changes: 54 additions & 40 deletions llvm/tools/llvm-exegesis/lib/Clustering.cpp
Expand Up @@ -8,13 +8,15 @@

#include "Clustering.h"
#include "Error.h"
#include "SchedClassResolution.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
#include <deque>
#include <string>
#include <vector>
#include <deque>

namespace llvm {
namespace exegesis {
Expand Down Expand Up @@ -183,46 +185,58 @@ void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
}
}

void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
// Given an instruction Opcode, which are the benchmarks of this instruction?
std::vector<SmallVector<size_t, 1>> OpcodeToPoints;
OpcodeToPoints.resize(NumOpcodes);
size_t NumOpcodesSeen = 0;
void InstructionBenchmarkClustering::clusterizeNaive(
const MCSubtargetInfo &SubtargetInfo, const MCInstrInfo &InstrInfo) {
// Given an instruction Opcode, which sched class id's are represented,
// and which are the benchmarks for each sched class?
std::vector<SmallMapVector<unsigned, SmallVector<size_t, 1>, 1>>
OpcodeToSchedClassesToPoints;
const unsigned NumOpcodes = InstrInfo.getNumOpcodes();
OpcodeToSchedClassesToPoints.resize(NumOpcodes);
size_t NumClusters = 0;
for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
const InstructionBenchmark &Point = Points_[P];
const unsigned Opcode = Point.keyInstruction().getOpcode();
const MCInst &MCI = Point.keyInstruction();
unsigned SchedClassId;
std::tie(SchedClassId, std::ignore) =
ResolvedSchedClass::resolveSchedClassId(SubtargetInfo, InstrInfo, MCI);
const unsigned Opcode = MCI.getOpcode();
assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
if (PointsOfOpcode.empty()) // If we previously have not seen any points of
++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
PointsOfOpcode.emplace_back(P);
auto &Points = OpcodeToSchedClassesToPoints[Opcode][SchedClassId];
if (Points.empty()) // If we previously have not seen any points of
++NumClusters; // this opcode's sched class, then new cluster begins.
Points.emplace_back(P);
}
assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
assert(NumOpcodesSeen <= NumOpcodes &&
assert(NumClusters <= NumOpcodes &&
"can't see more opcodes than there are total opcodes");
assert(NumOpcodesSeen <= Points_.size() &&
assert(NumClusters <= Points_.size() &&
"can't see more opcodes than there are total points");

Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
for (ArrayRef<size_t> PointsOfOpcode :
make_filter_range(OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
})) {
// Create a new cluster.
Clusters_.emplace_back(ClusterId::makeValid(
Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
Cluster &CurrentCluster = Clusters_.back();
// Mark points as belonging to the new cluster.
for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
ClusterIdForPoint_[P] = CurrentCluster.Id;
});
// And add all the points of this opcode to the new cluster.
CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
PointsOfOpcode.end());
assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
Clusters_.reserve(NumClusters); // We already know how many clusters there is.
for (const auto &SchedClassesOfOpcode : OpcodeToSchedClassesToPoints) {
if (SchedClassesOfOpcode.empty())
continue;
for (ArrayRef<size_t> PointsOfSchedClass :
make_second_range(SchedClassesOfOpcode)) {
if (PointsOfSchedClass.empty())
continue;
// Create a new cluster.
Clusters_.emplace_back(ClusterId::makeValid(
Clusters_.size(),
/*IsUnstable=*/!areAllNeighbours(PointsOfSchedClass)));
Cluster &CurrentCluster = Clusters_.back();
// Mark points as belonging to the new cluster.
for_each(PointsOfSchedClass, [this, &CurrentCluster](size_t P) {
ClusterIdForPoint_[P] = CurrentCluster.Id;
});
// And add all the points of this opcode's sched class to the new cluster.
CurrentCluster.PointIndices.reserve(PointsOfSchedClass.size());
CurrentCluster.PointIndices.assign(PointsOfSchedClass.begin(),
PointsOfSchedClass.end());
assert(CurrentCluster.PointIndices.size() == PointsOfSchedClass.size());
}
}
assert(Clusters_.size() == NumOpcodesSeen);
assert(Clusters_.size() == NumClusters);
}

// Given an instruction Opcode, we can make benchmarks (measurements) of the
Expand Down Expand Up @@ -317,7 +331,7 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
Optional<unsigned> NumOpcodes) {
const MCSubtargetInfo *SubtargetInfo, const MCInstrInfo *InstrInfo) {
InstructionBenchmarkClustering Clustering(
Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
if (auto Error = Clustering.validateAndSetup()) {
Expand All @@ -330,13 +344,13 @@ Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
if (Mode == ModeE::Dbscan) {
Clustering.clusterizeDbScan(DbscanMinPts);

if (NumOpcodes.hasValue())
Clustering.stabilize(NumOpcodes.getValue());
if (InstrInfo)
Clustering.stabilize(InstrInfo->getNumOpcodes());
} else /*if(Mode == ModeE::Naive)*/ {
if (!NumOpcodes.hasValue())
return make_error<Failure>(
"'naive' clustering mode requires opcode count to be specified");
Clustering.clusterizeNaive(NumOpcodes.getValue());
if (!SubtargetInfo || !InstrInfo)
return make_error<Failure>("'naive' clustering mode requires "
"SubtargetInfo and InstrInfo to be present");
Clustering.clusterizeNaive(*SubtargetInfo, *InstrInfo);
}

return Clustering;
Expand Down
6 changes: 4 additions & 2 deletions llvm/tools/llvm-exegesis/lib/Clustering.h
Expand Up @@ -32,7 +32,8 @@ class InstructionBenchmarkClustering {
static Expected<InstructionBenchmarkClustering>
create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
size_t DbscanMinPts, double AnalysisClusteringEpsilon,
Optional<unsigned> NumOpcodes = None);
const MCSubtargetInfo *SubtargetInfo = nullptr,
const MCInstrInfo *InstrInfo = nullptr);

class ClusterId {
public:
Expand Down Expand Up @@ -126,7 +127,8 @@ class InstructionBenchmarkClustering {
Error validateAndSetup();

void clusterizeDbScan(size_t MinPts);
void clusterizeNaive(unsigned NumOpcodes);
void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
const MCInstrInfo &InstrInfo);

// Stabilization is only needed if dbscan was used to clusterize.
void stabilize(unsigned NumOpcodes);
Expand Down
11 changes: 7 additions & 4 deletions llvm/tools/llvm-exegesis/llvm-exegesis.cpp
Expand Up @@ -435,16 +435,19 @@ static void analysisMain() {
return;
}

std::unique_ptr<MCSubtargetInfo> SubtargetInfo(
TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, CpuName, ""));

std::unique_ptr<MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
assert(InstrInfo && "Unable to create instruction info!");

const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
AnalysisClusteringEpsilon, SubtargetInfo.get(), InstrInfo.get()));

const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
AnalysisInconsistencyEpsilon,
AnalysisDisplayUnstableOpcodes, CpuName);
const Analysis Analyzer(
*TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering,
AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes, CpuName);

maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
AnalysisClustersOutputFile);
Expand Down

0 comments on commit e030f80

Please sign in to comment.