99 changes: 99 additions & 0 deletions libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
//===-- Analyze benchmark JSON files ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H
#define LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H

#include "automemcpy/FunctionDescriptor.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include <array>
#include <vector>

namespace llvm {
namespace automemcpy {

// A Grade as in the Majority Judgment voting system.
// Enumerators are declared from the best grade (EXCELLENT) to the worst one
// (BAD); their integral values are used to index a 'GradeHistogram'.
struct Grade {
enum GradeEnum {
EXCELLENT,
VERY_GOOD,
GOOD,
PASSABLE,
INADEQUATE,
MEDIOCRE,
BAD,
ARRAY_SIZE, // Not a grade: the number of grades, used to size arrays.
};

// Returns a human readable string of the enum.
// 'ARRAY_SIZE' is not a valid argument.
static StringRef getString(const GradeEnum &GE);

// Turns 'Score' into a GradeEnum. Higher scores yield better grades.
static GradeEnum judge(double Score);
};

// A 'GradeEnum' indexed array with counts for each grade.
// Element 'I' holds the number of times grade 'Grade::GradeEnum(I)' was given.
using GradeHistogram = std::array<size_t, Grade::ARRAY_SIZE>;

// Identifies a Function by its name and type. Used as a key in a map.
// 'Name' is a non-owning 'StringRef': the characters it points to must outlive
// this object.
// NOTE(review): COMPARABLE_AND_HASHABLE is defined outside this file; it is
// presumably what provides the comparison operators and the nested 'Hasher'
// type used with std::unordered_map - confirm in FunctionDescriptor.h.
struct FunctionId {
StringRef Name;
FunctionType Type;
COMPARABLE_AND_HASHABLE(FunctionId, Type, Name)
};

// Results of a single function for a single distribution.
// The fields are filled in successively by 'getThroughputs', 'fillScores' and
// 'castVotes'.
struct PerDistributionData {
double MedianBytesPerSecond; // Median of samples for this distribution.
double Score; // Normalized score for this distribution.
Grade::GradeEnum Grade; // Grade for this distribution.
};

// Everything known about one function: its identity, its results for each
// distribution (keyed by distribution name) and the outcome of the vote.
struct FunctionData {
FunctionId Id;
StringMap<PerDistributionData> PerDistributionData;
GradeHistogram GradeHisto = {}; // GradeEnum indexed array
Grade::GradeEnum FinalGrade = Grade::BAD; // Overall grade for this function
};

// Identifies a Distribution by its name. Used as a key in a map.
// 'Name' is a non-owning 'StringRef': the characters it points to must outlive
// this object.
struct DistributionId {
StringRef Name;
COMPARABLE_AND_HASHABLE(DistributionId, Name)
};

// Identifies a Sample by its distribution and function. Used as a key in a map.
// Comparison and hashing cover the function type, the function name and the
// distribution name.
struct SampleId {
FunctionId Function;
DistributionId Distribution;
COMPARABLE_AND_HASHABLE(SampleId, Function.Type, Function.Name,
Distribution.Name)
};

// A SampleId with an associated measured throughput.
// Several samples may share the same 'Id' when a benchmark was run more than
// once.
struct Sample {
SampleId Id;
double BytesPerSecond = 0;
};

// This function collects Samples that belong to the same distribution and
// function and retains the median one (the upper middle one when the number of
// samples is even). It then stores each of them into a 'FunctionData' and
// returns them as a vector. Only 'Id' and 'MedianBytesPerSecond' are filled
// in; the order of the returned functions is unspecified.
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples);

// Normalize the function's throughput per distribution.
// For each (function type, distribution) pair, the median throughputs of all
// functions are rescaled linearly so that the lowest one gets a 'Score' of 0
// and the highest one a 'Score' of 1. 'Functions' is updated in place.
void fillScores(MutableArrayRef<FunctionData> Functions);

// Converts scores into grades, stores a histogram of the grades for each
// function and casts the median grade as the function's final grade.
// 'Functions' is updated in place; 'fillScores' must have been run first.
void castVotes(MutableArrayRef<FunctionData> Functions);

} // namespace automemcpy
} // namespace llvm

#endif // LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H
32 changes: 32 additions & 0 deletions libc/benchmarks/automemcpy/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Library serializing function descriptors (see CodeGen.cpp).
add_library(automemcpy_codegen CodeGen.cpp)
target_link_libraries(automemcpy_codegen PUBLIC LLVMSupport)
target_compile_options(automemcpy_codegen PUBLIC -fno-rtti)
target_include_directories(automemcpy_codegen PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR})

# Tool enumerating function descriptors with Z3 and printing the serialized
# result on its standard output.
add_executable(automemcpy_codegen_main CodeGenMain.cpp RandomFunctionGenerator.cpp)
target_link_libraries(automemcpy_codegen_main PUBLIC automemcpy_codegen ${Z3_LIBRARIES})
target_compile_options(automemcpy_codegen_main PUBLIC -fno-rtti)

# Run the tool at build time and capture its output into Implementations.cpp.
# NOTE(review): the tool is located through CMAKE_RUNTIME_OUTPUT_DIRECTORY,
# which must therefore be set; $<TARGET_FILE:automemcpy_codegen_main> would
# not depend on it - confirm before changing.
set(Implementations "${CMAKE_CURRENT_BINARY_DIR}/Implementations.cpp")
add_custom_command(
OUTPUT ${Implementations}
COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/automemcpy_codegen_main" > "${Implementations}"
COMMAND echo "automemcpy implementations generated in ${Implementations}"
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
DEPENDS automemcpy_codegen_main
)

# Library built from the generated implementations.
add_library(automemcpy_implementations "${Implementations}")
target_link_libraries(automemcpy_implementations PUBLIC LLVMSupport libc-memory-benchmark)
target_include_directories(automemcpy_implementations PRIVATE ${LIBC_SOURCE_DIR} ${LIBC_AUTOMEMCPY_INCLUDE_DIR})
target_compile_options(automemcpy_implementations PUBLIC -fno-rtti PRIVATE ${LIBC_COMPILE_OPTIONS_NATIVE} "SHELL:-mllvm -combiner-global-alias-analysis" -fno-builtin)

# Benchmark binary for the generated implementations (not part of 'all').
add_executable(automemcpy EXCLUDE_FROM_ALL ${LIBC_SOURCE_DIR}/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp)
target_link_libraries(automemcpy PRIVATE libc-memory-benchmark benchmark_main automemcpy_implementations)

# Result analysis logic, kept in its own library so that unit tests can link
# against it.
add_library(automemcpy_result_analyzer_lib EXCLUDE_FROM_ALL ResultAnalyzer.cpp)
target_link_libraries(automemcpy_result_analyzer_lib PUBLIC LLVMSupport)
target_include_directories(automemcpy_result_analyzer_lib PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR})

# Command line tool analyzing the benchmark JSON files. It links the generated
# implementations to get access to the function descriptors.
add_executable(automemcpy_result_analyzer EXCLUDE_FROM_ALL ResultAnalyzerMain.cpp)
target_link_libraries(automemcpy_result_analyzer PRIVATE automemcpy_result_analyzer_lib automemcpy_implementations)
646 changes: 646 additions & 0 deletions libc/benchmarks/automemcpy/lib/CodeGen.cpp

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions libc/benchmarks/automemcpy/lib/CodeGenMain.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "automemcpy/CodeGen.h"
#include "automemcpy/RandomFunctionGenerator.h"
#include <unordered_set>

namespace llvm {
namespace automemcpy {

std::vector<FunctionDescriptor> generateFunctionDescriptors() {
std::unordered_set<FunctionDescriptor, FunctionDescriptor::Hasher> Seen;
std::vector<FunctionDescriptor> FunctionDescriptors;
RandomFunctionGenerator P;
while (Optional<FunctionDescriptor> MaybeFD = P.next()) {
FunctionDescriptor FD = *MaybeFD;
if (Seen.count(FD)) // FIXME: Z3 sometimes returns twice the same object.
continue;
Seen.insert(FD);
FunctionDescriptors.push_back(std::move(FD));
}
return FunctionDescriptors;
}

} // namespace automemcpy
} // namespace llvm

int main(int, char **) {
llvm::automemcpy::Serialize(llvm::outs(),
llvm::automemcpy::generateFunctionDescriptors());
}
279 changes: 279 additions & 0 deletions libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
//===-- Generate random but valid function descriptors -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "automemcpy/RandomFunctionGenerator.h"

#include <llvm/ADT/None.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/raw_ostream.h>

#include <set>

namespace llvm {
namespace automemcpy {

// Exploration parameters
// ----------------------
// Here we define a set of values that will constrain the exploration and
// limit combinatorial explosion.

// We limit the number of cases for individual sizes to sizes up to 4.
// More individual sizes don't bring much over the overlapping strategy.
static constexpr int kMaxIndividualSize = 4;

// We limit Overlapping Strategy to sizes up to 256.
// An overlap of 256B means accessing 128B at once which is usually not
// feasible by current CPUs. We rely on the compiler to generate multiple
// loads/stores if needed but higher sizes are unlikely to benefit from hardware
// acceleration.
static constexpr int kMaxOverlapSize = 256;

// For the loop strategies, we make sure that they iterate at least a certain
// number of times to amortize the cost of looping.
static constexpr int kLoopMinIter = 3;
static constexpr int kAlignedLoopMinIter = 2;

// We restrict the size of the block of data to handle in a loop.
// Generally speaking block size <= 16 perform poorly.
// NOTE(review): 16 is nevertheless part of the explored values below; either
// the comment means "< 16" or 16 is kept on purpose - confirm.
static constexpr int kLoopBlockSize[] = {16, 32, 64};

// We restrict alignment to the following values.
static constexpr int kLoopAlignments[] = {16, 32, 64};

// We make sure that the region bounds are one of the following values.
static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48,
64, 96, 128, 256, 512, 1024, kMaxSize};

// We also allow disabling loops, aligned loops and accelerators.
// When set, the corresponding region is forced to be empty.
static constexpr bool kDisableLoop = false;
static constexpr bool kDisableAlignedLoop = false;
static constexpr bool kDisableAccelerator = false;

// For memcpy, we can also explore whether aligning on source or destination has
// an effect.
static constexpr bool kExploreAlignmentArg = true;

// The function we generate code for.
// BCMP is specifically disabled for now.
static constexpr int kFunctionTypes[] = {
(int)FunctionType::MEMCPY,
(int)FunctionType::MEMCMP,
// (int)FunctionType::BCMP,
(int)FunctionType::MEMSET,
(int)FunctionType::BZERO,
};

// The actual implementation of each function can be handled via primitive types
// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
// We want to move toward delegating the code generation entirely to the
// compiler but for now we have to make use of -per microarchitecture- custom
// implementations. Scalar being more portable but also less performant, we
// remove it as well.
// As a result only NATIVE is explored.
static constexpr int kElementClasses[] = {
// (int)ElementTypeClass::SCALAR,
(int)ElementTypeClass::NATIVE,
// (int)ElementTypeClass::BUILTIN
};

// Declares one Z3 integer constant per degree of freedom of a function
// descriptor and registers with the solver all the constraints a valid
// descriptor must satisfy. Each successful call to next() then yields one
// solution of this constraint system.
RandomFunctionGenerator::RandomFunctionGenerator()
: Solver(Context), Type(Context.int_const("Type")),
ContiguousBegin(Context.int_const("ContiguousBegin")),
ContiguousEnd(Context.int_const("ContiguousEnd")),
OverlapBegin(Context.int_const("OverlapBegin")),
OverlapEnd(Context.int_const("OverlapEnd")),
LoopBegin(Context.int_const("LoopBegin")),
LoopEnd(Context.int_const("LoopEnd")),
LoopBlockSize(Context.int_const("LoopBlockSize")),
AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
AlignedAlignment(Context.int_const("AlignedAlignment")),
AlignedArg(Context.int_const("AlignedArg")),
AcceleratorBegin(Context.int_const("AcceleratorBegin")),
AcceleratorEnd(Context.int_const("AcceleratorEnd")),
ElementClass(Context.int_const("ElementClass")) {
// All possible functions.
Solver.add(inSetConstraint(Type, kFunctionTypes));

// Add constraints for region bounds.
addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
addBoundsAndAnchors(OverlapBegin, OverlapEnd);
addBoundsAndAnchors(LoopBegin, LoopEnd);
addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);
// We always consider strategies in this order, and we
// always end with the `Accelerator` strategy, as it's typically more
// efficient for large sizes.
// Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
// Regions are adjacent: each one starts where the previous one ends. A region
// whose begin equals its end is empty, i.e. the strategy is not used.
Solver.add(ContiguousEnd == OverlapBegin);
Solver.add(OverlapEnd == LoopBegin);
Solver.add(LoopEnd == AlignedLoopBegin);
Solver.add(AlignedLoopEnd == AcceleratorBegin);
// Fix endpoints: The minimum size that we want to copy is 0, and we always
// start with the `Contiguous` strategy. The max size is `kMaxSize`.
Solver.add(ContiguousBegin == 0);
Solver.add(AcceleratorEnd == kMaxSize);
// Contiguous
Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);
// Overlap
Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
// Overlap only ever makes sense when accessing multiple bytes at a time.
// i.e. Overlap<1> is useless.
Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);
// Loop
addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);
// Aligned Loop
addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
kAlignedLoopMinIter);
Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
// A non empty aligned loop region starts at size 64 or above.
Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
// The aligned loop block is at least as large as the requested alignment and
// as the block of the non aligned loop.
Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
// Both alignment arguments are explored for memcpy when enabled; every other
// case is pinned to AlignArg::_1.
Solver.add(
(ExploreAlignment &&
inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
(!ExploreAlignment && AlignedArg == (int)AlignArg::_1));
// Accelerator
Solver.add(IsMemcpy ||
(AcceleratorBegin ==
AcceleratorEnd)); // Only Memcpy has accelerator for now.
// Element classes
Solver.add(inSetConstraint(ElementClass, kElementClasses));

// Optionally force some regions to be empty.
if (kDisableLoop)
Solver.add(LoopBegin == LoopEnd);
if (kDisableAlignedLoop)
Solver.add(AlignedLoopBegin == AlignedLoopEnd);
if (kDisableAccelerator)
Solver.add(AcceleratorBegin == AcceleratorEnd);
}

// Builds a SizeSpan out of 'Begin' and 'End'.
// An empty span (Begin == End) is reported as llvm::None.
static Optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
  if (Begin != End) {
    SizeSpan Span;
    Span.Begin = Begin;
    Span.End = End;
    return Span;
  }
  return None;
}

// Generic helper building a 'Region' struct whose 'Span' member covers
// 'Begin' to 'End'. Returns None when the span is empty.
template <typename Region>
static Optional<Region> As(size_t Begin, size_t End) {
  const auto MaybeSpan = AsSizeSpan(Begin, End);
  if (!MaybeSpan)
    return None;
  Region Result;
  Result.Span = *MaybeSpan;
  return Result;
}

// Builds a Loop covering 'Begin' to 'End' and processing 'BlockSize' bytes
// per iteration. Returns None when the span is empty.
static Optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
  const auto MaybeSpan = AsSizeSpan(Begin, End);
  if (!MaybeSpan)
    return None;
  Loop Result;
  Result.Span = *MaybeSpan;
  Result.BlockSize = BlockSize;
  return Result;
}

// Builds an AlignedLoop: a Loop (see AsLoop) together with the requested
// alignment and the argument to align on. Returns None when the span is empty.
static Optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
                                           size_t BlockSize, size_t Alignment,
                                           AlignArg AlignTo) {
  const auto MaybeInnerLoop = AsLoop(Begin, End, BlockSize);
  if (!MaybeInnerLoop)
    return None;
  AlignedLoop Result;
  Result.Loop = *MaybeInnerLoop;
  Result.Alignment = Alignment;
  Result.AlignTo = AlignTo;
  return Result;
}

// Returns the next function descriptor satisfying the constraints, or an empty
// Optional once the solver has no more solutions. Every returned assignment is
// added to the solver as a forbidden one so that it is not produced again.
Optional<FunctionDescriptor> RandomFunctionGenerator::next() {
  if (Solver.check() != z3::sat)
    return {};

  z3::model Model = Solver.get_model();

  // Reads the integer assigned to 'Variable' in the current model.
  const auto ValueOf = [&Model](z3::expr &Variable) -> int {
    return Model.eval(Variable).get_numeral_int();
  };

  // Evaluate each variable once; values are used both to build the descriptor
  // and to express the blocking constraint.
  const int TypeValue = ValueOf(Type);
  const int ContiguousBeginValue = ValueOf(ContiguousBegin);
  const int ContiguousEndValue = ValueOf(ContiguousEnd);
  const int OverlapBeginValue = ValueOf(OverlapBegin);
  const int OverlapEndValue = ValueOf(OverlapEnd);
  const int LoopBeginValue = ValueOf(LoopBegin);
  const int LoopEndValue = ValueOf(LoopEnd);
  const int LoopBlockSizeValue = ValueOf(LoopBlockSize);
  const int AlignedLoopBeginValue = ValueOf(AlignedLoopBegin);
  const int AlignedLoopEndValue = ValueOf(AlignedLoopEnd);
  const int AlignedLoopBlockSizeValue = ValueOf(AlignedLoopBlockSize);
  const int AlignedAlignmentValue = ValueOf(AlignedAlignment);
  const int AlignedArgValue = ValueOf(AlignedArg);
  const int AcceleratorBeginValue = ValueOf(AcceleratorBegin);
  const int AcceleratorEndValue = ValueOf(AcceleratorEnd);
  const int ElementClassValue = ValueOf(ElementClass);

  // The function descriptor to return.
  FunctionDescriptor Descriptor;
  Descriptor.Type = FunctionType(TypeValue);
  Descriptor.Contiguous =
      As<Contiguous>(ContiguousBeginValue, ContiguousEndValue);
  Descriptor.Overlap = As<Overlap>(OverlapBeginValue, OverlapEndValue);
  Descriptor.Loop = AsLoop(LoopBeginValue, LoopEndValue, LoopBlockSizeValue);
  Descriptor.AlignedLoop = AsAlignedLoop(
      AlignedLoopBeginValue, AlignedLoopEndValue, AlignedLoopBlockSizeValue,
      AlignedAlignmentValue, AlignArg(AlignedArgValue));
  Descriptor.Accelerator =
      As<Accelerator>(AcceleratorBeginValue, AcceleratorEndValue);
  Descriptor.ElementClass = ElementTypeClass(ElementClassValue);

  // The current assignment, expressed as a conjunction of equalities.
  z3::expr ThisAssignment =
      (Type == TypeValue) && (ContiguousBegin == ContiguousBeginValue) &&
      (ContiguousEnd == ContiguousEndValue) &&
      (OverlapBegin == OverlapBeginValue) && (OverlapEnd == OverlapEndValue) &&
      (LoopBegin == LoopBeginValue) && (LoopEnd == LoopEndValue) &&
      (LoopBlockSize == LoopBlockSizeValue) &&
      (AlignedLoopBegin == AlignedLoopBeginValue) &&
      (AlignedLoopEnd == AlignedLoopEndValue) &&
      (AlignedLoopBlockSize == AlignedLoopBlockSizeValue) &&
      (AlignedAlignment == AlignedAlignmentValue) &&
      (AlignedArg == AlignedArgValue) &&
      (AcceleratorBegin == AcceleratorBeginValue) &&
      (AcceleratorEnd == AcceleratorEndValue) &&
      (ElementClass == ElementClassValue);

  // Forbid this exact assignment for all subsequent calls.
  Solver.add(!ThisAssignment);
  return Descriptor;
}

// Returns the constraint "`Variable` equals one of `Values`", built as the
// disjunction of one equality per value.
z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
                                                  ArrayRef<int> Values) const {
  z3::expr_vector Alternatives(Variable.ctx());
  for (size_t I = 0, E = Values.size(); I != E; ++I)
    Alternatives.push_back(Variable == Values[I]);
  return z3::mk_or(Alternatives);
}

// Constrains a region delimited by `Begin` and `End`: both bounds must be one
// of the `kAnchors` values, lie between 0 and `kMaxSize`, and `Begin` must not
// be greater than `End`.
void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
z3::expr &End) {
// Begin and End are picked amongst a set of predefined values.
Solver.add(inSetConstraint(Begin, kAnchors));
Solver.add(inSetConstraint(End, kAnchors));
Solver.add(Begin >= 0);
Solver.add(Begin <= End);
Solver.add(End <= kMaxSize);
}

// Constrains a loop region: `LoopBlockSize` must be one of the
// `kLoopBlockSize` values and the region must either be empty or start
// strictly above `LoopMinIter * LoopBlockSize`, so that the loop is only used
// for sizes that need more than `LoopMinIter` blocks.
void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
const z3::expr &LoopEnd,
z3::expr &LoopBlockSize,
int LoopMinIter) {
Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
Solver.add(LoopBegin == LoopEnd ||
(LoopBegin > (LoopMinIter * LoopBlockSize)));
}

} // namespace automemcpy
} // namespace llvm
180 changes: 180 additions & 0 deletions libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` will benchmark each autogenerated memory
// function against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks the
// median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distributions and scores them from worst (0)
// to best (1).
// - Each distribution categorizes each function into one of the following
// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
// BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
// the best function. The histogram of grades is returned so we can
// distinguish between functions with the same final grade. In the following
// example both functions grade EXCELLENT but we may prefer the second one.
//
// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 | 7 | 1 | 2 | | ...
// | Function_2 | 6 | 4 | | | ...

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include <numeric>
#include <unordered_map>

namespace llvm {

namespace automemcpy {

// Returns the name of the grade 'GE' as a string.
// 'ARRAY_SIZE' is not a grade: passing it, or any value that is not an
// enumerator, is a logic error and aborts with a fatal error.
StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    break;
  }
  // Reached for ARRAY_SIZE and for out-of-range values. Reporting here, after
  // the switch, guarantees the function never falls off its end.
  report_fatal_error("logic error");
}

// Maps 'Score' to a grade by splitting [0, 1] into seven equal bands: a score
// of at least 6/7 is EXCELLENT, at least 5/7 is VERY_GOOD, and so on.
// Anything below 1/7 (or not comparable, e.g. NaN) is BAD.
Grade::GradeEnum Grade::judge(double Score) {
  struct Band {
    double LowerBound;
    GradeEnum Result;
  };
  // Ordered from the highest lower bound to the lowest one.
  static constexpr Band kBands[] = {
      {6. / 7, EXCELLENT}, {5. / 7, VERY_GOOD},  {4. / 7, GOOD},
      {3. / 7, PASSABLE},  {2. / 7, INADEQUATE}, {1. / 7, MEDIOCRE},
  };
  for (const Band &B : kBands)
    if (Score >= B.LowerBound)
      return B.Result;
  return BAD;
}

// Reduces 'Samples' to one median throughput per (function, distribution) and
// returns the result grouped by function. With an even number of samples the
// upper middle value is retained. The order of the returned functions follows
// the iteration order of an unordered map and is therefore unspecified.
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  // Step 1: group the raw measurements by sample id.
  std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher> Buckets;
  for (const Sample &S : Samples)
    Buckets[S.Id].push_back(S.BytesPerSecond);

  // Step 2: keep the median of each group, indexed by function and then by
  // distribution name.
  std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
      Medians;
  for (auto &Bucket : Buckets) {
    const SampleId &Id = Bucket.first;
    std::vector<double> &Measurements = Bucket.second;
    const auto Middle = Measurements.begin() + Measurements.size() / 2;
    // Partial sort: only the middle element needs to be at its final place.
    std::nth_element(Measurements.begin(), Middle, Measurements.end());
    Medians[Id.Function][Id.Distribution.Name] = *Middle;
  }

  // Step 3: turn each function's medians into a FunctionData.
  std::vector<FunctionData> Result;
  for (auto &FunctionAndMedians : Medians) {
    FunctionData Data;
    Data.Id = FunctionAndMedians.first;
    for (const auto &DistributionAndMedian : FunctionAndMedians.second) {
      auto &Slot = Data.PerDistributionData[DistributionAndMedian.getKey()];
      Slot.MedianBytesPerSecond = DistributionAndMedian.getValue();
    }
    Result.push_back(std::move(Data));
  }
  return Result;
}

// Fills the 'Score' of every distribution of every function.
//
// Functions are compared with the other functions of the same type on the same
// distribution: the median throughputs of such a group are rescaled linearly
// so that the lowest one scores 0 and the highest one scores 1.
//
// When all throughputs of a group are equal (e.g. a single function), the
// range is empty and the score is the result of 0/0, i.e. NaN, which
// 'Grade::judge' grades as BAD.
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // 'lowest()' is the most negative finite double. 'min()' would be the
    // smallest *positive* one, which is wrong as the seed of a maximum.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  // First pass: gather the throughput range of each group.
  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const FunctionData &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const Key K{Type, Pair.getKey()};
      ThroughputMinMax[K].update(Pair.getValue().MedianBytesPerSecond);
    }
  }

  // Second pass: normalize each throughput within the range of its group.
  for (FunctionData &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (auto &Pair : Function.PerDistributionData) {
      const Key K{Type, Pair.getKey()};
      PerDistributionData &Data = Pair.getValue();
      Data.Score = ThroughputMinMax[K].normalize(Data.MedianBytesPerSecond);
    }
  }
}

// Grades every function.
//
// Each distribution votes for the function with the grade matching its score;
// votes are tallied in the function's 'GradeHisto'. The final grade is the
// median vote: grades are walked from best to worst until more than half of
// the votes have been counted. A function without any vote is graded BAD.
//
// Votes are added to the existing histogram, so this is meant to be called
// once per set of functions.
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions)
    for (auto &Pair : Function.PerDistributionData) {
      PerDistributionData &Data = Pair.getValue();
      const Grade::GradeEnum G = Grade::judge(Data.Score);
      ++(Function.GradeHisto[G]);
      Data.Grade = G;
    }

  for (FunctionData &Function : Functions) {
    const GradeHistogram &GradeHisto = Function.GradeHisto;
    // The initial value sets the type used for the sum: use size_t like the
    // histogram's counters rather than unsigned int.
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), size_t{0});
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm
158 changes: 158 additions & 0 deletions libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//===-- Application to analyze benchmark JSON files -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"

namespace llvm {

// User can specify one or more json filenames to process on the command line.
// These are positional arguments and at least one is required.
static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
cl::desc("<input json files>"));

namespace automemcpy {

// This is defined in the autogenerated 'Implementations.cpp' file.
// Returns the name and descriptor of every generated function.
extern ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors();

// Builds the index from function name to a pointer to its descriptor, covering
// every function returned by 'getFunctionDescriptors'. If a name appears more
// than once, the last occurrence wins.
static StringMap<const FunctionDescriptor *> createFunctionDescriptorMap() {
  const ArrayRef<NamedFunctionDescriptor> All = getFunctionDescriptors();
  StringMap<const FunctionDescriptor *> ByName;
  for (size_t I = 0, E = All.size(); I != E; ++I)
    ByName.insert_or_assign(All[I].Name, &All[I].Desc);
  return ByName;
}

// Looks up the descriptor of the function called 'FunctionName'.
// The name index is built on first use. Aborts with a fatal error when the
// name is unknown.
static const FunctionDescriptor &getFunctionDescriptor(StringRef FunctionName) {
  static StringMap<const FunctionDescriptor *> Descriptors =
      createFunctionDescriptorMap();
  if (const FunctionDescriptor *Found = Descriptors.lookup(FunctionName))
    return *Found;
  report_fatal_error(Twine("No FunctionDescriptor for ").concat(FunctionName));
}

// Function and distribution names appear in many samples. Rather than storing
// a copy in each of them, every distinct string is stored once in a cache that
// is never cleared, and samples refer to it through a 'StringRef'.
static StringRef getInternalizedString(StringRef VolatileStr) {
  static llvm::StringSet<> Pool;
  const auto Inserted = Pool.insert(VolatileStr);
  return Inserted.first->getKey();
}

// Helper function for the LLVM JSON API: reads a 'Sample' out of a JSON object.
// The object must have a "bytes_per_second" number and a "label" string. The
// label is split at its first comma: the part before it is the function name,
// the part after it the distribution name. Returns false if the object cannot
// be mapped; aborts if the function name is unknown.
bool fromJSON(const json::Value &V, Sample &Out, json::Path P) {
  json::ObjectMapper Mapper(V, P);
  if (!Mapper)
    return false;
  if (!Mapper.map("bytes_per_second", Out.BytesPerSecond))
    return false;
  std::string Label;
  if (!Mapper.map("label", Label))
    return false;

  const auto Parts = StringRef(Label).split(',');
  const StringRef FunctionName = Parts.first;
  const StringRef DistributionName = Parts.second;
  // 'Label' dies with this function, hence the internalized copies.
  Out.Id.Function.Name = getInternalizedString(FunctionName);
  Out.Id.Function.Type = getFunctionDescriptor(FunctionName).Type;
  Out.Id.Distribution.Name = getInternalizedString(DistributionName);
  return true;
}

// An object to represent the content of the JSON file.
// JSON is easier to parse/serialize when the structure of the object mirrors
// the structure of the JSON file.
struct JsonFile {
std::vector<Sample> Samples; // One entry per element of "benchmarks".
};

// Helper function for the LLVM JSON API: reads a 'JsonFile' out of a JSON
// object by mapping its "benchmarks" member onto 'JF.Samples'.
// Returns false if 'V' cannot be mapped.
bool fromJSON(const json::Value &V, JsonFile &JF, json::Path P) {
  json::ObjectMapper Mapper(V, P);
  if (!Mapper)
    return false;
  return Mapper.map("benchmarks", JF.Samples);
}

// Global object to ease error reporting: it consumes errors and exits the
// application with a meaningful message.
static ExitOnError ExitOnErr;

// Main JSON parsing method. Loads the file called 'Filename', parses it as
// JSON and converts it into a 'JsonFile'. Any failure (unreadable file,
// malformed JSON, unexpected structure) is reported through 'ExitOnErr'.
JsonFile parseJsonResultFile(StringRef Filename) {
  // Step 1: read the file.
  auto Buffer = ExitOnErr(errorOrToExpected(
      MemoryBuffer::getFile(Filename, /*bool IsText=*/true,
                            /*RequiresNullTerminator=*/false)));
  // Step 2: parse the text into a generic JSON value.
  auto Parsed = ExitOnErr(json::parse(Buffer->getBuffer()));
  // Step 3: map the JSON value onto our own structure.
  json::Path::Root Root;
  JsonFile Result;
  const bool Mapped = fromJSON(Parsed, Result, Root);
  if (!Mapped)
    ExitOnErr(Root.getError());
  return Result;
}

// Serializes the 'GradeHisto' to the provided 'Stream'.
//
// The histogram is drawn as a one line bar chart, one character per grade:
// each count is scaled against the largest count of the histogram and mapped
// to one of nine glyphs, from a blank (zero) to a full block (the largest).
// A histogram without any vote is drawn with blanks only.
static void Serialize(raw_ostream &Stream, const GradeHistogram &GH) {
  // A space followed by the Unicode "lower N eighths block" characters
  // U+2581 to U+2588, spelled as UTF-8 byte sequences so that the source file
  // stays ASCII-only.
  static constexpr std::array<StringRef, 9> kCharacters = {
      " ",
      "\xE2\x96\x81",
      "\xE2\x96\x82",
      "\xE2\x96\x83",
      "\xE2\x96\x84",
      "\xE2\x96\x85",
      "\xE2\x96\x86",
      "\xE2\x96\x87",
      "\xE2\x96\x88"};

  const size_t Max = *std::max_element(GH.begin(), GH.end());
  for (size_t I = 0; I < GH.size(); ++I) {
    // Guard against 0/0 when the histogram is empty.
    const size_t Index =
        Max == 0 ? 0 : (float(GH[I]) / Max) * (kCharacters.size() - 1);
    Stream << kCharacters.at(Index);
  }
}

int Main(int argc, char **argv) {
ExitOnErr.setBanner("Automemcpy Json Results Analyzer stopped with error: ");
cl::ParseCommandLineOptions(argc, argv, "Automemcpy Json Results Analyzer\n");

// Reads all samples stored in the input JSON files.
std::vector<Sample> Samples;
for (const auto &Filename : InputFilenames) {
auto Result = parseJsonResultFile(Filename);
llvm::append_range(Samples, Result.Samples);
}

// Extracts median of throughputs.
std::vector<FunctionData> Functions = getThroughputs(Samples);
fillScores(Functions);
castVotes(Functions);

// TODO: Implement tie breaking algorithm.
std::sort(Functions.begin(), Functions.end(),
[](const FunctionData &A, const FunctionData &B) {
return A.FinalGrade < B.FinalGrade;
});

// Present data by function type.
std::stable_sort(Functions.begin(), Functions.end(),
[](const FunctionData &A, const FunctionData &B) {
return A.Id.Type < B.Id.Type;
});

// Print result.
for (const FunctionData &Function : Functions) {
outs() << formatv("{0,-10}", Grade::getString(Function.FinalGrade));
outs() << " |";
Serialize(outs(), Function.GradeHisto);
outs() << "| ";
outs().resetColor();
outs() << formatv("{0,+25}", Function.Id.Name);
outs() << "\n";
}

return EXIT_SUCCESS;
}

} // namespace automemcpy
} // namespace llvm

// Process entry point: forwards to llvm::automemcpy::Main and uses its result
// as the exit code.
int main(int argc, char **argv) {
  const int ExitCode = llvm::automemcpy::Main(argc, argv);
  return ExitCode;
}
9 changes: 9 additions & 0 deletions libc/benchmarks/automemcpy/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Unit test for the code generation library.
add_libc_benchmark_unittest(libc-automemcpy-codegen-test
SRCS CodeGenTest.cpp
DEPENDS automemcpy_codegen
)

# Unit test for the result analysis library.
add_libc_benchmark_unittest(libc-automemcpy-result-analyzer-test
SRCS ResultAnalyzerTest.cpp
DEPENDS automemcpy_result_analyzer_lib
)
219 changes: 219 additions & 0 deletions libc/benchmarks/automemcpy/unittests/CodeGenTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
//===-- Automemcpy CodeGen Test -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "automemcpy/CodeGen.h"
#include "automemcpy/RandomFunctionGenerator.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

using testing::AllOf;
using testing::AnyOf;
using testing::ElementsAre;
using testing::Ge;
using testing::Gt;
using testing::Le;
using testing::Lt;

namespace llvm {
namespace automemcpy {
namespace {

TEST(Automemcpy, Codegen) {
static constexpr FunctionDescriptor kDescriptors[] = {
{FunctionType::MEMCPY, llvm::None, llvm::None, llvm::None, llvm::None,
Accelerator{{0, kMaxSize}}, ElementTypeClass::NATIVE},
{FunctionType::MEMCPY, Contiguous{{0, 4}}, Overlap{{4, 256}},
Loop{{256, kMaxSize}, 64}, llvm::None, llvm::None,
ElementTypeClass::NATIVE},
{FunctionType::MEMCMP, Contiguous{{0, 2}}, Overlap{{2, 64}}, llvm::None,
AlignedLoop{Loop{{64, kMaxSize}, 16}, 16, AlignArg::_1}, llvm::None,
ElementTypeClass::NATIVE},
{FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, llvm::None,
AlignedLoop{Loop{{256, kMaxSize}, 32}, 16, AlignArg::_1}, llvm::None,
ElementTypeClass::NATIVE},
{FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, llvm::None,
AlignedLoop{Loop{{256, kMaxSize}, 32}, 32, AlignArg::_1}, llvm::None,
ElementTypeClass::NATIVE},
{FunctionType::BZERO, Contiguous{{0, 4}}, Overlap{{4, 128}}, llvm::None,
AlignedLoop{Loop{{128, kMaxSize}, 32}, 32, AlignArg::_1}, llvm::None,
ElementTypeClass::NATIVE},
};

std::string Output;
raw_string_ostream OutputStream(Output);
Serialize(OutputStream, kDescriptors);

EXPECT_STREQ(OutputStream.str().c_str(),
R"(// This file is auto-generated by libc/benchmarks/automemcpy.
// Functions : 6
#include "LibcFunctionPrototypes.h"
#include "automemcpy/FunctionDescriptor.h"
#include "src/string/memory_utils/elements.h"
using llvm::libc_benchmarks::BzeroConfiguration;
using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;
using llvm::libc_benchmarks::MemcpyConfiguration;
using llvm::libc_benchmarks::MemsetConfiguration;
namespace __llvm_libc {
static void memcpy_0xE00E29EE73994E2B(char *__restrict dst, const char *__restrict src, size_t size) {
using namespace __llvm_libc::x86;
return Copy<Accelerator>(dst, src, size);
}
static void memcpy_0x7381B60C7BE75EF9(char *__restrict dst, const char *__restrict src, size_t size) {
using namespace __llvm_libc::x86;
if(size == 0) return;
if(size == 1) return Copy<_1>(dst, src);
if(size == 2) return Copy<_2>(dst, src);
if(size == 3) return Copy<_3>(dst, src);
if(size < 8) return Copy<HeadTail<_4>>(dst, src, size);
if(size < 16) return Copy<HeadTail<_8>>(dst, src, size);
if(size < 32) return Copy<HeadTail<_16>>(dst, src, size);
if(size < 64) return Copy<HeadTail<_32>>(dst, src, size);
if(size < 128) return Copy<HeadTail<_64>>(dst, src, size);
if(size < 256) return Copy<HeadTail<_128>>(dst, src, size);
return Copy<Loop<_64>>(dst, src, size);
}
static int memcmp_0x348D7BA6DB0EE033(const char * lhs, const char * rhs, size_t size) {
using namespace __llvm_libc::x86;
if(size == 0) return 0;
if(size == 1) return ThreeWayCompare<_1>(lhs, rhs);
if(size < 4) return ThreeWayCompare<HeadTail<_2>>(lhs, rhs, size);
if(size < 8) return ThreeWayCompare<HeadTail<_4>>(lhs, rhs, size);
if(size < 16) return ThreeWayCompare<HeadTail<_8>>(lhs, rhs, size);
if(size < 32) return ThreeWayCompare<HeadTail<_16>>(lhs, rhs, size);
if(size < 64) return ThreeWayCompare<HeadTail<_32>>(lhs, rhs, size);
return ThreeWayCompare<Align<_16,Arg::Lhs>::Then<Loop<_16>>>(lhs, rhs, size);
}
static void memset_0x71E761699B999863(char * dst, int value, size_t size) {
using namespace __llvm_libc::x86;
if(size == 0) return;
if(size == 1) return SplatSet<_1>(dst, value);
if(size < 4) return SplatSet<HeadTail<_2>>(dst, value, size);
if(size < 8) return SplatSet<HeadTail<_4>>(dst, value, size);
if(size < 16) return SplatSet<HeadTail<_8>>(dst, value, size);
if(size < 32) return SplatSet<HeadTail<_16>>(dst, value, size);
if(size < 64) return SplatSet<HeadTail<_32>>(dst, value, size);
if(size < 128) return SplatSet<HeadTail<_64>>(dst, value, size);
if(size < 256) return SplatSet<HeadTail<_128>>(dst, value, size);
return SplatSet<Align<_16,Arg::Dst>::Then<Loop<_32>>>(dst, value, size);
}
static void memset_0x3DF0F44E2ED6A50F(char * dst, int value, size_t size) {
using namespace __llvm_libc::x86;
if(size == 0) return;
if(size == 1) return SplatSet<_1>(dst, value);
if(size < 4) return SplatSet<HeadTail<_2>>(dst, value, size);
if(size < 8) return SplatSet<HeadTail<_4>>(dst, value, size);
if(size < 16) return SplatSet<HeadTail<_8>>(dst, value, size);
if(size < 32) return SplatSet<HeadTail<_16>>(dst, value, size);
if(size < 64) return SplatSet<HeadTail<_32>>(dst, value, size);
if(size < 128) return SplatSet<HeadTail<_64>>(dst, value, size);
if(size < 256) return SplatSet<HeadTail<_128>>(dst, value, size);
return SplatSet<Align<_32,Arg::Dst>::Then<Loop<_32>>>(dst, value, size);
}
static void bzero_0x475977492C218AD4(char * dst, size_t size) {
using namespace __llvm_libc::x86;
if(size == 0) return;
if(size == 1) return SplatSet<_1>(dst, 0);
if(size == 2) return SplatSet<_2>(dst, 0);
if(size == 3) return SplatSet<_3>(dst, 0);
if(size < 8) return SplatSet<HeadTail<_4>>(dst, 0, size);
if(size < 16) return SplatSet<HeadTail<_8>>(dst, 0, size);
if(size < 32) return SplatSet<HeadTail<_16>>(dst, 0, size);
if(size < 64) return SplatSet<HeadTail<_32>>(dst, 0, size);
if(size < 128) return SplatSet<HeadTail<_64>>(dst, 0, size);
return SplatSet<Align<_32,Arg::Dst>::Then<Loop<_32>>>(dst, 0, size);
}
} // namespace __llvm_libc
namespace llvm {
namespace automemcpy {
ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors() {
static constexpr NamedFunctionDescriptor kDescriptors[] = {
{"memcpy_0xE00E29EE73994E2B",{FunctionType::MEMCPY,llvm::None,llvm::None,llvm::None,llvm::None,Accelerator{{0,kMaxSize}},ElementTypeClass::NATIVE}},
{"memcpy_0x7381B60C7BE75EF9",{FunctionType::MEMCPY,Contiguous{{0,4}},Overlap{{4,256}},Loop{{256,kMaxSize},64},llvm::None,llvm::None,ElementTypeClass::NATIVE}},
{"memcmp_0x348D7BA6DB0EE033",{FunctionType::MEMCMP,Contiguous{{0,2}},Overlap{{2,64}},llvm::None,AlignedLoop{Loop{{64,kMaxSize},16},16,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}},
{"memset_0x71E761699B999863",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},llvm::None,AlignedLoop{Loop{{256,kMaxSize},32},16,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}},
{"memset_0x3DF0F44E2ED6A50F",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},llvm::None,AlignedLoop{Loop{{256,kMaxSize},32},32,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}},
{"bzero_0x475977492C218AD4",{FunctionType::BZERO,Contiguous{{0,4}},Overlap{{4,128}},llvm::None,AlignedLoop{Loop{{128,kMaxSize},32},32,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}},
};
return makeArrayRef(kDescriptors);
}
} // namespace automemcpy
} // namespace llvm
using MemcpyStub = void (*)(char *__restrict, const char *__restrict, size_t);
template <MemcpyStub Foo>
void *Wrap(void *__restrict dst, const void *__restrict src, size_t size) {
Foo(reinterpret_cast<char *__restrict>(dst),
reinterpret_cast<const char *__restrict>(src), size);
return dst;
}
llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations() {
using namespace __llvm_libc;
static constexpr MemcpyConfiguration kConfigurations[] = {
{Wrap<memcpy_0xE00E29EE73994E2B>, "memcpy_0xE00E29EE73994E2B"},
{Wrap<memcpy_0x7381B60C7BE75EF9>, "memcpy_0x7381B60C7BE75EF9"},
};
return llvm::makeArrayRef(kConfigurations);
}
using MemcmpStub = int (*)(const char *, const char *, size_t);
template <MemcmpStub Foo>
int Wrap(const void *lhs, const void *rhs, size_t size) {
return Foo(reinterpret_cast<const char *>(lhs),
reinterpret_cast<const char *>(rhs), size);
}
llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations() {
using namespace __llvm_libc;
static constexpr MemcmpOrBcmpConfiguration kConfigurations[] = {
{Wrap<memcmp_0x348D7BA6DB0EE033>, "memcmp_0x348D7BA6DB0EE033"},
};
return llvm::makeArrayRef(kConfigurations);
}
llvm::ArrayRef<MemcmpOrBcmpConfiguration> getBcmpConfigurations() {
return {};
}
using MemsetStub = void (*)(char *, int, size_t);
template <MemsetStub Foo> void *Wrap(void *dst, int value, size_t size) {
Foo(reinterpret_cast<char *>(dst), value, size);
return dst;
}
llvm::ArrayRef<MemsetConfiguration> getMemsetConfigurations() {
using namespace __llvm_libc;
static constexpr MemsetConfiguration kConfigurations[] = {
{Wrap<memset_0x71E761699B999863>, "memset_0x71E761699B999863"},
{Wrap<memset_0x3DF0F44E2ED6A50F>, "memset_0x3DF0F44E2ED6A50F"},
};
return llvm::makeArrayRef(kConfigurations);
}
using BzeroStub = void (*)(char *, size_t);
template <BzeroStub Foo> void Wrap(void *dst, size_t size) {
Foo(reinterpret_cast<char *>(dst), size);
}
llvm::ArrayRef<BzeroConfiguration> getBzeroConfigurations() {
using namespace __llvm_libc;
static constexpr BzeroConfiguration kConfigurations[] = {
{Wrap<bzero_0x475977492C218AD4>, "bzero_0x475977492C218AD4"},
};
return llvm::makeArrayRef(kConfigurations);
}
// Functions : 6
)");
}
} // namespace
} // namespace automemcpy
} // namespace llvm
170 changes: 170 additions & 0 deletions libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
//===-- Automemcpy Json Results Analyzer Test ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "automemcpy/ResultAnalyzer.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <algorithm>

using testing::ElementsAre;
using testing::Pair;
using testing::SizeIs;

namespace llvm {
namespace automemcpy {
namespace {

// A single sample must produce a single FunctionData entry whose only
// distribution reports that sample's value as its median throughput.
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) {
  static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
  static constexpr DistributionId DistA = {{"A"}};
  static constexpr SampleId Id = {Foo1, DistA};
  static constexpr Sample kSamples[] = {
      Sample{Id, 4},
  };

  const std::vector<FunctionData> Data = getThroughputs(kSamples);
  // Fatal check: the expectations below index into Data, so stop here instead
  // of reading out of bounds when the size is unexpected.
  ASSERT_THAT(Data, SizeIs(1));
  EXPECT_THAT(Data[0].Id, Foo1);
  EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
  // A single value is provided.
  EXPECT_THAT(
      Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 4);
}

// Several samples sharing the same (function, distribution) id must collapse
// into a single entry that reports the median of the sampled values.
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
  static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
  static constexpr DistributionId DistA = {{"A"}};
  static constexpr SampleId Id = {Foo1, DistA};
  static constexpr Sample kSamples[] = {Sample{Id, 4}, Sample{Id, 5},
                                        Sample{Id, 5}};

  const std::vector<FunctionData> Data = getThroughputs(kSamples);
  // Fatal check: the expectations below index into Data, so stop here instead
  // of reading out of bounds when the size is unexpected.
  ASSERT_THAT(Data, SizeIs(1));
  EXPECT_THAT(Data[0].Id, Foo1);
  EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
  // When multiple values are provided we pick the median one (here median of 4,
  // 5, 5).
  EXPECT_THAT(
      Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 5);
}

// Samples covering two functions and two distributions must be grouped into
// one entry per function, each holding data for both distributions.
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
  static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
  static constexpr DistributionId DistA = {{"A"}};
  static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
  static constexpr DistributionId DistB = {{"B"}};
  static constexpr Sample kSamples[] = {
      Sample{{Foo1, DistA}, 1}, Sample{{Foo1, DistB}, 2},
      Sample{{Foo2, DistA}, 3}, Sample{{Foo2, DistB}, 4}};
  // Data is aggregated per function.
  const std::vector<FunctionData> Data = getThroughputs(kSamples);
  // 2 functions Foo1 and Foo2. Fatal check: Data[0] and Data[1] are read
  // below, so stop here rather than index out of bounds.
  ASSERT_THAT(Data, SizeIs(2));
  // Each function has data for both distributions DistA and DistB.
  EXPECT_THAT(Data[0].PerDistributionData, SizeIs(2));
  EXPECT_THAT(Data[1].PerDistributionData, SizeIs(2));
}

// Three functions measured on one distribution with throughputs 1, 2 and 3:
// after fillScores the expected normalized scores are 0, 0.5 and 1.
TEST(AutomemcpyJsonResultsAnalyzer, getScore) {
  static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
  static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
  static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY};
  static constexpr DistributionId Dist = {{"A"}};
  static constexpr Sample kSamples[] = {Sample{{Foo1, Dist}, 1},
                                        Sample{{Foo2, Dist}, 2},
                                        Sample{{Foo3, Dist}, 3}};

  // Data is aggregated per function.
  std::vector<FunctionData> Data = getThroughputs(kSamples);
  // Fatal check: Data[0..2] are read below, so stop here rather than index
  // out of bounds if an entry is missing.
  ASSERT_THAT(Data, SizeIs(3));

  // Sort Data by function name so we can test them.
  std::sort(
      Data.begin(), Data.end(),
      [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });

  EXPECT_THAT(Data[0].Id, Foo1);
  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").MedianBytesPerSecond, 1);
  EXPECT_THAT(Data[1].Id, Foo2);
  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").MedianBytesPerSecond, 2);
  EXPECT_THAT(Data[2].Id, Foo3);
  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").MedianBytesPerSecond, 3);

  // Normalizes throughput per distribution.
  fillScores(Data);
  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").Score, 0);
  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").Score, 0.5);
  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").Score, 1);
}

// End-to-end check of the grading pipeline on three functions and two
// distributions: throughputs are aggregated, normalized into scores, turned
// into per-distribution grades, and finally combined into a grade histogram
// and a final grade per function.
TEST(AutomemcpyJsonResultsAnalyzer, castVotes) {
  static constexpr double kAbsErr = 0.01;

  static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
  static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
  static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY};
  static constexpr DistributionId DistA = {{"A"}};
  static constexpr DistributionId DistB = {{"B"}};
  static constexpr Sample kSamples[] = {
      Sample{{Foo1, DistA}, 0}, Sample{{Foo1, DistB}, 30},
      Sample{{Foo2, DistA}, 1}, Sample{{Foo2, DistB}, 100},
      Sample{{Foo3, DistA}, 7}, Sample{{Foo3, DistB}, 100},
  };

  // DistA Throughput ranges from 0 to 7.
  // DistB Throughput ranges from 30 to 100.

  // Data is aggregated per function.
  std::vector<FunctionData> Data = getThroughputs(kSamples);
  // Fatal check: Data[0..2] are read below, so stop here rather than index
  // out of bounds if an entry is missing.
  ASSERT_THAT(Data, SizeIs(3));

  // Sort Data by function name so we can test them.
  std::sort(
      Data.begin(), Data.end(),
      [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });

  // Normalizes throughput per distribution.
  fillScores(Data);

  // Cast votes
  castVotes(Data);

  EXPECT_THAT(Data[0].Id, Foo1);
  EXPECT_THAT(Data[1].Id, Foo2);
  EXPECT_THAT(Data[2].Id, Foo3);

  // Distribution A
  // Throughput is 0, 1 and 7, so normalized scores are 0, 1/7 and 1.
  EXPECT_NEAR(Data[0].PerDistributionData.lookup("A").Score, 0, kAbsErr);
  EXPECT_NEAR(Data[1].PerDistributionData.lookup("A").Score, 1. / 7, kAbsErr);
  EXPECT_NEAR(Data[2].PerDistributionData.lookup("A").Score, 1, kAbsErr);
  // which are turned into grades BAD, MEDIOCRE and EXCELLENT.
  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").Grade, Grade::BAD);
  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").Grade, Grade::MEDIOCRE);
  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").Grade, Grade::EXCELLENT);

  // Distribution B
  // Throughput is 30, 100 and 100, so normalized scores are 0, 1 and 1.
  EXPECT_NEAR(Data[0].PerDistributionData.lookup("B").Score, 0, kAbsErr);
  EXPECT_NEAR(Data[1].PerDistributionData.lookup("B").Score, 1, kAbsErr);
  EXPECT_NEAR(Data[2].PerDistributionData.lookup("B").Score, 1, kAbsErr);
  // which are turned into grades BAD, EXCELLENT and EXCELLENT.
  EXPECT_THAT(Data[0].PerDistributionData.lookup("B").Grade, Grade::BAD);
  EXPECT_THAT(Data[1].PerDistributionData.lookup("B").Grade, Grade::EXCELLENT);
  EXPECT_THAT(Data[2].PerDistributionData.lookup("B").Grade, Grade::EXCELLENT);

  // Now looking from the functions point of view.
  // Note the array is indexed by GradeEnum values (EXCELLENT=0 / BAD = 6)
  EXPECT_THAT(Data[0].GradeHisto, ElementsAre(0, 0, 0, 0, 0, 0, 2));
  EXPECT_THAT(Data[1].GradeHisto, ElementsAre(1, 0, 0, 0, 0, 1, 0));
  EXPECT_THAT(Data[2].GradeHisto, ElementsAre(2, 0, 0, 0, 0, 0, 0));

  EXPECT_THAT(Data[0].FinalGrade, Grade::BAD);
  EXPECT_THAT(Data[1].FinalGrade, Grade::MEDIOCRE);
  EXPECT_THAT(Data[2].FinalGrade, Grade::EXCELLENT);
}

} // namespace
} // namespace automemcpy
} // namespace llvm
37 changes: 37 additions & 0 deletions libc/src/string/memory_utils/elements.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,43 @@ template <> struct Chained<> {
static void SplatSet(char *dst, const unsigned char value) {}
};

// Spans exactly 'Size' bytes using two fixed-size elements: ElementA is applied
// at the beginning of the buffer and ElementB is applied at 'kOffset' so that
// it ends on the last of the 'Size' bytes. The static_asserts guarantee that
// the two elements together leave no gap (they may cover some bytes twice).
template <size_t Size, typename ElementA, typename ElementB = ElementA>
struct Overlap {
  static constexpr size_t kSize = Size;
  static_assert(ElementB::kSize <= ElementA::kSize, "ElementB too big");
  static_assert(ElementA::kSize <= Size, "ElementA too big");
  static_assert((ElementA::kSize + ElementB::kSize) >= Size,
                "Elements too small to overlap");
  // Position of ElementB, chosen so that it finishes at byte 'kSize'.
  static constexpr size_t kOffset = kSize - ElementB::kSize;

  // Copies the leading ElementA, then the trailing ElementB.
  static void Copy(char *__restrict dst, const char *__restrict src) {
    ElementA::Copy(dst, src);
    char *const tail_dst = dst + kOffset;
    const char *const tail_src = src + kOffset;
    ElementB::Copy(tail_dst, tail_src);
  }

  // True when both the leading and the trailing elements compare equal. The
  // trailing element is only inspected if the leading one matches.
  static bool Equals(const char *lhs, const char *rhs) {
    return ElementA::Equals(lhs, rhs) &&
           ElementB::Equals(lhs + kOffset, rhs + kOffset);
  }

  // Returns the three-way comparison of the first element that differs
  // (leading element first), or 0 when both elements are equal.
  static int ThreeWayCompare(const char *lhs, const char *rhs) {
    if (!ElementA::Equals(lhs, rhs))
      return ElementA::ThreeWayCompare(lhs, rhs);
    const char *const tail_lhs = lhs + kOffset;
    const char *const tail_rhs = rhs + kOffset;
    return ElementB::Equals(tail_lhs, tail_rhs)
               ? 0
               : ElementB::ThreeWayCompare(tail_lhs, tail_rhs);
  }

  // Fills the leading ElementA, then the trailing ElementB, with 'value'.
  static void SplatSet(char *dst, const unsigned char value) {
    ElementA::SplatSet(dst, value);
    char *const tail_dst = dst + kOffset;
    ElementB::SplatSet(tail_dst, value);
  }
};

// Runtime-size Higher-Order Operations
// ------------------------------------
// - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
Expand Down