| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| //===-- Analyze benchmark JSON files ----------------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H | ||
| #define LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H | ||
|
|
||
| #include "automemcpy/FunctionDescriptor.h" | ||
| #include "llvm/ADT/ArrayRef.h" | ||
| #include "llvm/ADT/StringMap.h" | ||
| #include <array> | ||
| #include <vector> | ||
|
|
||
| namespace llvm { | ||
| namespace automemcpy { | ||
|
|
||
| // A Grade as in the Majority Judgment voting system. | ||
| struct Grade { | ||
| enum GradeEnum { | ||
| EXCELLENT, | ||
| VERY_GOOD, | ||
| GOOD, | ||
| PASSABLE, | ||
| INADEQUATE, | ||
| MEDIOCRE, | ||
| BAD, | ||
| ARRAY_SIZE, | ||
| }; | ||
|
|
||
| // Returns a human readable string of the enum. | ||
| static StringRef getString(const GradeEnum &GE); | ||
|
|
||
| // Turns 'Score' into a GradeEnum. | ||
| static GradeEnum judge(double Score); | ||
| }; | ||
|
|
||
| // A 'GradeEnum' indexed array with counts for each grade. | ||
| using GradeHistogram = std::array<size_t, Grade::ARRAY_SIZE>; | ||
|
|
||
| // Identifies a Function by its name and type. Used as a key in a map. | ||
| struct FunctionId { | ||
| StringRef Name; | ||
| FunctionType Type; | ||
| COMPARABLE_AND_HASHABLE(FunctionId, Type, Name) | ||
| }; | ||
|
|
||
| struct PerDistributionData { | ||
| double MedianBytesPerSecond; // Median of samples for this distribution. | ||
| double Score; // Normalized score for this distribution. | ||
| Grade::GradeEnum Grade; // Grade for this distribution. | ||
| }; | ||
|
|
||
| struct FunctionData { | ||
| FunctionId Id; | ||
| StringMap<PerDistributionData> PerDistributionData; | ||
| GradeHistogram GradeHisto = {}; // GradeEnum indexed array | ||
| Grade::GradeEnum FinalGrade = Grade::BAD; // Overall grade for this function | ||
| }; | ||
|
|
||
| // Identifies a Distribution by its name. Used as a key in a map. | ||
| struct DistributionId { | ||
| StringRef Name; | ||
| COMPARABLE_AND_HASHABLE(DistributionId, Name) | ||
| }; | ||
|
|
||
| // Identifies a Sample by its distribution and function. Used as a key in a map. | ||
| struct SampleId { | ||
| FunctionId Function; | ||
| DistributionId Distribution; | ||
| COMPARABLE_AND_HASHABLE(SampleId, Function.Type, Function.Name, | ||
| Distribution.Name) | ||
| }; | ||
|
|
||
| // A SampleId with an associated measured throughput. | ||
| struct Sample { | ||
| SampleId Id; | ||
| double BytesPerSecond = 0; | ||
| }; | ||
|
|
||
| // This function collects Samples that belong to the same distribution and | ||
| // function and retains the median one. It then stores each of them into a | ||
| // 'FunctionData' and returns them as a vector. | ||
| std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples); | ||
|
|
||
| // Normalize the function's throughput per distribution. | ||
| void fillScores(MutableArrayRef<FunctionData> Functions); | ||
|
|
||
| // Convert scores into Grades, stores an histogram of Grade for each functions | ||
| // and cast a median grade for the function. | ||
| void castVotes(MutableArrayRef<FunctionData> Functions); | ||
|
|
||
| } // namespace automemcpy | ||
| } // namespace llvm | ||
|
|
||
| #endif // LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
# Code generation library.
add_library(automemcpy_codegen CodeGen.cpp)
target_link_libraries(automemcpy_codegen PUBLIC LLVMSupport)
target_compile_options(automemcpy_codegen PUBLIC -fno-rtti)
target_include_directories(automemcpy_codegen PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR})

# Generator executable, its standard output is the generated source file.
add_executable(automemcpy_codegen_main CodeGenMain.cpp RandomFunctionGenerator.cpp)
target_link_libraries(automemcpy_codegen_main PUBLIC automemcpy_codegen ${Z3_LIBRARIES})
target_compile_options(automemcpy_codegen_main PUBLIC -fno-rtti)

# Runs the generator and captures its output into 'Implementations.cpp'.
# The binary is located through $<TARGET_FILE:...> so the command keeps working
# when CMAKE_RUNTIME_OUTPUT_DIRECTORY is unset or is not where the target is
# built. 'cmake -E echo' is used instead of the platform's 'echo'.
set(Implementations "${CMAKE_CURRENT_BINARY_DIR}/Implementations.cpp")
add_custom_command(
  OUTPUT ${Implementations}
  COMMAND "$<TARGET_FILE:automemcpy_codegen_main>" > "${Implementations}"
  COMMAND ${CMAKE_COMMAND} -E echo "automemcpy implementations generated in ${Implementations}"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  DEPENDS automemcpy_codegen_main
)

# Library built from the generated implementations.
add_library(automemcpy_implementations "${Implementations}")
target_link_libraries(automemcpy_implementations PUBLIC LLVMSupport libc-memory-benchmark)
target_include_directories(automemcpy_implementations PRIVATE ${LIBC_SOURCE_DIR} ${LIBC_AUTOMEMCPY_INCLUDE_DIR})
target_compile_options(automemcpy_implementations PUBLIC -fno-rtti PRIVATE ${LIBC_COMPILE_OPTIONS_NATIVE} "SHELL:-mllvm -combiner-global-alias-analysis" -fno-builtin)

# Benchmark binary for the generated implementations.
add_executable(automemcpy EXCLUDE_FROM_ALL ${LIBC_SOURCE_DIR}/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp)
target_link_libraries(automemcpy PRIVATE libc-memory-benchmark benchmark_main automemcpy_implementations)

# Result analysis library.
add_library(automemcpy_result_analyzer_lib EXCLUDE_FROM_ALL ResultAnalyzer.cpp)
target_link_libraries(automemcpy_result_analyzer_lib PUBLIC LLVMSupport)
target_include_directories(automemcpy_result_analyzer_lib PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR})

# Result analysis command line tool.
add_executable(automemcpy_result_analyzer EXCLUDE_FROM_ALL ResultAnalyzerMain.cpp)
target_link_libraries(automemcpy_result_analyzer PRIVATE automemcpy_result_analyzer_lib automemcpy_implementations)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| #include "automemcpy/CodeGen.h" | ||
| #include "automemcpy/RandomFunctionGenerator.h" | ||
| #include <unordered_set> | ||
|
|
||
| namespace llvm { | ||
| namespace automemcpy { | ||
|
|
||
| std::vector<FunctionDescriptor> generateFunctionDescriptors() { | ||
| std::unordered_set<FunctionDescriptor, FunctionDescriptor::Hasher> Seen; | ||
| std::vector<FunctionDescriptor> FunctionDescriptors; | ||
| RandomFunctionGenerator P; | ||
| while (Optional<FunctionDescriptor> MaybeFD = P.next()) { | ||
| FunctionDescriptor FD = *MaybeFD; | ||
| if (Seen.count(FD)) // FIXME: Z3 sometimes returns twice the same object. | ||
| continue; | ||
| Seen.insert(FD); | ||
| FunctionDescriptors.push_back(std::move(FD)); | ||
| } | ||
| return FunctionDescriptors; | ||
| } | ||
|
|
||
| } // namespace automemcpy | ||
| } // namespace llvm | ||
|
|
||
| int main(int, char **) { | ||
| llvm::automemcpy::Serialize(llvm::outs(), | ||
| llvm::automemcpy::generateFunctionDescriptors()); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,279 @@ | ||
| //===-- Generate random but valid function descriptors -------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "automemcpy/RandomFunctionGenerator.h" | ||
|
|
||
| #include <llvm/ADT/None.h> | ||
| #include <llvm/ADT/StringRef.h> | ||
| #include <llvm/Support/raw_ostream.h> | ||
|
|
||
| #include <set> | ||
|
|
||
| namespace llvm { | ||
| namespace automemcpy { | ||
|
|
||
| // Exploration parameters | ||
| // ---------------------- | ||
| // Here we define a set of values that will contraint the exploration and | ||
| // limit combinatorial explosion. | ||
|
|
||
| // We limit the number of cases for individual sizes to sizes up to 4. | ||
| // More individual sizes don't bring much over the overlapping strategy. | ||
| static constexpr int kMaxIndividualSize = 4; | ||
|
|
||
| // We limit Overlapping Strategy to sizes up to 256. | ||
| // An overlap of 256B means accessing 128B at once which is usually not | ||
| // feasible by current CPUs. We rely on the compiler to generate multiple | ||
| // loads/stores if needed but higher sizes are unlikely to benefit from hardware | ||
| // acceleration. | ||
| static constexpr int kMaxOverlapSize = 256; | ||
|
|
||
| // For the loop strategies, we make sure that they iterate at least a certain | ||
| // number of times to amortize the cost of looping. | ||
| static constexpr int kLoopMinIter = 3; | ||
| static constexpr int kAlignedLoopMinIter = 2; | ||
|
|
||
| // We restrict the size of the block of data to handle in a loop. | ||
| // Generally speaking block size <= 16 perform poorly. | ||
| static constexpr int kLoopBlockSize[] = {16, 32, 64}; | ||
|
|
||
| // We restrict alignment to the following values. | ||
| static constexpr int kLoopAlignments[] = {16, 32, 64}; | ||
|
|
||
| // We make sure that the region bounds are one of the following values. | ||
| static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48, | ||
| 64, 96, 128, 256, 512, 1024, kMaxSize}; | ||
|
|
||
| // We also allow disabling loops, aligned loops and accelerators. | ||
| static constexpr bool kDisableLoop = false; | ||
| static constexpr bool kDisableAlignedLoop = false; | ||
| static constexpr bool kDisableAccelerator = false; | ||
|
|
||
| // For memcpy, we can also explore whether aligning on source or destination has | ||
| // an effect. | ||
| static constexpr bool kExploreAlignmentArg = true; | ||
|
|
||
| // The function we generate code for. | ||
| // BCMP is specifically disabled for now. | ||
| static constexpr int kFunctionTypes[] = { | ||
| (int)FunctionType::MEMCPY, | ||
| (int)FunctionType::MEMCMP, | ||
| // (int)FunctionType::BCMP, | ||
| (int)FunctionType::MEMSET, | ||
| (int)FunctionType::BZERO, | ||
| }; | ||
|
|
||
| // The actual implementation of each function can be handled via primitive types | ||
| // (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN). | ||
| // We want to move toward delegating the code generation entirely to the | ||
| // compiler but for now we have to make use of -per microarchitecture- custom | ||
| // implementations. Scalar being more portable but also less performant, we | ||
| // remove it as well. | ||
| static constexpr int kElementClasses[] = { | ||
| // (int)ElementTypeClass::SCALAR, | ||
| (int)ElementTypeClass::NATIVE, | ||
| // (int)ElementTypeClass::BUILTIN | ||
| }; | ||
|
|
||
| RandomFunctionGenerator::RandomFunctionGenerator() | ||
| : Solver(Context), Type(Context.int_const("Type")), | ||
| ContiguousBegin(Context.int_const("ContiguousBegin")), | ||
| ContiguousEnd(Context.int_const("ContiguousEnd")), | ||
| OverlapBegin(Context.int_const("OverlapBegin")), | ||
| OverlapEnd(Context.int_const("OverlapEnd")), | ||
| LoopBegin(Context.int_const("LoopBegin")), | ||
| LoopEnd(Context.int_const("LoopEnd")), | ||
| LoopBlockSize(Context.int_const("LoopBlockSize")), | ||
| AlignedLoopBegin(Context.int_const("AlignedLoopBegin")), | ||
| AlignedLoopEnd(Context.int_const("AlignedLoopEnd")), | ||
| AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")), | ||
| AlignedAlignment(Context.int_const("AlignedAlignment")), | ||
| AlignedArg(Context.int_const("AlignedArg")), | ||
| AcceleratorBegin(Context.int_const("AcceleratorBegin")), | ||
| AcceleratorEnd(Context.int_const("AcceleratorEnd")), | ||
| ElementClass(Context.int_const("ElementClass")) { | ||
| // All possible functions. | ||
| Solver.add(inSetConstraint(Type, kFunctionTypes)); | ||
|
|
||
| // Add constraints for region bounds. | ||
| addBoundsAndAnchors(ContiguousBegin, ContiguousEnd); | ||
| addBoundsAndAnchors(OverlapBegin, OverlapEnd); | ||
| addBoundsAndAnchors(LoopBegin, LoopEnd); | ||
| addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd); | ||
| addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd); | ||
| // We always consider strategies in this order, and we | ||
| // always end with the `Accelerator` strategy, as it's typically more | ||
| // efficient for large sizes. | ||
| // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator | ||
| Solver.add(ContiguousEnd == OverlapBegin); | ||
| Solver.add(OverlapEnd == LoopBegin); | ||
| Solver.add(LoopEnd == AlignedLoopBegin); | ||
| Solver.add(AlignedLoopEnd == AcceleratorBegin); | ||
| // Fix endpoints: The minimum size that we want to copy is 0, and we always | ||
| // start with the `Contiguous` strategy. The max size is `kMaxSize`. | ||
| Solver.add(ContiguousBegin == 0); | ||
| Solver.add(AcceleratorEnd == kMaxSize); | ||
| // Contiguous | ||
| Solver.add(ContiguousEnd <= kMaxIndividualSize + 1); | ||
| // Overlap | ||
| Solver.add(OverlapEnd <= kMaxOverlapSize + 1); | ||
| // Overlap only ever makes sense when accessing multiple bytes at a time. | ||
| // i.e. Overlap<1> is useless. | ||
| Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2); | ||
| // Loop | ||
| addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter); | ||
| // Aligned Loop | ||
| addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize, | ||
| kAlignedLoopMinIter); | ||
| Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments)); | ||
| Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64); | ||
| Solver.add(AlignedLoopBlockSize >= AlignedAlignment); | ||
| Solver.add(AlignedLoopBlockSize >= LoopBlockSize); | ||
| z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY; | ||
| z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg; | ||
| Solver.add( | ||
| (ExploreAlignment && | ||
| inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) || | ||
| (!ExploreAlignment && AlignedArg == (int)AlignArg::_1)); | ||
| // Accelerator | ||
| Solver.add(IsMemcpy || | ||
| (AcceleratorBegin == | ||
| AcceleratorEnd)); // Only Memcpy has accelerator for now. | ||
| // Element classes | ||
| Solver.add(inSetConstraint(ElementClass, kElementClasses)); | ||
|
|
||
| if (kDisableLoop) | ||
| Solver.add(LoopBegin == LoopEnd); | ||
| if (kDisableAlignedLoop) | ||
| Solver.add(AlignedLoopBegin == AlignedLoopEnd); | ||
| if (kDisableAccelerator) | ||
| Solver.add(AcceleratorBegin == AcceleratorEnd); | ||
| } | ||
|
|
||
| // Creates SizeSpan from Begin/End values. | ||
| // Returns llvm::None if Begin==End. | ||
| static Optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) { | ||
| if (Begin == End) | ||
| return None; | ||
| SizeSpan SS; | ||
| SS.Begin = Begin; | ||
| SS.End = End; | ||
| return SS; | ||
| } | ||
|
|
||
| // Generic method to create a `Region` struct with a Span or None if span is | ||
| // empty. | ||
| template <typename Region> | ||
| static Optional<Region> As(size_t Begin, size_t End) { | ||
| if (auto Span = AsSizeSpan(Begin, End)) { | ||
| Region Output; | ||
| Output.Span = *Span; | ||
| return Output; | ||
| } | ||
| return None; | ||
| } | ||
|
|
||
| // Returns a Loop struct or None if span is empty. | ||
| static Optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) { | ||
| if (auto Span = AsSizeSpan(Begin, End)) { | ||
| Loop Output; | ||
| Output.Span = *Span; | ||
| Output.BlockSize = BlockSize; | ||
| return Output; | ||
| } | ||
| return None; | ||
| } | ||
|
|
||
| // Returns an AlignedLoop struct or None if span is empty. | ||
| static Optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End, | ||
| size_t BlockSize, size_t Alignment, | ||
| AlignArg AlignTo) { | ||
| if (auto Loop = AsLoop(Begin, End, BlockSize)) { | ||
| AlignedLoop Output; | ||
| Output.Loop = *Loop; | ||
| Output.Alignment = Alignment; | ||
| Output.AlignTo = AlignTo; | ||
| return Output; | ||
| } | ||
| return None; | ||
| } | ||
|
|
||
| Optional<FunctionDescriptor> RandomFunctionGenerator::next() { | ||
| if (Solver.check() != z3::sat) | ||
| return {}; | ||
|
|
||
| z3::model m = Solver.get_model(); | ||
|
|
||
| // Helper method to get the current numerical value of a z3::expr. | ||
| const auto E = [&m](z3::expr &V) -> int { | ||
| return m.eval(V).get_numeral_int(); | ||
| }; | ||
|
|
||
| // Fill is the function descriptor to return. | ||
| FunctionDescriptor R; | ||
| R.Type = FunctionType(E(Type)); | ||
| R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd)); | ||
| R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd)); | ||
| R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize)); | ||
| R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd), | ||
| E(AlignedLoopBlockSize), E(AlignedAlignment), | ||
| AlignArg(E(AlignedArg))); | ||
| R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd)); | ||
| R.ElementClass = ElementTypeClass(E(ElementClass)); | ||
|
|
||
| // Express current state as a set of constraints. | ||
| z3::expr CurrentLayout = | ||
| (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) && | ||
| (ContiguousEnd == E(ContiguousEnd)) && | ||
| (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) && | ||
| (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) && | ||
| (LoopBlockSize == E(LoopBlockSize)) && | ||
| (AlignedLoopBegin == E(AlignedLoopBegin)) && | ||
| (AlignedLoopEnd == E(AlignedLoopEnd)) && | ||
| (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) && | ||
| (AlignedAlignment == E(AlignedAlignment)) && | ||
| (AlignedArg == E(AlignedArg)) && | ||
| (AcceleratorBegin == E(AcceleratorBegin)) && | ||
| (AcceleratorEnd == E(AcceleratorEnd)) && | ||
| (ElementClass == E(ElementClass)); | ||
|
|
||
| // Ask solver to never show this configuration ever again. | ||
| Solver.add(!CurrentLayout); | ||
| return R; | ||
| } | ||
|
|
||
| // Make sure `Variable` is one of the provided values. | ||
| z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable, | ||
| ArrayRef<int> Values) const { | ||
| z3::expr_vector Args(Variable.ctx()); | ||
| for (int Value : Values) | ||
| Args.push_back(Variable == Value); | ||
| return z3::mk_or(Args); | ||
| } | ||
|
|
||
| void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin, | ||
| z3::expr &End) { | ||
| // Begin and End are picked amongst a set of predefined values. | ||
| Solver.add(inSetConstraint(Begin, kAnchors)); | ||
| Solver.add(inSetConstraint(End, kAnchors)); | ||
| Solver.add(Begin >= 0); | ||
| Solver.add(Begin <= End); | ||
| Solver.add(End <= kMaxSize); | ||
| } | ||
|
|
||
| void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin, | ||
| const z3::expr &LoopEnd, | ||
| z3::expr &LoopBlockSize, | ||
| int LoopMinIter) { | ||
| Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize)); | ||
| Solver.add(LoopBegin == LoopEnd || | ||
| (LoopBegin > (LoopMinIter * LoopBlockSize))); | ||
| } | ||
|
|
||
| } // namespace automemcpy | ||
| } // namespace llvm |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| //===-- Analyze benchmark JSON files --------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // This code analyzes the json file produced by the `automemcpy` binary. | ||
| // | ||
| // As a reminder, `automemcpy` will benchmark each autogenerated memory | ||
| // function against one of the predefined distributions available in the | ||
| // `libc/benchmarks/distributions` folder. | ||
| // | ||
| // It works as follows: | ||
| // - Reads one or more json files. | ||
| // - If there are several runs for the same function and distribution, picks the | ||
| // median throughput (aka `BytesPerSecond`). | ||
| // - Aggregates the throughputs per distribution and scores them from worst | ||
| // (0) to best (1). | ||
| // - Each distribution categorizes each function into one of the following | ||
| // categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE, | ||
| // BAD. | ||
| // - A process similar to the Majority Judgment voting system is used to `elect` | ||
| // the best function. The histogram of grades is returned so we can | ||
| // distinguish between functions with the same final grade. In the following | ||
| // example both functions grade EXCELLENT but we may prefer the second one. | ||
| // | ||
| // | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ... | ||
| // |------------|-----------|-----------|------|----------| ... | ||
| // | Function_1 | 7 | 1 | 2 | | ... | ||
| // | Function_2 | 6 | 4 | | | ... | ||
|
|
||
| #include "automemcpy/ResultAnalyzer.h" | ||
| #include "llvm/ADT/StringRef.h" | ||
| #include <numeric> | ||
| #include <unordered_map> | ||
|
|
||
| namespace llvm { | ||
|
|
||
| namespace automemcpy { | ||
|
|
||
| StringRef Grade::getString(const GradeEnum &GE) { | ||
| switch (GE) { | ||
| case EXCELLENT: | ||
| return "EXCELLENT"; | ||
| case VERY_GOOD: | ||
| return "VERY_GOOD"; | ||
| case GOOD: | ||
| return "GOOD"; | ||
| case PASSABLE: | ||
| return "PASSABLE"; | ||
| case INADEQUATE: | ||
| return "INADEQUATE"; | ||
| case MEDIOCRE: | ||
| return "MEDIOCRE"; | ||
| case BAD: | ||
| return "BAD"; | ||
| case ARRAY_SIZE: | ||
| report_fatal_error("logic error"); | ||
| } | ||
| } | ||
|
|
||
| Grade::GradeEnum Grade::judge(double Score) { | ||
| if (Score >= 6. / 7) | ||
| return EXCELLENT; | ||
| if (Score >= 5. / 7) | ||
| return VERY_GOOD; | ||
| if (Score >= 4. / 7) | ||
| return GOOD; | ||
| if (Score >= 3. / 7) | ||
| return PASSABLE; | ||
| if (Score >= 2. / 7) | ||
| return INADEQUATE; | ||
| if (Score >= 1. / 7) | ||
| return MEDIOCRE; | ||
| return BAD; | ||
| } | ||
|
|
||
| std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) { | ||
| std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher> | ||
| BucketedSamples; | ||
| for (const auto &S : Samples) | ||
| BucketedSamples[S.Id].push_back(S.BytesPerSecond); | ||
| std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher> | ||
| Throughputs; | ||
| for (auto &Pair : BucketedSamples) { | ||
| const auto &Id = Pair.first; | ||
| auto &Values = Pair.second; | ||
| const size_t HalfSize = Values.size() / 2; | ||
| std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end()); | ||
| const double MedianValue = Values[HalfSize]; | ||
| Throughputs[Id.Function][Id.Distribution.Name] = MedianValue; | ||
| } | ||
| std::vector<FunctionData> Output; | ||
| for (auto &Pair : Throughputs) { | ||
| FunctionData Data; | ||
| Data.Id = Pair.first; | ||
| for (const auto &Pair : Pair.second) | ||
| Data.PerDistributionData[Pair.getKey()].MedianBytesPerSecond = | ||
| Pair.getValue(); | ||
| Output.push_back(std::move(Data)); | ||
| } | ||
| return Output; | ||
| } | ||
|
|
||
| void fillScores(MutableArrayRef<FunctionData> Functions) { | ||
| // A key to bucket throughput per function type and distribution. | ||
| struct Key { | ||
| FunctionType Type; | ||
| StringRef Distribution; | ||
|
|
||
| COMPARABLE_AND_HASHABLE(Key, Type, Distribution) | ||
| }; | ||
|
|
||
| // Tracks minimum and maximum values. | ||
| struct MinMax { | ||
| double Min = std::numeric_limits<double>::max(); | ||
| double Max = std::numeric_limits<double>::min(); | ||
| void update(double Value) { | ||
| if (Value < Min) | ||
| Min = Value; | ||
| if (Value > Max) | ||
| Max = Value; | ||
| } | ||
| double normalize(double Value) const { return (Value - Min) / (Max - Min); } | ||
| }; | ||
|
|
||
| std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax; | ||
| for (const auto &Function : Functions) { | ||
| const FunctionType Type = Function.Id.Type; | ||
| for (const auto &Pair : Function.PerDistributionData) { | ||
| const auto &Distribution = Pair.getKey(); | ||
| const double Throughput = Pair.getValue().MedianBytesPerSecond; | ||
| const Key K{Type, Distribution}; | ||
| ThroughputMinMax[K].update(Throughput); | ||
| } | ||
| } | ||
|
|
||
| for (auto &Function : Functions) { | ||
| const FunctionType Type = Function.Id.Type; | ||
| for (const auto &Pair : Function.PerDistributionData) { | ||
| const auto &Distribution = Pair.getKey(); | ||
| const double Throughput = Pair.getValue().MedianBytesPerSecond; | ||
| const Key K{Type, Distribution}; | ||
| Function.PerDistributionData[Distribution].Score = | ||
| ThroughputMinMax[K].normalize(Throughput); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| void castVotes(MutableArrayRef<FunctionData> Functions) { | ||
| for (FunctionData &Function : Functions) | ||
| for (const auto &Pair : Function.PerDistributionData) { | ||
| const StringRef Distribution = Pair.getKey(); | ||
| const double Score = Pair.getValue().Score; | ||
| const auto G = Grade::judge(Score); | ||
| ++(Function.GradeHisto[G]); | ||
| Function.PerDistributionData[Distribution].Grade = G; | ||
| } | ||
|
|
||
| for (FunctionData &Function : Functions) { | ||
| const auto &GradeHisto = Function.GradeHisto; | ||
| const size_t Votes = | ||
| std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U); | ||
| const size_t MedianVote = Votes / 2; | ||
| size_t CountedVotes = 0; | ||
| Grade::GradeEnum MedianGrade = Grade::BAD; | ||
| for (size_t I = 0; I < GradeHisto.size(); ++I) { | ||
| CountedVotes += GradeHisto[I]; | ||
| if (CountedVotes > MedianVote) { | ||
| MedianGrade = Grade::GradeEnum(I); | ||
| break; | ||
| } | ||
| } | ||
| Function.FinalGrade = MedianGrade; | ||
| } | ||
| } | ||
|
|
||
| } // namespace automemcpy | ||
| } // namespace llvm |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| //===-- Application to analyze benchmark JSON files -----------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "automemcpy/ResultAnalyzer.h" | ||
| #include "llvm/ADT/StringMap.h" | ||
| #include "llvm/ADT/StringSet.h" | ||
| #include "llvm/Support/CommandLine.h" | ||
| #include "llvm/Support/Error.h" | ||
| #include "llvm/Support/JSON.h" | ||
| #include "llvm/Support/MemoryBuffer.h" | ||
|
|
||
| namespace llvm { | ||
|
|
||
| // User can specify one or more json filenames to process on the command line. | ||
| static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore, | ||
| cl::desc("<input json files>")); | ||
|
|
||
| namespace automemcpy { | ||
|
|
||
| // This is defined in the autogenerated 'Implementations.cpp' file. | ||
| extern ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors(); | ||
|
|
||
| // Iterates over all functions and fills a map of function name to function | ||
| // descriptor pointers. | ||
| static StringMap<const FunctionDescriptor *> createFunctionDescriptorMap() { | ||
| StringMap<const FunctionDescriptor *> Descriptors; | ||
| for (const NamedFunctionDescriptor &FD : getFunctionDescriptors()) | ||
| Descriptors.insert_or_assign(FD.Name, &FD.Desc); | ||
| return Descriptors; | ||
| } | ||
|
|
||
| // Retrieves the function descriptor for a particular function name. | ||
| static const FunctionDescriptor &getFunctionDescriptor(StringRef FunctionName) { | ||
| static StringMap<const FunctionDescriptor *> Descriptors = | ||
| createFunctionDescriptorMap(); | ||
| const auto *FD = Descriptors.lookup(FunctionName); | ||
| if (!FD) | ||
| report_fatal_error( | ||
| Twine("No FunctionDescriptor for ").concat(FunctionName)); | ||
| return *FD; | ||
| } | ||
|
|
||
| // Functions and distributions names are stored quite a few times so it's more | ||
| // efficient to internalize these strings and refer to them through 'StringRef'. | ||
| static StringRef getInternalizedString(StringRef VolatileStr) { | ||
| static llvm::StringSet<> StringCache; | ||
| return StringCache.insert(VolatileStr).first->getKey(); | ||
| } | ||
|
|
||
| // Helper function for the LLVM JSON API. | ||
| bool fromJSON(const json::Value &V, Sample &Out, json::Path P) { | ||
| std::string Label; | ||
| json::ObjectMapper O(V, P); | ||
| if (O && O.map("bytes_per_second", Out.BytesPerSecond) && | ||
| O.map("label", Label)) { | ||
| const auto LabelPair = StringRef(Label).split(','); | ||
| Out.Id.Function.Name = getInternalizedString(LabelPair.first); | ||
| Out.Id.Function.Type = getFunctionDescriptor(LabelPair.first).Type; | ||
| Out.Id.Distribution.Name = getInternalizedString(LabelPair.second); | ||
| return true; | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| // An object to represent the content of the JSON file. | ||
| // This is easier to parse/serialize JSON when the structures of the json file | ||
| // maps the structure of the object. | ||
| struct JsonFile { | ||
| std::vector<Sample> Samples; | ||
| }; | ||
|
|
||
| // Helper function for the LLVM JSON API. | ||
| bool fromJSON(const json::Value &V, JsonFile &JF, json::Path P) { | ||
| json::ObjectMapper O(V, P); | ||
| return O && O.map("benchmarks", JF.Samples); | ||
| } | ||
|
|
||
| // Global object to ease error reporting, it consumes errors and crash the | ||
| // application with a meaningful message. | ||
| static ExitOnError ExitOnErr; | ||
|
|
||
| // Main JSON parsing method. Reads the content of the file pointed to by | ||
| // 'Filename' and returns a JsonFile object. | ||
| JsonFile parseJsonResultFile(StringRef Filename) { | ||
| auto Buf = ExitOnErr(errorOrToExpected( | ||
| MemoryBuffer::getFile(Filename, /*bool IsText=*/true, | ||
| /*RequiresNullTerminator=*/false))); | ||
| auto JsonValue = ExitOnErr(json::parse(Buf->getBuffer())); | ||
| json::Path::Root Root; | ||
| JsonFile JF; | ||
| if (!fromJSON(JsonValue, JF, Root)) | ||
| ExitOnErr(Root.getError()); | ||
| return JF; | ||
| } | ||
|
|
||
| // Serializes the 'GradeHisto' to the provided 'Stream'. | ||
| static void Serialize(raw_ostream &Stream, const GradeHistogram &GH) { | ||
| static constexpr std::array<StringRef, 9> kCharacters = { | ||
| " ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"}; | ||
|
|
||
| const size_t Max = *std::max_element(GH.begin(), GH.end()); | ||
| for (size_t I = 0; I < GH.size(); ++I) { | ||
| size_t Index = (float(GH[I]) / Max) * (kCharacters.size() - 1); | ||
| Stream << kCharacters.at(Index); | ||
| } | ||
| } | ||
|
|
||
| // Tool driver: gathers the samples of every file in 'InputFilenames', scores | ||
| // and grades the functions, then writes one summary line per function to | ||
| // 'outs()'. | ||
| int Main(int argc, char **argv) { | ||
| ExitOnErr.setBanner("Automemcpy Json Results Analyzer stopped with error: "); | ||
| cl::ParseCommandLineOptions(argc, argv, "Automemcpy Json Results Analyzer\n"); | ||
|
|
||
| // Accumulate the samples of all input JSON files into a single vector. | ||
| std::vector<Sample> AllSamples; | ||
| for (const auto &InputPath : InputFilenames) { | ||
| auto Parsed = parseJsonResultFile(InputPath); | ||
| llvm::append_range(AllSamples, Parsed.Samples); | ||
| } | ||
|
|
||
| // Compute per-function throughputs, then fill in scores and grades. | ||
| std::vector<FunctionData> Ranked = getThroughputs(AllSamples); | ||
| fillScores(Ranked); | ||
| castVotes(Ranked); | ||
|
|
||
| // TODO: Implement tie breaking algorithm. | ||
| const auto ByFinalGrade = [](const FunctionData &Lhs, | ||
| const FunctionData &Rhs) { | ||
| return Lhs.FinalGrade < Rhs.FinalGrade; | ||
| }; | ||
| std::sort(Ranked.begin(), Ranked.end(), ByFinalGrade); | ||
|
|
||
| // Group the output by function type. The sort is stable so the grade order | ||
| // established above is kept within each type. | ||
| const auto ByFunctionType = [](const FunctionData &Lhs, | ||
| const FunctionData &Rhs) { | ||
| return Lhs.Id.Type < Rhs.Id.Type; | ||
| }; | ||
| std::stable_sort(Ranked.begin(), Ranked.end(), ByFunctionType); | ||
|
|
||
| // Emit "<grade> |<histogram>| <name>" for each function. | ||
| raw_ostream &Out = outs(); | ||
| for (const FunctionData &Entry : Ranked) { | ||
| Out << formatv("{0,-10}", Grade::getString(Entry.FinalGrade)) << " |"; | ||
| Serialize(Out, Entry.GradeHisto); | ||
| Out << "| "; | ||
| Out.resetColor(); | ||
| Out << formatv("{0,+25}", Entry.Id.Name) << "\n"; | ||
| } | ||
|
|
||
| return EXIT_SUCCESS; | ||
| } | ||
|
|
||
| } // namespace automemcpy | ||
| } // namespace llvm | ||
|
|
||
| // Process entry point; all the work is delegated to llvm::automemcpy::Main. | ||
| int main(int argc, char **argv) { | ||
| const int ExitCode = llvm::automemcpy::Main(argc, argv); | ||
| return ExitCode; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| # Unit test for the code generator: built from CodeGenTest.cpp and declared | ||
| # as depending on the 'automemcpy_codegen' target. | ||
| # NOTE(review): exact semantics of add_libc_benchmark_unittest are defined | ||
| # elsewhere in the build system - confirm there. | ||
| add_libc_benchmark_unittest(libc-automemcpy-codegen-test | ||
| SRCS CodeGenTest.cpp | ||
| DEPENDS automemcpy_codegen | ||
| ) | ||
|
|
||
| # Unit test for the result analyzer: built from ResultAnalyzerTest.cpp and | ||
| # declared as depending on the 'automemcpy_result_analyzer_lib' target. | ||
| add_libc_benchmark_unittest(libc-automemcpy-result-analyzer-test | ||
| SRCS ResultAnalyzerTest.cpp | ||
| DEPENDS automemcpy_result_analyzer_lib | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,219 @@ | ||
| //===-- Automemcpy CodeGen Test -------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "automemcpy/CodeGen.h" | ||
| #include "automemcpy/RandomFunctionGenerator.h" | ||
| #include "gmock/gmock.h" | ||
| #include "gtest/gtest.h" | ||
|
|
||
| using testing::AllOf; | ||
| using testing::AnyOf; | ||
| using testing::ElementsAre; | ||
| using testing::Ge; | ||
| using testing::Gt; | ||
| using testing::Le; | ||
| using testing::Lt; | ||
|
|
||
| namespace llvm { | ||
| namespace automemcpy { | ||
| namespace { | ||
|
|
||
| // Golden-output test: serializes a fixed list of six function descriptors and | ||
| // compares the produced source text, character for character, against the | ||
| // expected text embedded below. | ||
| TEST(Automemcpy, Codegen) { | ||
| // Two MEMCPY, one MEMCMP, two MEMSET and one BZERO descriptor. | ||
| static constexpr FunctionDescriptor kDescriptors[] = { | ||
| {FunctionType::MEMCPY, llvm::None, llvm::None, llvm::None, llvm::None, | ||
| Accelerator{{0, kMaxSize}}, ElementTypeClass::NATIVE}, | ||
| {FunctionType::MEMCPY, Contiguous{{0, 4}}, Overlap{{4, 256}}, | ||
| Loop{{256, kMaxSize}, 64}, llvm::None, llvm::None, | ||
| ElementTypeClass::NATIVE}, | ||
| {FunctionType::MEMCMP, Contiguous{{0, 2}}, Overlap{{2, 64}}, llvm::None, | ||
| AlignedLoop{Loop{{64, kMaxSize}, 16}, 16, AlignArg::_1}, llvm::None, | ||
| ElementTypeClass::NATIVE}, | ||
| {FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, llvm::None, | ||
| AlignedLoop{Loop{{256, kMaxSize}, 32}, 16, AlignArg::_1}, llvm::None, | ||
| ElementTypeClass::NATIVE}, | ||
| {FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, llvm::None, | ||
| AlignedLoop{Loop{{256, kMaxSize}, 32}, 32, AlignArg::_1}, llvm::None, | ||
| ElementTypeClass::NATIVE}, | ||
| {FunctionType::BZERO, Contiguous{{0, 4}}, Overlap{{4, 128}}, llvm::None, | ||
| AlignedLoop{Loop{{128, kMaxSize}, 32}, 32, AlignArg::_1}, llvm::None, | ||
| ElementTypeClass::NATIVE}, | ||
| }; | ||
|
|
||
||
| // Capture the generated code in an in-memory string. | ||
| std::string Output; | ||
| raw_string_ostream OutputStream(Output); | ||
| Serialize(OutputStream, kDescriptors); | ||
|
|
||
||
| // The expected text is a raw string literal: everything between R"( and )" | ||
| // is compared verbatim, so it must not be reformatted or commented. | ||
| EXPECT_STREQ(OutputStream.str().c_str(), | ||
| R"(// This file is auto-generated by libc/benchmarks/automemcpy. | ||
| // Functions : 6 | ||
| #include "LibcFunctionPrototypes.h" | ||
| #include "automemcpy/FunctionDescriptor.h" | ||
| #include "src/string/memory_utils/elements.h" | ||
| using llvm::libc_benchmarks::BzeroConfiguration; | ||
| using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration; | ||
| using llvm::libc_benchmarks::MemcpyConfiguration; | ||
| using llvm::libc_benchmarks::MemsetConfiguration; | ||
| namespace __llvm_libc { | ||
| static void memcpy_0xE00E29EE73994E2B(char *__restrict dst, const char *__restrict src, size_t size) { | ||
| using namespace __llvm_libc::x86; | ||
| return Copy<Accelerator>(dst, src, size); | ||
| } | ||
| static void memcpy_0x7381B60C7BE75EF9(char *__restrict dst, const char *__restrict src, size_t size) { | ||
| using namespace __llvm_libc::x86; | ||
| if(size == 0) return; | ||
| if(size == 1) return Copy<_1>(dst, src); | ||
| if(size == 2) return Copy<_2>(dst, src); | ||
| if(size == 3) return Copy<_3>(dst, src); | ||
| if(size < 8) return Copy<HeadTail<_4>>(dst, src, size); | ||
| if(size < 16) return Copy<HeadTail<_8>>(dst, src, size); | ||
| if(size < 32) return Copy<HeadTail<_16>>(dst, src, size); | ||
| if(size < 64) return Copy<HeadTail<_32>>(dst, src, size); | ||
| if(size < 128) return Copy<HeadTail<_64>>(dst, src, size); | ||
| if(size < 256) return Copy<HeadTail<_128>>(dst, src, size); | ||
| return Copy<Loop<_64>>(dst, src, size); | ||
| } | ||
| static int memcmp_0x348D7BA6DB0EE033(const char * lhs, const char * rhs, size_t size) { | ||
| using namespace __llvm_libc::x86; | ||
| if(size == 0) return 0; | ||
| if(size == 1) return ThreeWayCompare<_1>(lhs, rhs); | ||
| if(size < 4) return ThreeWayCompare<HeadTail<_2>>(lhs, rhs, size); | ||
| if(size < 8) return ThreeWayCompare<HeadTail<_4>>(lhs, rhs, size); | ||
| if(size < 16) return ThreeWayCompare<HeadTail<_8>>(lhs, rhs, size); | ||
| if(size < 32) return ThreeWayCompare<HeadTail<_16>>(lhs, rhs, size); | ||
| if(size < 64) return ThreeWayCompare<HeadTail<_32>>(lhs, rhs, size); | ||
| return ThreeWayCompare<Align<_16,Arg::Lhs>::Then<Loop<_16>>>(lhs, rhs, size); | ||
| } | ||
| static void memset_0x71E761699B999863(char * dst, int value, size_t size) { | ||
| using namespace __llvm_libc::x86; | ||
| if(size == 0) return; | ||
| if(size == 1) return SplatSet<_1>(dst, value); | ||
| if(size < 4) return SplatSet<HeadTail<_2>>(dst, value, size); | ||
| if(size < 8) return SplatSet<HeadTail<_4>>(dst, value, size); | ||
| if(size < 16) return SplatSet<HeadTail<_8>>(dst, value, size); | ||
| if(size < 32) return SplatSet<HeadTail<_16>>(dst, value, size); | ||
| if(size < 64) return SplatSet<HeadTail<_32>>(dst, value, size); | ||
| if(size < 128) return SplatSet<HeadTail<_64>>(dst, value, size); | ||
| if(size < 256) return SplatSet<HeadTail<_128>>(dst, value, size); | ||
| return SplatSet<Align<_16,Arg::Dst>::Then<Loop<_32>>>(dst, value, size); | ||
| } | ||
| static void memset_0x3DF0F44E2ED6A50F(char * dst, int value, size_t size) { | ||
| using namespace __llvm_libc::x86; | ||
| if(size == 0) return; | ||
| if(size == 1) return SplatSet<_1>(dst, value); | ||
| if(size < 4) return SplatSet<HeadTail<_2>>(dst, value, size); | ||
| if(size < 8) return SplatSet<HeadTail<_4>>(dst, value, size); | ||
| if(size < 16) return SplatSet<HeadTail<_8>>(dst, value, size); | ||
| if(size < 32) return SplatSet<HeadTail<_16>>(dst, value, size); | ||
| if(size < 64) return SplatSet<HeadTail<_32>>(dst, value, size); | ||
| if(size < 128) return SplatSet<HeadTail<_64>>(dst, value, size); | ||
| if(size < 256) return SplatSet<HeadTail<_128>>(dst, value, size); | ||
| return SplatSet<Align<_32,Arg::Dst>::Then<Loop<_32>>>(dst, value, size); | ||
| } | ||
| static void bzero_0x475977492C218AD4(char * dst, size_t size) { | ||
| using namespace __llvm_libc::x86; | ||
| if(size == 0) return; | ||
| if(size == 1) return SplatSet<_1>(dst, 0); | ||
| if(size == 2) return SplatSet<_2>(dst, 0); | ||
| if(size == 3) return SplatSet<_3>(dst, 0); | ||
| if(size < 8) return SplatSet<HeadTail<_4>>(dst, 0, size); | ||
| if(size < 16) return SplatSet<HeadTail<_8>>(dst, 0, size); | ||
| if(size < 32) return SplatSet<HeadTail<_16>>(dst, 0, size); | ||
| if(size < 64) return SplatSet<HeadTail<_32>>(dst, 0, size); | ||
| if(size < 128) return SplatSet<HeadTail<_64>>(dst, 0, size); | ||
| return SplatSet<Align<_32,Arg::Dst>::Then<Loop<_32>>>(dst, 0, size); | ||
| } | ||
| } // namespace __llvm_libc | ||
| namespace llvm { | ||
| namespace automemcpy { | ||
| ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors() { | ||
| static constexpr NamedFunctionDescriptor kDescriptors[] = { | ||
| {"memcpy_0xE00E29EE73994E2B",{FunctionType::MEMCPY,llvm::None,llvm::None,llvm::None,llvm::None,Accelerator{{0,kMaxSize}},ElementTypeClass::NATIVE}}, | ||
| {"memcpy_0x7381B60C7BE75EF9",{FunctionType::MEMCPY,Contiguous{{0,4}},Overlap{{4,256}},Loop{{256,kMaxSize},64},llvm::None,llvm::None,ElementTypeClass::NATIVE}}, | ||
| {"memcmp_0x348D7BA6DB0EE033",{FunctionType::MEMCMP,Contiguous{{0,2}},Overlap{{2,64}},llvm::None,AlignedLoop{Loop{{64,kMaxSize},16},16,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}}, | ||
| {"memset_0x71E761699B999863",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},llvm::None,AlignedLoop{Loop{{256,kMaxSize},32},16,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}}, | ||
| {"memset_0x3DF0F44E2ED6A50F",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},llvm::None,AlignedLoop{Loop{{256,kMaxSize},32},32,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}}, | ||
| {"bzero_0x475977492C218AD4",{FunctionType::BZERO,Contiguous{{0,4}},Overlap{{4,128}},llvm::None,AlignedLoop{Loop{{128,kMaxSize},32},32,AlignArg::_1},llvm::None,ElementTypeClass::NATIVE}}, | ||
| }; | ||
| return makeArrayRef(kDescriptors); | ||
| } | ||
| } // namespace automemcpy | ||
| } // namespace llvm | ||
| using MemcpyStub = void (*)(char *__restrict, const char *__restrict, size_t); | ||
| template <MemcpyStub Foo> | ||
| void *Wrap(void *__restrict dst, const void *__restrict src, size_t size) { | ||
| Foo(reinterpret_cast<char *__restrict>(dst), | ||
| reinterpret_cast<const char *__restrict>(src), size); | ||
| return dst; | ||
| } | ||
| llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations() { | ||
| using namespace __llvm_libc; | ||
| static constexpr MemcpyConfiguration kConfigurations[] = { | ||
| {Wrap<memcpy_0xE00E29EE73994E2B>, "memcpy_0xE00E29EE73994E2B"}, | ||
| {Wrap<memcpy_0x7381B60C7BE75EF9>, "memcpy_0x7381B60C7BE75EF9"}, | ||
| }; | ||
| return llvm::makeArrayRef(kConfigurations); | ||
| } | ||
| using MemcmpStub = int (*)(const char *, const char *, size_t); | ||
| template <MemcmpStub Foo> | ||
| int Wrap(const void *lhs, const void *rhs, size_t size) { | ||
| return Foo(reinterpret_cast<const char *>(lhs), | ||
| reinterpret_cast<const char *>(rhs), size); | ||
| } | ||
| llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations() { | ||
| using namespace __llvm_libc; | ||
| static constexpr MemcmpOrBcmpConfiguration kConfigurations[] = { | ||
| {Wrap<memcmp_0x348D7BA6DB0EE033>, "memcmp_0x348D7BA6DB0EE033"}, | ||
| }; | ||
| return llvm::makeArrayRef(kConfigurations); | ||
| } | ||
| llvm::ArrayRef<MemcmpOrBcmpConfiguration> getBcmpConfigurations() { | ||
| return {}; | ||
| } | ||
| using MemsetStub = void (*)(char *, int, size_t); | ||
| template <MemsetStub Foo> void *Wrap(void *dst, int value, size_t size) { | ||
| Foo(reinterpret_cast<char *>(dst), value, size); | ||
| return dst; | ||
| } | ||
| llvm::ArrayRef<MemsetConfiguration> getMemsetConfigurations() { | ||
| using namespace __llvm_libc; | ||
| static constexpr MemsetConfiguration kConfigurations[] = { | ||
| {Wrap<memset_0x71E761699B999863>, "memset_0x71E761699B999863"}, | ||
| {Wrap<memset_0x3DF0F44E2ED6A50F>, "memset_0x3DF0F44E2ED6A50F"}, | ||
| }; | ||
| return llvm::makeArrayRef(kConfigurations); | ||
| } | ||
| using BzeroStub = void (*)(char *, size_t); | ||
| template <BzeroStub Foo> void Wrap(void *dst, size_t size) { | ||
| Foo(reinterpret_cast<char *>(dst), size); | ||
| } | ||
| llvm::ArrayRef<BzeroConfiguration> getBzeroConfigurations() { | ||
| using namespace __llvm_libc; | ||
| static constexpr BzeroConfiguration kConfigurations[] = { | ||
| {Wrap<bzero_0x475977492C218AD4>, "bzero_0x475977492C218AD4"}, | ||
| }; | ||
| return llvm::makeArrayRef(kConfigurations); | ||
| } | ||
| // Functions : 6 | ||
| )"); | ||
| } | ||
| } // namespace | ||
| } // namespace automemcpy | ||
| } // namespace llvm |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,170 @@ | ||
| //===-- Automemcpy Json Results Analyzer Test ----------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "automemcpy/ResultAnalyzer.h" | ||
| #include "gmock/gmock.h" | ||
| #include "gtest/gtest.h" | ||
|
|
||
| using testing::ElementsAre; | ||
| using testing::Pair; | ||
| using testing::SizeIs; | ||
|
|
||
| namespace llvm { | ||
| namespace automemcpy { | ||
| namespace { | ||
|
|
||
| // A single sample must produce one function entry holding one distribution | ||
| // whose median throughput is the sample value itself. | ||
| TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) { | ||
| static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; | ||
| static constexpr DistributionId DistA = {{"A"}}; | ||
| static constexpr SampleId Id = {Foo1, DistA}; | ||
| static constexpr Sample kSamples[] = { | ||
| Sample{Id, 4}, | ||
| }; | ||
|
|
||
||
| const std::vector<FunctionData> Data = getThroughputs(kSamples); | ||
| // Fatal check: 'Data[0]' below must never be evaluated on a vector of the | ||
| // wrong size. | ||
| ASSERT_THAT(Data, SizeIs(1)); | ||
| EXPECT_THAT(Data[0].Id, Foo1); | ||
| EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1)); | ||
| // A single value is provided. | ||
| EXPECT_THAT( | ||
| Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 4); | ||
| } | ||
|
|
||
| // Several samples sharing the same function and distribution collapse into a | ||
| // single entry whose throughput is the median of the sample values. | ||
| TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) { | ||
| static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; | ||
| static constexpr DistributionId DistA = {{"A"}}; | ||
| static constexpr SampleId Id = {Foo1, DistA}; | ||
| static constexpr Sample kSamples[] = {Sample{Id, 4}, Sample{Id, 5}, | ||
| Sample{Id, 5}}; | ||
|
|
||
||
| const std::vector<FunctionData> Data = getThroughputs(kSamples); | ||
| // Fatal check: 'Data[0]' below must never be evaluated on a vector of the | ||
| // wrong size. | ||
| ASSERT_THAT(Data, SizeIs(1)); | ||
| EXPECT_THAT(Data[0].Id, Foo1); | ||
| EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1)); | ||
| // When multiple values are provided we pick the median one (here median of 4, | ||
| // 5, 5). | ||
| EXPECT_THAT( | ||
| Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 5); | ||
| } | ||
|
|
||
| // Samples covering two functions and two distributions must be grouped into | ||
| // one entry per function, each holding data for both distributions. | ||
| TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) { | ||
| static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; | ||
| static constexpr DistributionId DistA = {{"A"}}; | ||
| static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY}; | ||
| static constexpr DistributionId DistB = {{"B"}}; | ||
| static constexpr Sample kSamples[] = { | ||
| Sample{{Foo1, DistA}, 1}, Sample{{Foo1, DistB}, 2}, | ||
| Sample{{Foo2, DistA}, 3}, Sample{{Foo2, DistB}, 4}}; | ||
| // Data is aggregated per function. | ||
| const std::vector<FunctionData> Data = getThroughputs(kSamples); | ||
| // Fatal check (2 functions Foo1 and Foo2): 'Data[0]' and 'Data[1]' below | ||
| // must never be evaluated on a vector of the wrong size. | ||
| ASSERT_THAT(Data, SizeIs(2)); | ||
| // Each function has data for both distributions DistA and DistB. | ||
| EXPECT_THAT(Data[0].PerDistributionData, SizeIs(2)); | ||
| EXPECT_THAT(Data[1].PerDistributionData, SizeIs(2)); | ||
| } | ||
|
|
||
| // Three functions measured on one distribution: checks the medians returned | ||
| // by 'getThroughputs' and the scores written by 'fillScores'. | ||
| TEST(AutomemcpyJsonResultsAnalyzer, getScore) { | ||
| static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; | ||
| static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY}; | ||
| static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY}; | ||
| static constexpr DistributionId Dist = {{"A"}}; | ||
| static constexpr Sample kSamples[] = {Sample{{Foo1, Dist}, 1}, | ||
| Sample{{Foo2, Dist}, 2}, | ||
| Sample{{Foo3, Dist}, 3}}; | ||
|
|
||
||
| // Data is aggregated per function. | ||
| std::vector<FunctionData> Data = getThroughputs(kSamples); | ||
| // Fatal check: 'Data[0]' to 'Data[2]' are indexed below and must exist. | ||
| ASSERT_THAT(Data, SizeIs(3)); | ||
|
|
||
||
| // Sort Data by function name so we can test them. | ||
| std::sort( | ||
| Data.begin(), Data.end(), | ||
| [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; }); | ||
|
|
||
||
| EXPECT_THAT(Data[0].Id, Foo1); | ||
| EXPECT_THAT(Data[0].PerDistributionData.lookup("A").MedianBytesPerSecond, 1); | ||
| EXPECT_THAT(Data[1].Id, Foo2); | ||
| EXPECT_THAT(Data[1].PerDistributionData.lookup("A").MedianBytesPerSecond, 2); | ||
| EXPECT_THAT(Data[2].Id, Foo3); | ||
| EXPECT_THAT(Data[2].PerDistributionData.lookup("A").MedianBytesPerSecond, 3); | ||
|
|
||
||
| // Normalizes throughput per distribution. | ||
| fillScores(Data); | ||
| EXPECT_THAT(Data[0].PerDistributionData.lookup("A").Score, 0); | ||
| EXPECT_THAT(Data[1].PerDistributionData.lookup("A").Score, 0.5); | ||
| EXPECT_THAT(Data[2].PerDistributionData.lookup("A").Score, 1); | ||
| } | ||
|
|
||
| // End-to-end check of the grading pipeline on three functions and two | ||
| // distributions: throughputs -> normalized scores -> per-distribution grades | ||
| // -> per-function grade histogram and final grade. | ||
| TEST(AutomemcpyJsonResultsAnalyzer, castVotes) { | ||
| static constexpr double kAbsErr = 0.01; | ||
|
|
||
||
| static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; | ||
| static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY}; | ||
| static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY}; | ||
| static constexpr DistributionId DistA = {{"A"}}; | ||
| static constexpr DistributionId DistB = {{"B"}}; | ||
| static constexpr Sample kSamples[] = { | ||
| Sample{{Foo1, DistA}, 0}, Sample{{Foo1, DistB}, 30}, | ||
| Sample{{Foo2, DistA}, 1}, Sample{{Foo2, DistB}, 100}, | ||
| Sample{{Foo3, DistA}, 7}, Sample{{Foo3, DistB}, 100}, | ||
| }; | ||
|
|
||
||
| // DistA Throughput ranges from 0 to 7. | ||
| // DistB Throughput ranges from 30 to 100. | ||
|
|
||
||
| // Data is aggregated per function. | ||
| std::vector<FunctionData> Data = getThroughputs(kSamples); | ||
| // Fatal check: 'Data[0]' to 'Data[2]' are indexed below and must exist. | ||
| ASSERT_THAT(Data, SizeIs(3)); | ||
|
|
||
||
| // Sort Data by function name so we can test them. | ||
| std::sort( | ||
| Data.begin(), Data.end(), | ||
| [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; }); | ||
|
|
||
||
| // Normalizes throughput per distribution. | ||
| fillScores(Data); | ||
|
|
||
||
| // Cast votes | ||
| castVotes(Data); | ||
|
|
||
||
| EXPECT_THAT(Data[0].Id, Foo1); | ||
| EXPECT_THAT(Data[1].Id, Foo2); | ||
| EXPECT_THAT(Data[2].Id, Foo3); | ||
|
|
||
||
| // Distribution A | ||
| // Throughput is 0, 1 and 7, so normalized scores are 0, 1/7 and 1. | ||
| EXPECT_NEAR(Data[0].PerDistributionData.lookup("A").Score, 0, kAbsErr); | ||
| EXPECT_NEAR(Data[1].PerDistributionData.lookup("A").Score, 1. / 7, kAbsErr); | ||
| EXPECT_NEAR(Data[2].PerDistributionData.lookup("A").Score, 1, kAbsErr); | ||
| // which are turned into grades BAD, MEDIOCRE and EXCELLENT. | ||
| EXPECT_THAT(Data[0].PerDistributionData.lookup("A").Grade, Grade::BAD); | ||
| EXPECT_THAT(Data[1].PerDistributionData.lookup("A").Grade, Grade::MEDIOCRE); | ||
| EXPECT_THAT(Data[2].PerDistributionData.lookup("A").Grade, Grade::EXCELLENT); | ||
|
|
||
||
| // Distribution B | ||
| // Throughput is 30, 100 and 100, so normalized scores are 0, 1 and 1. | ||
| EXPECT_NEAR(Data[0].PerDistributionData.lookup("B").Score, 0, kAbsErr); | ||
| EXPECT_NEAR(Data[1].PerDistributionData.lookup("B").Score, 1, kAbsErr); | ||
| EXPECT_NEAR(Data[2].PerDistributionData.lookup("B").Score, 1, kAbsErr); | ||
| // which are turned into grades BAD, EXCELLENT and EXCELLENT. | ||
| EXPECT_THAT(Data[0].PerDistributionData.lookup("B").Grade, Grade::BAD); | ||
| EXPECT_THAT(Data[1].PerDistributionData.lookup("B").Grade, Grade::EXCELLENT); | ||
| EXPECT_THAT(Data[2].PerDistributionData.lookup("B").Grade, Grade::EXCELLENT); | ||
|
|
||
||
| // Now looking from the functions point of view. | ||
| // Note the array is indexed by GradeEnum values (EXCELLENT=0 / BAD = 6) | ||
| EXPECT_THAT(Data[0].GradeHisto, ElementsAre(0, 0, 0, 0, 0, 0, 2)); | ||
| EXPECT_THAT(Data[1].GradeHisto, ElementsAre(1, 0, 0, 0, 0, 1, 0)); | ||
| EXPECT_THAT(Data[2].GradeHisto, ElementsAre(2, 0, 0, 0, 0, 0, 0)); | ||
|
|
||
||
| EXPECT_THAT(Data[0].FinalGrade, Grade::BAD); | ||
| EXPECT_THAT(Data[1].FinalGrade, Grade::MEDIOCRE); | ||
| EXPECT_THAT(Data[2].FinalGrade, Grade::EXCELLENT); | ||
| } | ||
|
|
||
| } // namespace | ||
| } // namespace automemcpy | ||
| } // namespace llvm |