332 changes: 116 additions & 216 deletions libc/benchmarks/LibcMemoryBenchmarkMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#include <cstring>
#include <unistd.h>

namespace __llvm_libc {

Expand Down Expand Up @@ -62,172 +64,43 @@ static cl::opt<uint32_t>
NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
cl::init(1));

/// Number of bytes in one kibibyte.
static constexpr int64_t KiB = int64_t{1} << 10;
/// Bytes subtracted from the L1 data cache size to leave room for the batch
/// of call parameters when the benchmark buffers are sized.
static constexpr int64_t ParameterStorageBytes = KiB * 4;
/// Additional bytes subtracted from the L1 data cache size when the benchmark
/// buffers are sized.
static constexpr int64_t L1LeftAsideBytes = KiB;

// Arguments of one benchmarked call. Both members are 16-bit unsigned
// bit-fields, so each can hold a value in [0, 65535].
struct ParameterType {
unsigned OffsetBytes : 16; // max : 64 KiB - 1 (65535)
unsigned SizeBytes : 16; // max : 64 KiB - 1 (65535)
};

#if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY)
struct Benchmark {
static constexpr auto GetDistributions = &getMemcpySizeDistributions;
static constexpr size_t BufferCount = 2;

Benchmark(const size_t BufferSize)
: SrcBuffer(BufferSize), DstBuffer(BufferSize) {}

inline auto functor() {
return [this](ParameterType P) {
__llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
P.SizeBytes);
return DstBuffer[P.OffsetBytes];
};
}

AlignedBuffer SrcBuffer;
AlignedBuffer DstBuffer;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMCPY
using BenchmarkHarness = CopyHarness;
#elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET)
struct Benchmark {
static constexpr auto GetDistributions = &getMemsetSizeDistributions;
static constexpr size_t BufferCount = 1;

Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}

inline auto functor() {
return [this](ParameterType P) {
__llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
P.SizeBytes);
return DstBuffer[P.OffsetBytes];
};
}

AlignedBuffer DstBuffer;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMSET
using BenchmarkHarness = SetHarness;
#elif defined(LIBC_BENCHMARK_FUNCTION_BZERO)
struct Benchmark {
static constexpr auto GetDistributions = &getMemsetSizeDistributions;
static constexpr size_t BufferCount = 1;

Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}

inline auto functor() {
return [this](ParameterType P) {
__llvm_libc::bzero(DstBuffer + P.OffsetBytes, P.SizeBytes);
return DstBuffer[P.OffsetBytes];
};
}

AlignedBuffer DstBuffer;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_BZERO
using BenchmarkHarness = SetHarness;
#elif defined(LIBC_BENCHMARK_FUNCTION_MEMCMP)
struct Benchmark {
static constexpr auto GetDistributions = &getMemcmpSizeDistributions;
static constexpr size_t BufferCount = 2;

Benchmark(const size_t BufferSize)
: BufferA(BufferSize), BufferB(BufferSize) {
// The memcmp buffers always compare equal.
memset(BufferA.begin(), 0xF, BufferSize);
memset(BufferB.begin(), 0xF, BufferSize);
}

inline auto functor() {
return [this](ParameterType P) {
return __llvm_libc::memcmp(BufferA + P.OffsetBytes,
BufferB + P.OffsetBytes, P.SizeBytes);
};
}

AlignedBuffer BufferA;
AlignedBuffer BufferB;
};
#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMCMP
using BenchmarkHarness = ComparisonHarness;
#else
#error "Missing LIBC_BENCHMARK_FUNCTION_XXX definition"
#endif

struct Harness : Benchmark {
Harness(const size_t BufferSize, size_t BatchParameterCount,
std::function<unsigned()> SizeSampler,
std::function<unsigned()> OffsetSampler)
: Benchmark(BufferSize), BufferSize(BufferSize),
Parameters(BatchParameterCount), SizeSampler(SizeSampler),
OffsetSampler(OffsetSampler) {}
struct MemfunctionBenchmarkBase : public BenchmarkHarness {
MemfunctionBenchmarkBase() : ReportProgress(isatty(fileno(stdout))) {}
virtual ~MemfunctionBenchmarkBase() {}

CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
for (auto &P : Parameters) {
P.OffsetBytes = OffsetSampler();
P.SizeBytes = SizeSampler();
if (P.OffsetBytes + P.SizeBytes >= BufferSize)
report_fatal_error("Call would result in buffer overflow");
}
virtual Study run() = 0;

/// Re-randomizes the stored parameters, then returns a circular view that
/// walks them for \p Iterations steps.
CircularArrayRef<ParameterBatch::ParameterType>
generateBatch(size_t Iterations) {
  // Refresh first so the returned view iterates over the new values.
  randomize();
  const auto Batch = makeArrayRef(Parameters);
  return cycle(Batch, Iterations);
}

private:
const size_t BufferSize;
std::vector<ParameterType> Parameters;
std::function<unsigned()> SizeSampler;
std::function<unsigned()> OffsetSampler;
};

size_t getL1DataCacheSize() {
const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
const auto IsL1DataCache = [](const CacheInfo &CI) {
return CI.Type == "Data" && CI.Level == 1;
};
const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
if (CacheIt != CacheInfos.end())
return CacheIt->Size;
report_fatal_error("Unable to read L1 Cache Data Size");
}

struct MemfunctionBenchmark {
MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
: AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
BufferSize(AvailableSize / Benchmark::BufferCount),
BatchParameterCount(BufferSize / sizeof(ParameterType)) {
// Handling command line flags
if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
report_fatal_error("Not enough L1 cache");

if (!isPowerOfTwoOrZero(AlignedAccess))
report_fatal_error(AlignedAccess.ArgStr +
Twine(" must be a power of two or zero"));

const bool HasDistributionName = !SizeDistributionName.empty();
if (SweepMode && HasDistributionName)
report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
"` or `--" + Twine(SizeDistributionName.ArgStr) + "`");

if (SweepMode) {
MaxSizeValue = SweepMaxSize;
} else {
std::map<StringRef, MemorySizeDistribution> Map;
for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
Map[Distribution.Name] = Distribution;
if (Map.count(SizeDistributionName) == 0) {
std::string Message;
raw_string_ostream Stream(Message);
Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
<< SizeDistributionName << "', available distributions:\n";
for (const auto &Pair : Map)
Stream << "'" << Pair.first << "'\n";
report_fatal_error(Stream.str());
}
SizeDistribution = Map[SizeDistributionName];
MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
}

// Setup study.
protected:
Study createStudy() {
Study Study;
// Harness study.
Study.StudyName = StudyName;
Runtime &RI = Study.Runtime;
RI.Host = HostState::get();
RI.BufferSize = BufferSize;
RI.BatchParameterCount = BatchParameterCount;
RI.BatchParameterCount = BatchSize;

BenchmarkOptions &BO = RI.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(1);
Expand All @@ -241,56 +114,34 @@ struct MemfunctionBenchmark {
StudyConfiguration &SC = Study.Configuration;
SC.NumTrials = NumTrials;
SC.IsSweepMode = SweepMode;
if (SweepMode)
SC.SweepModeMaxSize = SweepMaxSize;
else
SC.SizeDistributionName = SizeDistributionName;
SC.AccessAlignment = MaybeAlign(AlignedAccess);
SC.Function = LIBC_BENCHMARK_FUNCTION_NAME;
}

Study run() {
if (SweepMode)
runSweepMode();
else
runDistributionMode();
return Study;
}

private:
const int64_t AvailableSize;
const int64_t BufferSize;
const size_t BatchParameterCount;
size_t MaxSizeValue = 0;
MemorySizeDistribution SizeDistribution;
Study Study;
std::mt19937_64 Gen;

static constexpr bool isPowerOfTwoOrZero(size_t Value) {
return (Value & (Value - 1U)) == 0;
void runTrials(const BenchmarkOptions &Options,
std::vector<Duration> &Measurements) {
for (size_t i = 0; i < NumTrials; ++i) {
const BenchmarkResult Result = benchmark(
Options, *this, [this](ParameterBatch::ParameterType Parameter) {
return Call(Parameter, LIBC_BENCHMARK_FUNCTION);
});
Measurements.push_back(Result.BestGuess);
reportProgress(Measurements);
}
}

// Returns a callable that draws one offset per invocation from an
// OffsetDistribution, using Gen as the random source.
// NOTE: the distribution is a function-local static inside the lambda body. It
// is therefore constructed only once, on the first invocation, from the values
// BufferSize, MaxSizeValue and Study.Configuration.AccessAlignment hold at that
// moment; closures returned by later calls to this function reuse that same
// object.
std::function<unsigned()> geOffsetSampler() {
return [this]() {
static OffsetDistribution OD(BufferSize, MaxSizeValue,
Study.Configuration.AccessAlignment);
return OD(Gen);
};
}
virtual void randomize() = 0;

// Returns a callable that draws one size per invocation from a
// std::discrete_distribution weighted by SizeDistribution.Probabilities, using
// Gen as the random source.
// NOTE: the distribution is a function-local static inside the lambda body. It
// is therefore built only once, on the first invocation, from the
// probabilities SizeDistribution holds at that moment; closures returned by
// later calls to this function reuse that same object.
std::function<unsigned()> getSizeSampler() {
return [this]() {
static std::discrete_distribution<unsigned> Distribution(
SizeDistribution.Probabilities.begin(),
SizeDistribution.Probabilities.end());
return Distribution(Gen);
};
}
private:
bool ReportProgress;

void reportProgress() {
void reportProgress(const std::vector<Duration> &Measurements) {
if (!ReportProgress)
return;
static size_t LastPercent = -1;
const size_t TotalSteps = Study.Measurements.capacity();
const size_t Steps = Study.Measurements.size();
const size_t TotalSteps = Measurements.capacity();
const size_t Steps = Measurements.size();
const size_t Percent = 100 * Steps / TotalSteps;
if (Percent == LastPercent)
return;
Expand All @@ -303,40 +154,76 @@ struct MemfunctionBenchmark {
errs() << '_';
errs() << "] " << Percent << '%' << '\r';
}
};

void runTrials(const BenchmarkOptions &Options,
std::function<unsigned()> SizeSampler,
std::function<unsigned()> OffsetSampler) {
Harness B(BufferSize, BatchParameterCount, SizeSampler, OffsetSampler);
for (size_t i = 0; i < NumTrials; ++i) {
const BenchmarkResult Result = benchmark(Options, B, B.functor());
Study.Measurements.push_back(Result.BestGuess);
reportProgress();
struct MemfunctionBenchmarkSweep final : public MemfunctionBenchmarkBase {
// Builds the offset sampler from the harness buffer size, SweepMaxSize and the
// alignment obtained by wrapping AlignedAccess in a MaybeAlign.
MemfunctionBenchmarkSweep()
: OffsetSampler(MemfunctionBenchmarkBase::BufferSize, SweepMaxSize,
MaybeAlign(AlignedAccess)) {}

virtual void randomize() override {
for (auto &P : Parameters) {
P.OffsetBytes = OffsetSampler(Gen);
P.SizeBytes = CurrentSweepSize;
checkValid(P);
}
}

void runSweepMode() {
Study.Measurements.reserve(NumTrials * SweepMaxSize);

/// Runs the sweep: for every size in [0, SweepMaxSize] performs NumTrials
/// trials and records one measurement per trial.
/// \returns the study filled with configuration, runtime options and
/// measurements.
Study run() override {
  Study Study = createStudy();
  Study.Configuration.SweepModeMaxSize = SweepMaxSize;
  BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
  BO.MinDuration = std::chrono::milliseconds(1);
  BO.InitialIterations = 100;

  auto &Measurements = Study.Measurements;
  // The loop below is inclusive of SweepMaxSize, so SweepMaxSize + 1 sizes are
  // measured. reportProgress() takes capacity() as the total number of steps,
  // hence the reservation has to match exactly. The arithmetic is done in
  // size_t to avoid wrapping in a narrower type.
  const size_t SweptSizeCount = static_cast<size_t>(SweepMaxSize) + 1;
  Measurements.reserve(static_cast<size_t>(NumTrials) * SweptSizeCount);
  for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
    // randomize() reads CurrentSweepSize when refilling the batch.
    CurrentSweepSize = Size;
    runTrials(BO, Measurements);
  }
  return Study;
}

void runDistributionMode() {
Study.Measurements.reserve(NumTrials);
private:
size_t CurrentSweepSize = 0;
OffsetDistribution OffsetSampler;
std::mt19937_64 Gen;
};

struct MemfunctionBenchmarkDistribution final
: public MemfunctionBenchmarkBase {
MemfunctionBenchmarkDistribution(MemorySizeDistribution Distribution)
: Distribution(Distribution), Probabilities(Distribution.Probabilities),
SizeSampler(Probabilities.begin(), Probabilities.end()),
OffsetSampler(MemfunctionBenchmarkBase::BufferSize,
Probabilities.size() - 1, MaybeAlign(AlignedAccess)) {}

virtual void randomize() override {
for (auto &P : Parameters) {
P.OffsetBytes = OffsetSampler(Gen);
P.SizeBytes = SizeSampler(Gen);
checkValid(P);
}
}

virtual Study run() override {
Study Study = createStudy();
Study.Configuration.SizeDistributionName = Distribution.Name.str();
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(10);
BO.InitialIterations = BatchParameterCount * 10;

runTrials(BO, getSizeSampler(), geOffsetSampler());
BO.InitialIterations = BatchSize * 10;
auto &Measurements = Study.Measurements;
Measurements.reserve(NumTrials);
runTrials(BO, Measurements);
return Study;
}

private:
MemorySizeDistribution Distribution;
ArrayRef<double> Probabilities;
std::discrete_distribution<unsigned> SizeSampler;
OffsetDistribution OffsetSampler;
std::mt19937_64 Gen;
};

void writeStudy(const Study &S) {
Expand All @@ -354,20 +241,33 @@ void writeStudy(const Study &S) {

void main() {
checkRequirements();
MemfunctionBenchmark MB;
writeStudy(MB.run());
if (!isPowerOf2_32(AlignedAccess))
report_fatal_error(AlignedAccess.ArgStr +
Twine(" must be a power of two or zero"));

const bool HasDistributionName = !SizeDistributionName.empty();
if (SweepMode && HasDistributionName)
report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
"` or `--" + Twine(SizeDistributionName.ArgStr) + "`");

std::unique_ptr<MemfunctionBenchmarkBase> Benchmark;
if (SweepMode)
Benchmark.reset(new MemfunctionBenchmarkSweep());
else
Benchmark.reset(new MemfunctionBenchmarkDistribution(getDistributionOrDie(
BenchmarkHarness::Distributions, SizeDistributionName)));
writeStudy(Benchmark->run());
}

} // namespace libc_benchmarks
} // namespace llvm

int main(int argc, char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv);
#ifndef NDEBUG
static_assert(
false,
"For reproducibility benchmarks should not be compiled in DEBUG mode.");
#error For reproducibility benchmarks should not be compiled in DEBUG mode.
#endif

int main(int argc, char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv);
llvm::libc_benchmarks::main();
return EXIT_SUCCESS;
}
22 changes: 22 additions & 0 deletions libc/benchmarks/MemorySizeDistributions.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#include "MemorySizeDistributions.h"

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {
namespace libc_benchmarks {

Expand Down Expand Up @@ -135,5 +138,24 @@ ArrayRef<MemorySizeDistribution> getMemcmpSizeDistributions() {
};
return kDistributions;
}

/// Returns the first entry of \p Distributions whose Name equals \p Name.
/// When no entry matches, reports a fatal error whose message lists the names
/// of all available distributions.
MemorySizeDistribution
getDistributionOrDie(ArrayRef<MemorySizeDistribution> Distributions,
                     StringRef Name) {
  for (const auto &MSD : Distributions)
    if (MSD.Name == Name)
      return MSD;

  // No match: build a message that enumerates the valid names.
  std::string Message;
  raw_string_ostream Stream(Message);
  Stream << "Unknown MemorySizeDistribution '" << Name
         << "', available distributions:\n";
  for (const auto &MSD : Distributions)
    Stream << "'" << MSD.Name << "'\n";
  report_fatal_error(Stream.str());
}

} // namespace libc_benchmarks
} // namespace llvm
6 changes: 6 additions & 0 deletions libc/benchmarks/MemorySizeDistributions.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ ArrayRef<MemorySizeDistribution> getMemsetSizeDistributions();
/// Returns a list of memcmp size distributions.
ArrayRef<MemorySizeDistribution> getMemcmpSizeDistributions();

/// Returns the first MemorySizeDistribution from Distributions with the
/// specified Name.
/// If no entry has that Name, a fatal error is reported through
/// report_fatal_error with a message listing the names of all entries in
/// Distributions.
MemorySizeDistribution
getDistributionOrDie(ArrayRef<MemorySizeDistribution> Distributions,
StringRef Name);

} // namespace libc_benchmarks
} // namespace llvm

Expand Down