Skip to content

Commit

Permalink
[HIP] add --offload-compression-level= option (#83605)
Browse files Browse the repository at this point in the history
Added --offload-compression-level= option to clang and
-compression-level=
option to clang-offload-bundler for controlling compression level.

Added support of long distance matching (LDM) for llvm::zstd which is
off
by default. Enable it for clang-offload-bundler by default since it
improves compression rate in general.

Change default compression level to 3 for zstd for clang-offload-bundler
since it works well for bundle entry size from 1KB to 32MB, which should
cover most of the clang-offload-bundler usage. Users can still specify
compression level by -compression-level= option if necessary.
  • Loading branch information
yxsamliu committed Mar 9, 2024
1 parent 83fe0b1 commit 124d0b7
Show file tree
Hide file tree
Showing 17 changed files with 204 additions and 64 deletions.
6 changes: 5 additions & 1 deletion clang/include/clang/Driver/OffloadBundler.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
#define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H

#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/Triple.h"
#include <llvm/Support/MemoryBuffer.h>
Expand All @@ -36,6 +37,8 @@ class OffloadBundlerConfig {
bool HipOpenmpCompatible = false;
bool Compress = false;
bool Verbose = false;
llvm::compression::Format CompressionFormat;
int CompressionLevel;

unsigned BundleAlignment = 1;
unsigned HostInputIndex = ~0u;
Expand Down Expand Up @@ -116,7 +119,8 @@ class CompressedOffloadBundle {

public:
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
bool Verbose = false);
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
};
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
def offload_compress : Flag<["--"], "offload-compress">,
HelpText<"Compress offload device binaries (HIP only)">;
def no_offload_compress : Flag<["--"], "no-offload-compress">;

def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
Flags<[HelpHidden]>,
HelpText<"Compression level for offload device binaries (HIP only)">;
}

// CUDA options
Expand Down
111 changes: 85 additions & 26 deletions clang/lib/Driver/OffloadBundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,17 @@ CreateFileHandler(MemoryBuffer &FirstInput,
}

OffloadBundlerConfig::OffloadBundlerConfig() {
if (llvm::compression::zstd::isAvailable()) {
CompressionFormat = llvm::compression::Format::Zstd;
// Compression level 3 is usually sufficient for zstd since long distance
// matching is enabled.
CompressionLevel = 3;
} else if (llvm::compression::zlib::isAvailable()) {
CompressionFormat = llvm::compression::Format::Zlib;
// Use default level for zlib since higher level does not have significant
// improvement.
CompressionLevel = llvm::compression::zlib::DefaultCompression;
}
auto IgnoreEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
Expand All @@ -937,11 +948,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
if (CompressEnvVarOpt.has_value())
Compress = CompressEnvVarOpt.value() == "1";

auto CompressionLevelEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
if (CompressionLevelEnvVarOpt.has_value()) {
llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
int Level;
if (!CompressionLevelStr.getAsInteger(10, Level))
CompressionLevel = Level;
else
llvm::errs()
<< "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
<< CompressionLevelStr.str() << ". Ignoring it.\n";
}
}

// Utility function to format numbers with commas
static std::string formatWithCommas(unsigned long long Value) {
std::string Num = std::to_string(Value);
int InsertPosition = Num.length() - 3;
while (InsertPosition > 0) {
Num.insert(InsertPosition, ",");
InsertPosition -= 3;
}
return Num;
}

llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
CompressedOffloadBundle::compress(llvm::compression::Params P,
const llvm::MemoryBuffer &Input,
bool Verbose) {
if (!llvm::compression::zstd::isAvailable() &&
!llvm::compression::zlib::isAvailable())
return createStringError(llvm::inconvertibleErrorCode(),
"Compression not supported");

llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
Expand All @@ -959,25 +1000,15 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
Input.getBuffer().size());

llvm::compression::Format CompressionFormat;

if (llvm::compression::zstd::isAvailable())
CompressionFormat = llvm::compression::Format::Zstd;
else if (llvm::compression::zlib::isAvailable())
CompressionFormat = llvm::compression::Format::Zlib;
else
return createStringError(llvm::inconvertibleErrorCode(),
"Compression not supported");

llvm::Timer CompressTimer("Compression Timer", "Compression time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
CompressTimer.startTimer();
llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
llvm::compression::compress(P, BufferUint8, CompressedBuffer);
if (Verbose)
CompressTimer.stopTimer();

uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
uint32_t UncompressedSize = Input.getBuffer().size();

SmallVector<char, 0> FinalBuffer;
Expand All @@ -995,17 +1026,29 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,

if (Verbose) {
auto MethodUsed =
CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
double CompressionRate =
static_cast<double>(UncompressedSize) / CompressedBuffer.size();
double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
double CompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;

llvm::errs() << "Compressed bundle format version: " << Version << "\n"
<< "Compression method used: " << MethodUsed << "\n"
<< "Binary size before compression: " << UncompressedSize
<< " bytes\n"
<< "Binary size after compression: " << CompressedBuffer.size()
<< " bytes\n"
<< "Compression level: " << P.level << "\n"
<< "Binary size before compression: "
<< formatWithCommas(UncompressedSize) << " bytes\n"
<< "Binary size after compression: "
<< formatWithCommas(CompressedBuffer.size()) << " bytes\n"
<< "Compression rate: "
<< llvm::format("%.2lf", CompressionRate) << "\n"
<< "Compression ratio: "
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
<< "Compression speed: "
<< llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
<< "Truncated MD5 hash: "
<< llvm::format_hex(TruncatedHash, 16) << "\n";
}

return llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}
Expand Down Expand Up @@ -1070,7 +1113,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
if (Verbose) {
DecompressTimer.stopTimer();

// Recalculate MD5 hash
double DecompressionTimeSeconds =
DecompressTimer.getTotalTime().getWallTime();

// Recalculate MD5 hash for integrity check
llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
"Hash recalculation time",
ClangOffloadBundlerTimerGroup);
Expand All @@ -1084,16 +1130,27 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
HashRecalcTimer.stopTimer();
bool HashMatch = (StoredHash == RecalculatedHash);

double CompressionRate =
static_cast<double>(UncompressedSize) / CompressedData.size();
double DecompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;

llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
<< "Decompression method: "
<< (CompressionFormat == llvm::compression::Format::Zlib
? "zlib"
: "zstd")
<< "\n"
<< "Size before decompression: " << CompressedData.size()
<< " bytes\n"
<< "Size after decompression: " << UncompressedSize
<< " bytes\n"
<< "Size before decompression: "
<< formatWithCommas(CompressedData.size()) << " bytes\n"
<< "Size after decompression: "
<< formatWithCommas(UncompressedSize) << " bytes\n"
<< "Compression rate: "
<< llvm::format("%.2lf", CompressionRate) << "\n"
<< "Compression ratio: "
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
<< "Decompression speed: "
<< llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
<< "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
<< "Recalculated hash: "
<< llvm::format_hex(RecalculatedHash, 16) << "\n"
Expand Down Expand Up @@ -1287,8 +1344,10 @@ Error OffloadBundler::BundleFiles() {
std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(Buffer.data(), Buffer.size()));
auto CompressionResult =
CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
auto CompressionResult = CompressedOffloadBundle::compress(
{BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
/*zstdEnableLdm=*/true},
*BufferMemory, BundlerConfig.Verbose);
if (auto Error = CompressionResult.takeError())
return Error;

Expand Down
11 changes: 2 additions & 9 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8529,7 +8529,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
}

// Begin OffloadBundler

void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
Expand Down Expand Up @@ -8627,11 +8626,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
if (TCArgs.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("-compress");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");
addOffloadCompressArgs(TCArgs, CmdArgs);
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
Expand Down Expand Up @@ -8900,9 +8895,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
for (const char *LinkArg : LinkCommand->getArguments())
CmdArgs.push_back(LinkArg);

if (Args.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("--compress");
addOffloadCompressArgs(Args, CmdArgs);

const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2863,3 +2863,15 @@ void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
CmdArgs.push_back("+outline-atomics");
}
}

void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
llvm::opt::ArgStringList &CmdArgs) {
if (TCArgs.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("-compress");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");
if (auto *Arg = TCArgs.getLastArg(options::OPT_offload_compression_level_EQ))
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-compression-level=") + Arg->getValue()));
}
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const llvm::Triple &Triple);
void addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
llvm::opt::ArgStringList &CmdArgs);

} // end namespace tools
} // end namespace driver
Expand Down
7 changes: 2 additions & 5 deletions clang/lib/Driver/ToolChains/HIPUtility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "HIPUtility.h"
#include "Clang.h"
#include "CommonArgs.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Options.h"
Expand Down Expand Up @@ -258,11 +259,7 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
Args.MakeArgString(std::string("-output=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);

if (Args.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
BundlerArgs.push_back("-compress");
if (Args.hasArg(options::OPT_v))
BundlerArgs.push_back("-verbose");
addOffloadCompressArgs(Args, BundlerArgs);

const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
Expand Down
21 changes: 18 additions & 3 deletions clang/test/Driver/clang-offload-bundler-zlib.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: zlib
// REQUIRES: zlib && !zstd
// REQUIRES: x86-registered-target
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}

Expand Down Expand Up @@ -34,13 +34,28 @@
// RUN: diff %t.tgt2 %t.res.tgt2

//
// COMPRESS: Compression method used:
// DECOMPRESS: Decompression method:
// COMPRESS: Compression method used: zlib
// COMPRESS: Compression level: 6
// DECOMPRESS: Decompression method: zlib
// DECOMPRESS: Hashes match: Yes
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

// Check -compression-level= option

// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
// RUN: FileCheck -check-prefix=LEVEL %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// LEVEL: Compression method used: zlib
// LEVEL: Compression level: 9

//
// Check -bundle-align option.
//
Expand Down
19 changes: 17 additions & 2 deletions clang/test/Driver/clang-offload-bundler-zstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,28 @@
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compression method used
// DECOMPRESS: Decompression method
// COMPRESS: Compression method used: zstd
// COMPRESS: Compression level: 20
// DECOMPRESS: Decompression method: zstd
// DECOMPRESS: Hashes match: Yes
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

// Check -compression-level= option

// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
// RUN: FileCheck -check-prefix=LEVEL %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// LEVEL: Compression method used: zstd
// LEVEL: Compression level: 9

//
// Check -bundle-align option.
//
Expand Down
7 changes: 4 additions & 3 deletions clang/test/Driver/hip-offload-compress-zlib.hip
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: zlib
// REQUIRES: zlib && !zstd
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

Expand All @@ -9,13 +9,14 @@
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
// RUN: --offload-compress --offload-compression-level=9 \
// RUN: --offload-device-only --gpu-bundle-output \
// RUN: -o %t.bc \
// RUN: 2>&1 | FileCheck %s

// CHECK: clang-offload-bundler{{.*}} -type=bc
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// CHECK-SAME: -compress -verbose
// CHECK-SAME: -compress -verbose -compression-level=9
// CHECK: Compressed bundle format

// Test uncompress of bundled bitcode.
Expand Down

0 comments on commit 124d0b7

Please sign in to comment.