340 changes: 293 additions & 47 deletions clang/lib/Driver/OffloadBundler.cpp

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8470,6 +8470,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
if (TCArgs.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("-compress");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
Expand Down Expand Up @@ -8553,6 +8558,8 @@ void OffloadBundler::ConstructJobMultipleOutputs(
}
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");

// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/HIPUtility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
Args.MakeArgString(std::string("-output=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);

if (Args.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
BundlerArgs.push_back("-compress");
if (Args.hasArg(options::OPT_v))
BundlerArgs.push_back("-verbose");

const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
C.addCommand(std::make_unique<Command>(
Expand Down
75 changes: 75 additions & 0 deletions clang/test/Driver/clang-offload-bundler-zlib.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// REQUIRES: zlib
// REQUIRES: x86-registered-target
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}

//
// Generate the host binary to be bundled.
//
// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc

//
// Generate an empty file to help with the checks of empty files.
//
// RUN: touch %t.empty

//
// Generate device binaries to be bundled.
//
// RUN: echo 'Content of device file 1' > %t.tgt1
// RUN: echo 'Content of device file 2' > %t.tgt2

//
// Check compression/decompression of offload bundle.
//
// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
// RUN: FileCheck -check-prefix=COMPRESS %s
// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
// RUN: FileCheck -check-prefix=DECOMPRESS %s
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2

//
// COMPRESS: Compression method used:
// DECOMPRESS: Decompression method:
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

//
// Check the -bundle-align option when bundling with -compress.
//

// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
// RUN: diff %t.bc %t.res.bc
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2

//
// Check unbundling an archive whose members are compressed bundles.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906

// Minimal translation-unit contents (one global plus a function that mutates
// it) so that compiling this test file yields a real host binary to bundle.
int A = 0;
void test_func(void) {
  A += 1;
}
72 changes: 72 additions & 0 deletions clang/test/Driver/clang-offload-bundler-zstd.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// REQUIRES: zstd
// REQUIRES: x86-registered-target
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}

//
// Generate the host binary to be bundled.
//
// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc

//
// Generate an empty file to help with the checks of empty files.
//
// RUN: touch %t.empty

//
// Generate device binaries to be bundled.
//
// RUN: echo 'Content of device file 1' > %t.tgt1
// RUN: echo 'Content of device file 2' > %t.tgt2

//
// Check compression/decompression of offload bundle.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
// RUN: FileCheck -check-prefix=COMPRESS %s
// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
// RUN: FileCheck -check-prefix=DECOMPRESS %s
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compression method used
// DECOMPRESS: Decompression method
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

//
// Check the -bundle-align option when bundling with -compress.
//

// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
// RUN: diff %t.bc %t.res.bc
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2

//
// Check unbundling an archive whose members are compressed bundles.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906

// Minimal translation-unit contents (one global plus a function that mutates
// it) so that compiling this test file yields a real host binary to bundle.
int A = 0;
void test_func(void) {
  A += 1;
}
47 changes: 47 additions & 0 deletions clang/test/Driver/hip-offload-compress-zlib.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// REQUIRES: zlib
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

// Test compression of bundled bitcode.

// RUN: rm -rf %T/a.bc
// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
// RUN: -o %T/a.bc \
// RUN: 2>&1 | FileCheck %s

// CHECK: clang-offload-bundler{{.*}} -type=bc
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// CHECK-SAME: -compress -verbose
// CHECK: Compressed bundle format

// Test decompression of bundled bitcode.

// RUN: %clang --hip-link -v --target=x86_64-linux-gnu \
// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpulib \
// RUN: %T/a.bc --offload-device-only \
// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s

// UNBUNDLE: clang-offload-bundler{{.*}} -type=bc
// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// UNBUNDLE-SAME: -unbundle
// UNBUNDLE-SAME: -verbose
// UNBUNDLE: Compressed bundle format

// Test compression of bundled code objects.

// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress \
// RUN: 2>&1 | FileCheck -check-prefix=CO %s

// CO: clang-offload-bundler{{.*}} -type=o
// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
// CO-SAME: -compress -verbose
// CO: Compressed bundle format
47 changes: 47 additions & 0 deletions clang/test/Driver/hip-offload-compress-zstd.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// REQUIRES: zstd
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

// Test compression of bundled bitcode.

// RUN: rm -rf %T/a.bc
// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
// RUN: -o %T/a.bc \
// RUN: 2>&1 | FileCheck %s

// CHECK: clang-offload-bundler{{.*}} -type=bc
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// CHECK-SAME: -compress -verbose
// CHECK: Compressed bundle format

// Test decompression of bundled bitcode.

// RUN: %clang --hip-link -v --target=x86_64-linux-gnu \
// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpulib \
// RUN: %T/a.bc --offload-device-only \
// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s

// UNBUNDLE: clang-offload-bundler{{.*}} -type=bc
// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// UNBUNDLE-SAME: -unbundle
// UNBUNDLE-SAME: -verbose
// UNBUNDLE: Compressed bundle format

// Test compression of bundled code objects.

// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress \
// RUN: 2>&1 | FileCheck -check-prefix=CO %s

// CO: clang-offload-bundler{{.*}} -type=o
// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
// CO-SAME: -compress -verbose
// CO: Compressed bundle format
1 change: 1 addition & 0 deletions clang/tools/clang-offload-bundler/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
set(LLVM_LINK_COMPONENTS
BinaryFormat
Object
Support
TargetParser
Expand Down
10 changes: 10 additions & 0 deletions clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ int main(int argc, const char **argv) {
cl::desc("Treat hip and hipv4 offload kinds as "
"compatible with openmp kind, and vice versa.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
cl::opt<bool> Compress("compress",
cl::desc("Compress output file when bundling.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));

// Process commandline options and report errors
sys::PrintStackTraceOnErrorSignal(argv[0]);
Expand Down Expand Up @@ -163,6 +168,11 @@ int main(int argc, const char **argv) {
BundlerConfig.BundleAlignment = BundleAlignment;
BundlerConfig.FilesType = FilesType;
BundlerConfig.ObjcopyPath = "";
// Only override the default values of Compress and Verbose in BundlerConfig
// when the corresponding option was given explicitly on the command line.
if (Compress.getNumOccurrences() > 0)
BundlerConfig.Compress = Compress;
if (Verbose.getNumOccurrences() > 0)
BundlerConfig.Verbose = Verbose;

BundlerConfig.TargetNames = TargetNames;
BundlerConfig.InputFileNames = InputFileNames;
Expand Down
28 changes: 15 additions & 13 deletions llvm/include/llvm/BinaryFormat/Magic.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,21 @@ struct file_magic {
macho_universal_binary, ///< Mach-O universal binary
macho_file_set, ///< Mach-O file set binary
minidump, ///< Windows minidump file
coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
coff_object, ///< COFF object file
coff_import_library, ///< COFF import library
pecoff_executable, ///< PECOFF executable file
windows_resource, ///< Windows compiled resource file (.res)
xcoff_object_32, ///< 32-bit XCOFF object file
xcoff_object_64, ///< 64-bit XCOFF object file
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
tapi_file, ///< Text-based Dynamic Library Stub file
cuda_fatbinary, ///< CUDA Fatbinary object file
offload_binary, ///< LLVM offload object file
dxcontainer_object, ///< DirectX container file
coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
coff_object, ///< COFF object file
coff_import_library, ///< COFF import library
pecoff_executable, ///< PECOFF executable file
windows_resource, ///< Windows compiled resource file (.res)
xcoff_object_32, ///< 32-bit XCOFF object file
xcoff_object_64, ///< 64-bit XCOFF object file
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
tapi_file, ///< Text-based Dynamic Library Stub file
cuda_fatbinary, ///< CUDA Fatbinary object file
offload_binary, ///< LLVM offload object file
dxcontainer_object, ///< DirectX container file
offload_bundle, ///< Clang offload bundle file
offload_bundle_compressed, ///< Compressed clang offload bundle file
};

bool is_object() const { return V != unknown; }
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/BinaryFormat/Magic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
if (startswith(Magic, "BC\xC0\xDE"))
return file_magic::bitcode;
break;
case 'C':
if (startswith(Magic, "CCOB"))
return file_magic::offload_bundle_compressed;
break;
case '!':
if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
return file_magic::archive;
Expand Down Expand Up @@ -251,6 +255,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;

// Buffers that start with the "__CLANG_OFFLOAD_BUNDLE__" string are
// classified as file_magic::offload_bundle.
case '_': {
const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
// NOTE(review): sizeof(OBMagic) counts the terminating NUL, so this size
// check requires at least one byte beyond the magic string itself. Confirm
// that this is intended rather than sizeof(OBMagic) - 1.
if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
return file_magic::offload_bundle;
break;
}

default:
break;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Object/Binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::cuda_fatbinary:
case file_magic::coff_cl_gl_object:
case file_magic::dxcontainer_object:
case file_magic::offload_bundle:
case file_magic::offload_bundle_compressed:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
case file_magic::offload_binary:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Object/ObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
case file_magic::cuda_fatbinary:
case file_magic::offload_binary:
case file_magic::dxcontainer_object:
case file_magic::offload_bundle:
case file_magic::offload_bundle_compressed:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::tapi_file:
return errorCodeToError(object_error::invalid_file_type);
Expand Down