Skip to content

Commit

Permalink
[mlir] Remove mlir-rocm-runner
Browse files Browse the repository at this point in the history
This change combines for ROCm what was done for CUDA in D97463, D98203, D98360, and D98396.

I did not try to compile SerializeToHsaco.cpp or test mlir/test/Integration/GPU/ROCM because I don't have an AMD card. I fixed the things that had obvious bit-rot though.

Reviewed By: whchung

Differential Revision: https://reviews.llvm.org/D98447
  • Loading branch information
chsigg committed Mar 19, 2021
1 parent 4ee4f9b commit a825fb2
Show file tree
Hide file tree
Showing 21 changed files with 494 additions and 544 deletions.
4 changes: 4 additions & 0 deletions mlir/include/mlir/Dialect/GPU/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
/// annotation.
void registerGpuSerializeToCubinPass();

/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.
void registerGpuSerializeToHsacoPass();

/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/GPU/Passes.h.inc"
Expand Down
1 change: 1 addition & 0 deletions mlir/include/mlir/InitAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ inline void registerAllPasses() {
registerAsyncPasses();
registerGPUPasses();
registerGpuSerializeToCubinPass();
registerGpuSerializeToHsacoPass();
registerLinalgPasses();
LLVM::registerLLVMPasses();
quant::registerQuantPasses();
Expand Down
67 changes: 67 additions & 0 deletions mlir/lib/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ if (MLIR_CUDA_CONVERSIONS_ENABLED)
)
endif()

# LLVM components needed when ROCm conversions are enabled. The list is
# expanded as ${AMDGPU_LIBS} in the MLIRGPU library definition below and stays
# unset (expanding to nothing) when MLIR_ROCM_CONVERSIONS_ENABLED is off.
if (MLIR_ROCM_CONVERSIONS_ENABLED)
set(AMDGPU_LIBS
MCParser
AMDGPUAsmParser
AMDGPUCodeGen
AMDGPUDesc
AMDGPUInfo
)
endif()

add_mlir_dialect_library(MLIRGPU
IR/GPUDialect.cpp
Transforms/AllReduceLowering.cpp
Expand All @@ -15,6 +25,7 @@ add_mlir_dialect_library(MLIRGPU
Transforms/ParallelLoopMapper.cpp
Transforms/SerializeToBlob.cpp
Transforms/SerializeToCubin.cpp
Transforms/SerializeToHsaco.cpp

ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
Expand All @@ -23,6 +34,7 @@ add_mlir_dialect_library(MLIRGPU
Core
MC
${NVPTX_LIBS}
${AMDGPU_LIBS}

DEPENDS
MLIRGPUOpsIncGen
Expand Down Expand Up @@ -84,3 +96,58 @@ if(MLIR_CUDA_RUNNER_ENABLED)
)

endif()

# Extra configuration of MLIRGPU for building the GPU-to-HSACO serialization
# pass: validates the LLVM configuration, locates ROCm/HIP, and wires the
# required definitions, include paths and lld into the library.
if(MLIR_ROCM_RUNNER_ENABLED)
  if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
    message(SEND_ERROR
      "Building mlir with ROCm support requires the AMDGPU backend")
  endif()

  # Ensure lld is enabled.
  if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS)
    message(SEND_ERROR "lld is not enabled. Please revise LLVM_ENABLE_PROJECTS")
  endif()

  # Configure ROCm support. ROCM_PATH is taken, in order of precedence, from an
  # already defined variable, the ROCM_PATH environment variable, or /opt/rocm.
  if (NOT DEFINED ROCM_PATH)
    if (NOT DEFINED ENV{ROCM_PATH})
      set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
    else()
      set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
    endif()
  endif()
  # Derive HIP_PATH on its own rather than only when ROCM_PATH was undefined:
  # otherwise passing just -DROCM_PATH=... leaves HIP_PATH empty and the paths
  # below degrade to "/cmake" and "/include".
  if (NOT DEFINED HIP_PATH)
    set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH " Path to which HIP has been installed")
  endif()
  set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
  find_package(HIP)
  if (NOT HIP_FOUND)
    message(SEND_ERROR "Building mlir with ROCm support requires a working ROCm and HIP install")
  else()
    message(STATUS "ROCm HIP version: ${HIP_VERSION}")
  endif()

  # MLIR_GPU_TO_HSACO_PASS_ENABLE switches SerializeToHsaco.cpp from its no-op
  # stub to the real pass; __ROCM_PATH__ is baked in as a string literal.
  target_compile_definitions(obj.MLIRGPU
    PRIVATE
    __HIP_PLATFORM_HCC__
    __ROCM_PATH__="${ROCM_PATH}"
    MLIR_GPU_TO_HSACO_PASS_ENABLE=1
  )

  target_include_directories(obj.MLIRGPU
    PRIVATE
    ${MLIR_SOURCE_DIR}/../lld/include
    ${HIP_PATH}/include
    ${ROCM_PATH}/include
  )

  target_link_libraries(MLIRGPU
    PRIVATE
    lldELF
    MLIRROCDLToLLVMIRTranslation
  )

  # Link lldELF also to libmlir.so. Create an alias that starts with LLVM
  # because LINK_COMPONENTS elements are implicitly prefixed with LLVM.
  add_library(LLVMAliasTolldELF ALIAS lldELF)
  set_property(GLOBAL APPEND PROPERTY MLIR_LLVM_LINK_COMPONENTS AliasTolldELF)

endif()
284 changes: 284 additions & 0 deletions mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
//===- SerializeToHsaco.cpp - Convert GPU kernel to HSACO blob ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass that serializes a GPU module into an HSACO blob
// and adds that blob as a string attribute of the module.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/Passes.h"

#if MLIR_GPU_TO_HSACO_PASS_ENABLE
#include "mlir/Pass/Pass.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"

#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"

#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Target/TargetOptions.h"

#include "lld/Common/Driver.h"

#include "hip/hip_version.h"

#include <mutex>

using namespace mlir;

namespace {
/// Pass that turns the ISA produced for a GPU module into an HSACO binary.
/// It plugs into gpu::SerializeToBlobPass by overriding `serializeISA`, which
/// first assembles the ISA text and then links the result into a code object.
class SerializeToHsacoPass
: public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
public:
// Fills in default values for the `triple` and `chip` options.
SerializeToHsacoPass();

private:
// Registers the ROCDL translation in addition to the base class dialects.
void getDependentDialects(DialectRegistry &registry) const override;

// Serializes ROCDL to HSACO. Returns null when assembling or linking fails.
std::unique_ptr<std::vector<char>>
serializeISA(const std::string &isa) override;

// Assembles the textual ISA into object code; returns null on failure.
std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
// Links the assembled object code into an HSA code object by running lld on
// temporary files; returns null on failure.
std::unique_ptr<std::vector<char>>
createHsaco(const SmallVectorImpl<char> &isaBinary);
};
} // namespace

/// Returns the AMDGPU chip name to target when the user did not request one.
///
/// Runs `rocm_agent_enumerator -t GPU` from `__ROCM_PATH__ "/bin"`, captures
/// its standard output in a temporary file and returns the first listed line
/// that is not "gfx000". Falls back to "gfx900", after printing a diagnostic to
/// stderr, when the tool cannot be located or run or when its output cannot be
/// read; the same fallback is returned silently when no usable line is found.
static std::string getDefaultChip() {
  const char kDefaultChip[] = "gfx900";

  // Locate rocm_agent_enumerator.
  const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
  llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
      kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
  if (!rocmAgentEnumerator) {
    llvm::WithColor::warning(llvm::errs())
        << kRocmAgentEnumerator << " couldn't be located under "
        << __ROCM_PATH__ << "/bin\n";
    return kDefaultChip;
  }

  // Prepare temp file to hold the outputs. Only the path is needed (the child
  // process writes to it through a redirect), so use the overload that does
  // not hand back an open file descriptor we would have to close ourselves.
  SmallString<128> tempFilename;
  if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFilename)) {
    llvm::WithColor::warning(llvm::errs())
        << "temporary file for " << kRocmAgentEnumerator << " creation error\n";
    return kDefaultChip;
  }
  llvm::FileRemover cleanup(tempFilename);

  // Invoke rocm_agent_enumerator. Slot 1 of the redirects (stdout) points at
  // the temp file; stdin and stderr are given empty redirect paths.
  std::string errorMessage;
  SmallVector<StringRef, 2> args{"-t", "GPU"};
  Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
  int result =
      llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
                                redirects, 0, 0, &errorMessage);
  if (result) {
    llvm::WithColor::warning(llvm::errs())
        << kRocmAgentEnumerator << " invocation error: " << errorMessage
        << "\n";
    return kDefaultChip;
  }

  // Load and parse the result.
  auto gfxIsaList = openInputFile(tempFilename);
  if (!gfxIsaList) {
    llvm::WithColor::error(llvm::errs())
        << "read ROCm agent list temp file error\n";
    return kDefaultChip;
  }
  for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
    // Skip the line with content "gfx000".
    if (*lines == "gfx000")
      continue;
    // Use the first ISA version found.
    return lines->str();
  }

  return kDefaultChip;
}

/// Assigns the result of `getValue` to `option`, but only when `option` holds
/// no value yet; otherwise `getValue` is not invoked and `option` is untouched.
static void maybeSetOption(Pass::Option<std::string> &option,
                           function_ref<std::string()> getValue) {
  if (option.hasValue())
    return;
  option = getValue();
}

/// Supplies defaults for the `triple` and `chip` options when they carry no
/// value: the AMD HSA triple and the chip reported by getDefaultChip().
SerializeToHsacoPass::SerializeToHsacoPass() {
  auto defaultTriple = [] { return "amdgcn-amd-amdhsa"; };
  // The function-local static means getDefaultChip() runs at most once per
  // process, no matter how many pass instances are constructed.
  auto defaultChip = [] {
    static auto detectedChip = getDefaultChip();
    return detectedChip;
  };

  maybeSetOption(this->triple, defaultTriple);
  maybeSetOption(this->chip, defaultChip);
}

/// Calls registerROCDLDialectTranslation on `registry` and then delegates to
/// gpu::SerializeToBlobPass::getDependentDialects so that whatever the base
/// class registers is kept as well.
void SerializeToHsacoPass::getDependentDialects(
DialectRegistry &registry) const {
registerROCDLDialectTranslation(registry);
gpu::SerializeToBlobPass::getDependentDialects(registry);
}

/// Assembles the textual ISA in `isa` into object code.
///
/// The target is looked up from the pass's `triple` option and the subtarget
/// is configured from its `chip` and `features` options. Returns the emitted
/// bytes, or null after emitting an error at the operation's location when the
/// target lookup fails, an MC component cannot be created, or the assembler
/// reports a failure while parsing `isa`.
std::unique_ptr<SmallVectorImpl<char>>
SerializeToHsacoPass::assembleIsa(const std::string &isa) {
  auto loc = getOperation().getLoc();

  // Shared failure path for any MC component the target fails to provide.
  auto initializationError = [&]() -> std::unique_ptr<SmallVectorImpl<char>> {
    emitError(loc, "assembler initialization error");
    return {};
  };

  SmallVector<char, 0> result;
  llvm::raw_svector_ostream os(result);

  llvm::Triple triple(llvm::Triple::normalize(this->triple));
  std::string error;
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
  if (!target) {
    emitError(loc, Twine("failed to lookup target: ") + error);
    return {};
  }

  llvm::SourceMgr srcMgr;
  srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
                            llvm::SMLoc());

  const llvm::MCTargetOptions mcOptions;
  std::unique_ptr<llvm::MCRegisterInfo> mri(
      target->createMCRegInfo(this->triple));
  if (!mri)
    return initializationError();
  std::unique_ptr<llvm::MCAsmInfo> mai(
      target->createMCAsmInfo(*mri, this->triple, mcOptions));
  if (!mai)
    return initializationError();
  mai->setRelaxELFRelocations(true);

  llvm::MCObjectFileInfo mofi;
  llvm::MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions);
  mofi.InitMCObjectFileInfo(triple, false, ctx, false);

  // Best effort: the compilation directory is only recorded when the current
  // working directory can be queried.
  SmallString<128> cwd;
  if (!llvm::sys::fs::current_path(cwd))
    ctx.setCompilationDir(cwd);

  std::unique_ptr<llvm::MCStreamer> mcStreamer;
  std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
  std::unique_ptr<llvm::MCSubtargetInfo> sti(
      target->createMCSubtargetInfo(this->triple, this->chip, this->features));
  if (!mcii || !sti)
    return initializationError();

  // Take ownership right away so nothing leaks on the early-exit path below.
  std::unique_ptr<llvm::MCCodeEmitter> ce(
      target->createMCCodeEmitter(*mcii, *mri, ctx));
  std::unique_ptr<llvm::MCAsmBackend> mab(
      target->createMCAsmBackend(*sti, *mri, mcOptions));
  if (!ce || !mab)
    return initializationError();

  // Create the object writer before handing the backend over to the streamer.
  std::unique_ptr<llvm::MCObjectWriter> objectWriter =
      mab->createObjectWriter(os);
  mcStreamer.reset(target->createMCObjectStreamer(
      triple, ctx, std::move(mab), std::move(objectWriter), std::move(ce),
      *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
      /*DWARFMustBeAtTheEnd*/ false));
  if (!mcStreamer)
    return initializationError();
  mcStreamer->setUseAssemblerInfoForParsing(true);

  std::unique_ptr<llvm::MCAsmParser> parser(
      createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
  if (!parser)
    return initializationError();
  std::unique_ptr<llvm::MCTargetAsmParser> tap(
      target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
  if (!tap)
    return initializationError();

  parser->setTargetParser(*tap);
  // Run() reports failure by returning true; do not pass a partially emitted
  // object on to the linker in that case.
  if (parser->Run(false)) {
    emitError(loc, "failed to assemble ISA");
    return {};
  }

  // NOTE(review): the buffer is handed out through a SmallVectorImpl pointer
  // as required by the declared return type; confirm that destroying the
  // SmallVector through that base type is well-defined.
  return std::make_unique<SmallVector<char, 0>>(std::move(result));
}

std::unique_ptr<std::vector<char>>
SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
auto loc = getOperation().getLoc();

// Save the ISA binary to a temp file.
int tempIsaBinaryFd = -1;
SmallString<128> tempIsaBinaryFilename;
if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
tempIsaBinaryFilename)) {
emitError(loc, "temporary file for ISA binary creation error");
return {};
}
llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
tempIsaBinaryOs.close();

// Create a temp file for HSA code object.
int tempHsacoFD = -1;
SmallString<128> tempHsacoFilename;
if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
tempHsacoFilename)) {
emitError(loc, "temporary file for HSA code object creation error");
return {};
}
llvm::FileRemover cleanupHsaco(tempHsacoFilename);

{
static std::mutex mutex;
const std::lock_guard<std::mutex> lock(mutex);
// Invoke lld. Expect a true return value from lld.
if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
"-o", tempHsacoFilename.c_str()},
/*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
emitError(loc, "lld invocation error");
return {};
}
}

// Load the HSA code object.
auto hsacoFile = openInputFile(tempHsacoFilename);
if (!hsacoFile) {
emitError(loc, "read HSA code object from temp file error");
return {};
}

StringRef buffer = hsacoFile->getBuffer();
return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
}

/// Converts textual ISA into an HSACO blob: the ISA is assembled first and the
/// resulting object code is then linked. Returns null as soon as assembling
/// fails; otherwise returns whatever createHsaco produces.
std::unique_ptr<std::vector<char>>
SerializeToHsacoPass::serializeISA(const std::string &isa) {
  if (auto objectCode = assembleIsa(isa))
    return createHsaco(*objectCode);
  return {};
}

/// Registers the `gpu-to-hsaco` pass, which serializes GPU kernel functions to
/// an HSACO binary annotation.
void mlir::registerGpuSerializeToHsacoPass() {
  // Factory used by the registration: brings up the LLVM AMDGPU backend and
  // then hands out a fresh pass instance.
  auto createPass = [] {
    // Initialize LLVM AMDGPU backend.
    LLVMInitializeAMDGPUAsmParser();
    LLVMInitializeAMDGPUAsmPrinter();
    LLVMInitializeAMDGPUTarget();
    LLVMInitializeAMDGPUTargetInfo();
    LLVMInitializeAMDGPUTargetMC();

    return std::make_unique<SerializeToHsacoPass>();
  };

  PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
      "gpu-to-hsaco", "Lower GPU kernel function to HSACO binary annotations",
      createPass);
}
#else // MLIR_GPU_TO_HSACO_PASS_ENABLE
// Built without MLIR_GPU_TO_HSACO_PASS_ENABLE: keep the registration entry
// point as a no-op so callers can invoke it unconditionally.
void mlir::registerGpuSerializeToHsacoPass() {}
#endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
Loading

0 comments on commit a825fb2

Please sign in to comment.