diff --git a/mlir/include/mlir/Target/LLVM/ModuleToObject.h b/mlir/include/mlir/Target/LLVM/ModuleToObject.h index d17afc1077fb4..e40d7e9a43dd6 100644 --- a/mlir/include/mlir/Target/LLVM/ModuleToObject.h +++ b/mlir/include/mlir/Target/LLVM/ModuleToObject.h @@ -31,7 +31,7 @@ class ModuleToObject { public: ModuleToObject(Operation &module, StringRef triple, StringRef chip, StringRef features = {}, int optLevel = 3); - virtual ~ModuleToObject() = default; + virtual ~ModuleToObject(); /// Returns the operation being serialized. Operation &getOperation(); @@ -42,44 +42,43 @@ class ModuleToObject { protected: // Hooks to be implemented by derived classes. + /// Hook for computing the Datalayout + virtual void setDataLayoutAndTriple(llvm::Module &module); + /// Hook for loading bitcode files, returns std::nullopt on failure. virtual std::optional>> - loadBitcodeFiles(llvm::Module &module, llvm::TargetMachine &targetMachine) { + loadBitcodeFiles(llvm::Module &module) { return SmallVector>(); } /// Hook for performing additional actions on a loaded bitcode file. - virtual LogicalResult handleBitcodeFile(llvm::Module &module, - llvm::TargetMachine &targetMachine) { + virtual LogicalResult handleBitcodeFile(llvm::Module &module) { return success(); } /// Hook for performing additional actions on the llvmModule pre linking. - virtual void handleModulePreLink(llvm::Module &module, - llvm::TargetMachine &targetMachine) {} + virtual void handleModulePreLink(llvm::Module &module) {} /// Hook for performing additional actions on the llvmModule post linking. - virtual void handleModulePostLink(llvm::Module &module, - llvm::TargetMachine &targetMachine) {} + virtual void handleModulePostLink(llvm::Module &module) {} /// Serializes the LLVM IR bitcode to an object file, by default it serializes /// to LLVM bitcode. virtual std::optional> - moduleToObject(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine); + moduleToObject(llvm::Module &llvmModule); protected: /// Create the target machine based on the target triple and chip. - std::unique_ptr createTargetMachine(); + /// This can fail if the target is not available. + std::optional getOrCreateTargetMachine(); /// Loads a bitcode file from path. - std::unique_ptr - loadBitcodeFile(llvm::LLVMContext &context, - llvm::TargetMachine &targetMachine, StringRef path); + std::unique_ptr loadBitcodeFile(llvm::LLVMContext &context, + StringRef path); /// Loads multiple bitcode files. LogicalResult loadBitcodeFilesFromList( - llvm::LLVMContext &context, llvm::TargetMachine &targetMachine, - ArrayRef fileList, + llvm::LLVMContext &context, ArrayRef fileList, SmallVector> &llvmModules, bool failureOnError = true); @@ -92,8 +91,7 @@ class ModuleToObject { SmallVector> &&libs); /// Optimize the module. - LogicalResult optimizeModule(llvm::Module &module, - llvm::TargetMachine &targetMachine, int optL); + virtual LogicalResult optimizeModule(llvm::Module &module, int optL); /// Utility function for translating to ISA, returns `std::nullopt` on /// failure. @@ -115,6 +113,11 @@ class ModuleToObject { /// Optimization level. int optLevel; + +private: + /// The TargetMachine created for the given Triple, if available. + /// Accessible through `getOrCreateTargetMachine()`. + std::unique_ptr targetMachine; }; } // namespace LLVM } // namespace mlir diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h index d5926d1548472..65ae8a6bdb4ad 100644 --- a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h +++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h @@ -55,8 +55,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Loads the bitcode files in `fileList`. virtual std::optional>> - loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + loadBitcodeFiles(llvm::Module &module) override; protected: /// NVVM target attribute. diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index c14fa80056a87..374fa65bd02e3 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -54,16 +54,13 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Loads the bitcode files in `fileList`. virtual std::optional>> - loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + loadBitcodeFiles(llvm::Module &module) override; /// Adds `oclc` control variables to the LLVM module. - void handleModulePreLink(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + void handleModulePreLink(llvm::Module &module) override; /// Removes unnecessary metadata from the loaded bitcode files. - LogicalResult handleBitcodeFile(llvm::Module &module, - llvm::TargetMachine &targetMachine) override; + LogicalResult handleBitcodeFile(llvm::Module &module) override; protected: /// Appends the paths of common ROCm device libraries to `libs`. diff --git a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt index 255b9efd32f86..b15876ab91c13 100644 --- a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt @@ -1,4 +1,4 @@ -if (MLIR_ENABLE_CUDA_CONVERSIONS) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 324d5c1366722..1601413c49f1f 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -1,4 +1,4 @@ -if (MLIR_ENABLE_CUDA_CONVERSIONS) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index ce07c259df833..cc2c3a00a02ea 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -21,7 +21,7 @@ add_mlir_library(MLIRTargetLLVM MLIRTargetLLVMIRExport ) -if (MLIR_ENABLE_CUDA_CONVERSIONS) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(NVPTX_LIBS NVPTXCodeGen NVPTXDesc diff --git a/mlir/lib/Target/LLVM/ModuleToObject.cpp b/mlir/lib/Target/LLVM/ModuleToObject.cpp index e68ae8311ecfb..d94c10de8d7c4 100644 --- a/mlir/lib/Target/LLVM/ModuleToObject.cpp +++ b/mlir/lib/Target/LLVM/ModuleToObject.cpp @@ -39,32 +39,34 @@ ModuleToObject::ModuleToObject(Operation &module, StringRef triple, : module(module), triple(triple), chip(chip), features(features), optLevel(optLevel) {} +ModuleToObject::~ModuleToObject() = default; + Operation &ModuleToObject::getOperation() { return module; } -std::unique_ptr ModuleToObject::createTargetMachine() { - std::string error; +std::optional +ModuleToObject::getOrCreateTargetMachine() { + if (targetMachine) + return targetMachine.get(); // Load the target. + std::string error; const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple, error); if (!target) { - getOperation().emitError() << "Failed to lookup target: " << error; - return {}; + getOperation().emitError() + << "Failed to lookup target for triple '" << triple << "' " << error; + return std::nullopt; } // Create the target machine using the target. - llvm::TargetMachine *machine = - target->createTargetMachine(triple, chip, features, {}, {}); - if (!machine) { - getOperation().emitError() << "Failed to create the target machine."; - return {}; - } - return std::unique_ptr{machine}; + targetMachine.reset( + target->createTargetMachine(triple, chip, features, {}, {})); + if (!targetMachine) + return std::nullopt; + return targetMachine.get(); } std::unique_ptr -ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, - llvm::TargetMachine &targetMachine, - StringRef path) { +ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, StringRef path) { llvm::SMDiagnostic error; std::unique_ptr library = llvm::getLazyIRFileModule(path, error, context); @@ -73,15 +75,14 @@ ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, << ", error: " << error.getMessage(); return nullptr; } - if (failed(handleBitcodeFile(*library, targetMachine))) { + if (failed(handleBitcodeFile(*library))) { return nullptr; } return library; } LogicalResult ModuleToObject::loadBitcodeFilesFromList( - llvm::LLVMContext &context, llvm::TargetMachine &targetMachine, - ArrayRef fileList, + llvm::LLVMContext &context, ArrayRef fileList, SmallVector> &llvmModules, bool failureOnError) { for (const std::string &str : fileList) { @@ -93,7 +94,7 @@ LogicalResult ModuleToObject::loadBitcodeFilesFromList( return failure(); } // Load the file or abort on error. - if (auto bcFile = loadBitcodeFile(context, targetMachine, pathRef)) + if (auto bcFile = loadBitcodeFile(context, pathRef)) llvmModules.push_back(std::move(bcFile)); else if (failureOnError) return failure(); @@ -137,16 +138,22 @@ ModuleToObject::linkFiles(llvm::Module &module, } LogicalResult ModuleToObject::optimizeModule(llvm::Module &module, - llvm::TargetMachine &targetMachine, + int optLevel) { if (optLevel < 0 || optLevel > 3) return getOperation().emitError() << "Invalid optimization level: " << optLevel << "."; - targetMachine.setOptLevel(static_cast(optLevel)); + std::optional targetMachine = + getOrCreateTargetMachine(); + if (!targetMachine) + return getOperation().emitError() + << "Target Machine unavailable for triple " << triple + << ", can't optimize with LLVM\n"; + (*targetMachine)->setOptLevel(static_cast(optLevel)); auto transformer = - makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, *targetMachine); auto error = transformer(&module); if (error) { InFlightDiagnostic mlirError = getOperation().emitError(); @@ -178,9 +185,19 @@ ModuleToObject::translateToISA(llvm::Module &llvmModule, return stream.str(); } +void ModuleToObject::setDataLayoutAndTriple(llvm::Module &module) { + // Create the target machine. + std::optional targetMachine = + getOrCreateTargetMachine(); + if (targetMachine) { + // Set the data layout and target triple of the module. + module.setDataLayout((*targetMachine)->createDataLayout()); + module.setTargetTriple((*targetMachine)->getTargetTriple().getTriple()); + } +} + std::optional> -ModuleToObject::moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +ModuleToObject::moduleToObject(llvm::Module &llvmModule) { SmallVector binaryData; // Write the LLVM module bitcode to a buffer. llvm::raw_svector_ostream outputStream(binaryData); @@ -196,32 +213,24 @@ std::optional> ModuleToObject::run() { getOperation().emitError() << "Failed creating the llvm::Module."; return std::nullopt; } - - // Create the target machine. - std::unique_ptr targetMachine = createTargetMachine(); - if (!targetMachine) - return std::nullopt; - - // Set the data layout and target triple of the module. - llvmModule->setDataLayout(targetMachine->createDataLayout()); - llvmModule->setTargetTriple(targetMachine->getTargetTriple().getTriple()); + setDataLayoutAndTriple(*llvmModule); // Link bitcode files. - handleModulePreLink(*llvmModule, *targetMachine); + handleModulePreLink(*llvmModule); { - auto libs = loadBitcodeFiles(*llvmModule, *targetMachine); + auto libs = loadBitcodeFiles(*llvmModule); if (!libs) return std::nullopt; if (!libs->empty()) if (failed(linkFiles(*llvmModule, std::move(*libs)))) return std::nullopt; - handleModulePostLink(*llvmModule, *targetMachine); + handleModulePostLink(*llvmModule); } // Optimize the module. - if (failed(optimizeModule(*llvmModule, *targetMachine, optLevel))) + if (failed(optimizeModule(*llvmModule, optLevel))) return std::nullopt; // Return the serialized object. - return moduleToObject(*llvmModule, *targetMachine); + return moduleToObject(*llvmModule); } diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index 7f263627db54f..eaf94147e2a6f 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -106,7 +106,7 @@ void SerializeGPUModuleBase::init() { static llvm::once_flag initializeBackendOnce; llvm::call_once(initializeBackendOnce, []() { // If the `NVPTX` LLVM target was built, initialize it. -#if MLIR_CUDA_CONVERSIONS_ENABLED == 1 +#if LLVM_HAS_NVPTX_TARGET LLVMInitializeNVPTXTarget(); LLVMInitializeNVPTXTargetInfo(); LLVMInitializeNVPTXTargetMC(); @@ -148,11 +148,10 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() { } std::optional>> -SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) { +SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector> bcFiles; - if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine, - fileList, bcFiles, true))) + if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, + true))) return std::nullopt; return std::move(bcFiles); } @@ -175,8 +174,7 @@ class NVPTXSerializer : public SerializeGPUModuleBase { compileToBinaryNVPTX(const std::string &ptxCode); std::optional> - moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) override; + moduleToObject(llvm::Module &llvmModule) override; private: using TmpFile = std::pair, llvm::FileRemover>; @@ -514,8 +512,7 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) { #endif // MLIR_NVPTXCOMPILER_ENABLED == 1 std::optional> -NVPTXSerializer::moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ @@ -526,11 +523,18 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule, }); #undef DEBUG_TYPE if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) - return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine); + return SerializeGPUModuleBase::moduleToObject(llvmModule); // Emit PTX code. + std::optional targetMachine = + getOrCreateTargetMachine(); + if (!targetMachine) { + getOperation().emitError() << "Target Machine unavailable for triple " + << triple << ", can't optimize with LLVM\n"; + return std::nullopt; + } std::optional serializedISA = - translateToISA(llvmModule, targetMachine); + translateToISA(llvmModule, **targetMachine); if (!serializedISA) { getOperation().emitError() << "Failed translating the module to ISA."; return std::nullopt; diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 23e9a4a52b435..709275c7ddef2 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -44,6 +44,7 @@ #include "llvm/TargetParser/TargetParser.h" #include +#include using namespace mlir; using namespace mlir::ROCDL; @@ -158,18 +159,15 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() { } std::optional>> -SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module, - llvm::TargetMachine &targetMachine) { +SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector> bcFiles; - if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine, - fileList, bcFiles, true))) + if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, + true))) return std::nullopt; return std::move(bcFiles); } -LogicalResult -SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module, - llvm::TargetMachine &targetMachine) { +LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Some ROCM builds don't strip this like they should if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) module.eraseNamedMetadata(openclVersion); @@ -179,8 +177,10 @@ SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module, return success(); } -void SerializeGPUModuleBase::handleModulePreLink( - llvm::Module &module, llvm::TargetMachine &targetMachine) { +void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { + std::optional targetMachine = + getOrCreateTargetMachine(); + assert(targetMachine && "expect a TargetMachine"); addControlVariables(module, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), @@ -332,8 +332,7 @@ class AMDGPUSerializer : public SerializeGPUModuleBase { compileToBinary(const std::string &serializedISA); std::optional> - moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) override; + moduleToObject(llvm::Module &llvmModule) override; private: // Target options. @@ -411,8 +410,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { } std::optional> -AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ @@ -422,11 +420,19 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule, }); #undef DEBUG_TYPE if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) - return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine); + return SerializeGPUModuleBase::moduleToObject(llvmModule); + + std::optional targetMachine = + getOrCreateTargetMachine(); + if (!targetMachine) { + getOperation().emitError() << "Target Machine unavailable for triple " + << triple << ", can't compile with LLVM\n"; + return std::nullopt; + } // Translate the Module to ISA. std::optional serializedISA = - translateToISA(llvmModule, targetMachine); + translateToISA(llvmModule, **targetMachine); if (!serializedISA) { getOperation().emitError() << "Failed translating the module to ISA."; return std::nullopt;