diff --git a/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp index 7859fb3feaabc..01965da038820 100644 --- a/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp +++ b/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp @@ -12,6 +12,7 @@ #include "level_zero/ze_api.h" #include +#include #include #include #include @@ -379,16 +380,15 @@ struct StreamWrapper { } }; -static ze_module_handle_t loadModule(const void *data, size_t dataSize) { +static ze_module_handle_t +loadModule(const void *data, size_t dataSize, + ze_module_format_t format = ZE_MODULE_FORMAT_NATIVE) { assert(data); ze_module_handle_t zeModule; - ze_module_desc_t desc = {ZE_STRUCTURE_TYPE_MODULE_DESC, - nullptr, - ZE_MODULE_FORMAT_IL_SPIRV, - dataSize, - (const uint8_t *)data, - nullptr, - nullptr}; + ze_module_desc_t desc = { + ZE_STRUCTURE_TYPE_MODULE_DESC, nullptr, format, dataSize, + (const uint8_t *)data, nullptr, nullptr}; + ze_module_build_log_handle_t buildLogHandle; ze_result_t result = zeModuleCreate(getRtContext().context.get(), getRtContext().device, &desc, @@ -520,6 +520,13 @@ extern "C" ze_module_handle_t mgpuModuleLoad(const void *data, return catchAll([&]() { return loadModule(data, gpuBlobSize); }); } +extern "C" ze_module_handle_t mgpuModuleLoadJIT(void *data, int optLevel) { + return catchAll([&]() { + return loadModule(data, strlen(reinterpret_cast(data)), + ZE_MODULE_FORMAT_IL_SPIRV); + }); +} + extern "C" ze_kernel_handle_t mgpuModuleGetFunction(ze_module_handle_t module, const char *name) { assert(module && name); diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp index e7c4f1764bb37..83eec5e9d5549 100644 --- a/mlir/lib/Target/LLVM/XeVM/Target.cpp +++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp @@ -107,9 +107,8 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() { // There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime). 
// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers). // - `ocloc` tool can be "queried" from within MLIR. -FailureOr> -SerializeGPUModuleBase::compileToBinary(StringRef asmStr, - StringRef inputFormat) { +FailureOr> SerializeGPUModuleBase::compileToBinary( + StringRef asmStr, StringRef inputFormat = "-spirv_input") { using TmpFile = std::pair, llvm::FileRemover>; // Find the `ocloc` tool. std::optional oclocCompiler = findTool("ocloc"); @@ -341,7 +340,20 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) { return SmallVector(bin.begin(), bin.end()); } - // Level zero runtime is set up to accept SPIR-V binary + // Binary generation path for SPIR-V target. Optimization and SPIR-V + // extensions are enabled in this path. In this path, first the SPIR-V binary + // is generated directly using the SPIR-V backends `SPIRVTranslateModule` API. + // Resultant SPIR-V is then fed to `ocloc` compiler (Intel's OpenCL Offline + // Compiler) to generate the final binary for Intel GPUs. + + // @TODO: This part is doing exact same SPIR-V code generation as the previous + // section under (targetOptions.getCompilationTarget() == + // gpu::CompilationTarget::Assembly) condition. Only exception is, it enables + // optimization and SPIRV extensions support for SPIRV binary output. We need + // to decide which one to use for our SPIRV code generation, and remove the + // other one to avoid confusion. For now, we keep both to have more + // flexibility for testing and comparison. 
+ std::string serializedSPIRVBinary; std::string ErrMsg; std::vector Opts; @@ -361,8 +373,9 @@ SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) { return getGPUModuleOp().emitError() << "SPIRV code size must be a multiple of 4."; - StringRef bin(serializedSPIRVBinary.c_str(), serializedSPIRVBinary.size()); - return SmallVector(bin.begin(), bin.end()); + StringRef spirvBin(serializedSPIRVBinary.c_str(), + serializedSPIRVBinary.size()); + return compileToBinary(spirvBin, "-spirv_input"); #endif // LLVM_HAS_SPIRV_TARGET }