diff --git a/mlir/include/mlir/Target/LLVM/ModuleToObject.h b/mlir/include/mlir/Target/LLVM/ModuleToObject.h
index d17afc1077fb4..e40d7e9a43dd6 100644
--- a/mlir/include/mlir/Target/LLVM/ModuleToObject.h
+++ b/mlir/include/mlir/Target/LLVM/ModuleToObject.h
@@ -31,7 +31,7 @@ class ModuleToObject {
 public:
   ModuleToObject(Operation &module, StringRef triple, StringRef chip,
                  StringRef features = {}, int optLevel = 3);
-  virtual ~ModuleToObject() = default;
+  virtual ~ModuleToObject();
 
   /// Returns the operation being serialized.
   Operation &getOperation();
@@ -42,44 +42,43 @@ class ModuleToObject {
 protected:
   // Hooks to be implemented by derived classes.
 
+  /// Hook for setting the data layout and target triple of `module`.
+  virtual void setDataLayoutAndTriple(llvm::Module &module);
+
   /// Hook for loading bitcode files, returns std::nullopt on failure.
   virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
-  loadBitcodeFiles(llvm::Module &module, llvm::TargetMachine &targetMachine) {
+  loadBitcodeFiles(llvm::Module &module) {
     return SmallVector<std::unique_ptr<llvm::Module>>();
   }
 
   /// Hook for performing additional actions on a loaded bitcode file.
-  virtual LogicalResult handleBitcodeFile(llvm::Module &module,
-                                          llvm::TargetMachine &targetMachine) {
+  virtual LogicalResult handleBitcodeFile(llvm::Module &module) {
     return success();
   }
 
   /// Hook for performing additional actions on the llvmModule pre linking.
-  virtual void handleModulePreLink(llvm::Module &module,
-                                   llvm::TargetMachine &targetMachine) {}
+  virtual void handleModulePreLink(llvm::Module &module) {}
 
   /// Hook for performing additional actions on the llvmModule post linking.
-  virtual void handleModulePostLink(llvm::Module &module,
-                                    llvm::TargetMachine &targetMachine) {}
+  virtual void handleModulePostLink(llvm::Module &module) {}
 
   /// Serializes the LLVM IR bitcode to an object file, by default it serializes
   /// to LLVM bitcode.
   virtual std::optional<SmallVector<char, 0>>
-  moduleToObject(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine);
+  moduleToObject(llvm::Module &llvmModule);
 
 protected:
   /// Create the target machine based on the target triple and chip.
-  std::unique_ptr<llvm::TargetMachine> createTargetMachine();
+  /// The result is cached; returns std::nullopt if the target is unavailable.
+  std::optional<llvm::TargetMachine *> getOrCreateTargetMachine();
 
   /// Loads a bitcode file from path.
-  std::unique_ptr<llvm::Module>
-  loadBitcodeFile(llvm::LLVMContext &context,
-                  llvm::TargetMachine &targetMachine, StringRef path);
+  std::unique_ptr<llvm::Module> loadBitcodeFile(llvm::LLVMContext &context,
+                                                StringRef path);
 
   /// Loads multiple bitcode files.
   LogicalResult loadBitcodeFilesFromList(
-      llvm::LLVMContext &context, llvm::TargetMachine &targetMachine,
-      ArrayRef<std::string> fileList,
+      llvm::LLVMContext &context, ArrayRef<std::string> fileList,
       SmallVector<std::unique_ptr<llvm::Module>> &llvmModules,
       bool failureOnError = true);
 
@@ -92,8 +91,7 @@ class ModuleToObject {
                           SmallVector<std::unique_ptr<llvm::Module>> &&libs);
 
   /// Optimize the module.
-  LogicalResult optimizeModule(llvm::Module &module,
-                               llvm::TargetMachine &targetMachine, int optL);
+  virtual LogicalResult optimizeModule(llvm::Module &module, int optL);
 
   /// Utility function for translating to ISA, returns `std::nullopt` on
   /// failure.
@@ -115,6 +113,11 @@ class ModuleToObject {
 
   /// Optimization level.
   int optLevel;
+
+private:
+  /// The TargetMachine created for the given Triple, if available.
+  /// Accessible through `getOrCreateTargetMachine()`.
+  std::unique_ptr<llvm::TargetMachine> targetMachine;
 };
 } // namespace LLVM
 } // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h
index d5926d1548472..65ae8a6bdb4ad 100644
--- a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h
@@ -55,8 +55,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
 
   /// Loads the bitcode files in `fileList`.
   virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
-  loadBitcodeFiles(llvm::Module &module,
-                   llvm::TargetMachine &targetMachine) override;
+  loadBitcodeFiles(llvm::Module &module) override;
 
 protected:
   /// NVVM target attribute.
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index c14fa80056a87..374fa65bd02e3 100644
--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -54,16 +54,13 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
 
   /// Loads the bitcode files in `fileList`.
   virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
-  loadBitcodeFiles(llvm::Module &module,
-                   llvm::TargetMachine &targetMachine) override;
+  loadBitcodeFiles(llvm::Module &module) override;
 
   /// Adds `oclc` control variables to the LLVM module.
-  void handleModulePreLink(llvm::Module &module,
-                           llvm::TargetMachine &targetMachine) override;
+  void handleModulePreLink(llvm::Module &module) override;
 
   /// Removes unnecessary metadata from the loaded bitcode files.
-  LogicalResult handleBitcodeFile(llvm::Module &module,
-                                  llvm::TargetMachine &targetMachine) override;
+  LogicalResult handleBitcodeFile(llvm::Module &module) override;
 
 protected:
   /// Appends the paths of common ROCm device libraries to `libs`.
diff --git a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
index 255b9efd32f86..b15876ab91c13 100644
--- a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
@@ -1,4 +1,4 @@
-if (MLIR_ENABLE_CUDA_CONVERSIONS)
+if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   set(NVPTX_LIBS
     NVPTXCodeGen
     NVPTXDesc
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 324d5c1366722..1601413c49f1f 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -1,4 +1,4 @@
-if (MLIR_ENABLE_CUDA_CONVERSIONS)
+if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   set(NVPTX_LIBS
     NVPTXCodeGen
     NVPTXDesc
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index ce07c259df833..cc2c3a00a02ea 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -21,7 +21,7 @@ add_mlir_library(MLIRTargetLLVM
   MLIRTargetLLVMIRExport
 )
 
-if (MLIR_ENABLE_CUDA_CONVERSIONS)
+if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   set(NVPTX_LIBS
     NVPTXCodeGen
     NVPTXDesc
diff --git a/mlir/lib/Target/LLVM/ModuleToObject.cpp b/mlir/lib/Target/LLVM/ModuleToObject.cpp
index e68ae8311ecfb..d94c10de8d7c4 100644
--- a/mlir/lib/Target/LLVM/ModuleToObject.cpp
+++ b/mlir/lib/Target/LLVM/ModuleToObject.cpp
@@ -39,32 +39,34 @@ ModuleToObject::ModuleToObject(Operation &module, StringRef triple,
     : module(module), triple(triple), chip(chip), features(features),
       optLevel(optLevel) {}
 
+ModuleToObject::~ModuleToObject() = default;
+
 Operation &ModuleToObject::getOperation() { return module; }
 
-std::unique_ptr<llvm::TargetMachine> ModuleToObject::createTargetMachine() {
-  std::string error;
+std::optional<llvm::TargetMachine *>
+ModuleToObject::getOrCreateTargetMachine() {
+  if (targetMachine)
+    return targetMachine.get(); // Reuse the machine built by an earlier call.
   // Load the target.
+  std::string error;
   const llvm::Target *target =
       llvm::TargetRegistry::lookupTarget(triple, error);
   if (!target) {
-    getOperation().emitError() << "Failed to lookup target: " << error;
-    return {};
+    getOperation().emitError()
+        << "Failed to lookup target for triple '" << triple << "' " << error;
+    return std::nullopt;
   }
 
   // Create the target machine using the target.
-  llvm::TargetMachine *machine =
-      target->createTargetMachine(triple, chip, features, {}, {});
-  if (!machine) {
-    getOperation().emitError() << "Failed to create the target machine.";
-    return {};
-  }
-  return std::unique_ptr<llvm::TargetMachine>{machine};
+  targetMachine.reset(
+      target->createTargetMachine(triple, chip, features, {}, {}));
+  if (!targetMachine)
+    return std::nullopt; // Unlike a failed lookup, no diagnostic is emitted.
+  return targetMachine.get();
 }
 
 std::unique_ptr<llvm::Module>
-ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context,
-                                llvm::TargetMachine &targetMachine,
-                                StringRef path) {
+ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, StringRef path) {
   llvm::SMDiagnostic error;
   std::unique_ptr<llvm::Module> library =
       llvm::getLazyIRFileModule(path, error, context);
@@ -73,15 +75,14 @@ ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context,
                                << ", error: " << error.getMessage();
     return nullptr;
   }
-  if (failed(handleBitcodeFile(*library, targetMachine))) {
+  if (failed(handleBitcodeFile(*library))) {
     return nullptr;
   }
   return library;
 }
 
 LogicalResult ModuleToObject::loadBitcodeFilesFromList(
-    llvm::LLVMContext &context, llvm::TargetMachine &targetMachine,
-    ArrayRef<std::string> fileList,
+    llvm::LLVMContext &context, ArrayRef<std::string> fileList,
     SmallVector<std::unique_ptr<llvm::Module>> &llvmModules,
     bool failureOnError) {
   for (const std::string &str : fileList) {
@@ -93,7 +94,7 @@ LogicalResult ModuleToObject::loadBitcodeFilesFromList(
       return failure();
     }
     // Load the file or abort on error.
-    if (auto bcFile = loadBitcodeFile(context, targetMachine, pathRef))
+    if (auto bcFile = loadBitcodeFile(context, pathRef))
       llvmModules.push_back(std::move(bcFile));
     else if (failureOnError)
       return failure();
@@ -137,16 +138,22 @@ ModuleToObject::linkFiles(llvm::Module &module,
 }
 
 LogicalResult ModuleToObject::optimizeModule(llvm::Module &module,
-                                             llvm::TargetMachine &targetMachine,
+
                                              int optLevel) {
   if (optLevel < 0 || optLevel > 3)
     return getOperation().emitError()
            << "Invalid optimization level: " << optLevel << ".";
 
-  targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel));
+  std::optional<llvm::TargetMachine *> targetMachine =
+      getOrCreateTargetMachine();
+  if (!targetMachine)
+    return getOperation().emitError()
+           << "Target Machine unavailable for triple " << triple
+           << ", can't optimize with LLVM\n";
+  (*targetMachine)->setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel));
 
   auto transformer =
-      makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
+      makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, *targetMachine);
   auto error = transformer(&module);
   if (error) {
     InFlightDiagnostic mlirError = getOperation().emitError();
@@ -178,9 +185,19 @@ ModuleToObject::translateToISA(llvm::Module &llvmModule,
   return stream.str();
 }
 
+void ModuleToObject::setDataLayoutAndTriple(llvm::Module &module) {
+  // Nothing to do when no target machine could be obtained.
+  std::optional<llvm::TargetMachine *> machine = getOrCreateTargetMachine();
+  if (!machine)
+    return;
+  // Stamp the module with the machine's data layout and target triple.
+  llvm::TargetMachine &tm = **machine;
+  module.setDataLayout(tm.createDataLayout());
+  module.setTargetTriple(tm.getTargetTriple().getTriple());
+}
+
 std::optional<SmallVector<char, 0>>
-ModuleToObject::moduleToObject(llvm::Module &llvmModule,
-                               llvm::TargetMachine &targetMachine) {
+ModuleToObject::moduleToObject(llvm::Module &llvmModule) {
   SmallVector<char, 0> binaryData;
   // Write the LLVM module bitcode to a buffer.
   llvm::raw_svector_ostream outputStream(binaryData);
@@ -196,32 +213,24 @@ std::optional<SmallVector<char, 0>> ModuleToObject::run() {
     getOperation().emitError() << "Failed creating the llvm::Module.";
     return std::nullopt;
   }
-
-  // Create the target machine.
-  std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
-  if (!targetMachine)
-    return std::nullopt;
-
-  // Set the data layout and target triple of the module.
-  llvmModule->setDataLayout(targetMachine->createDataLayout());
-  llvmModule->setTargetTriple(targetMachine->getTargetTriple().getTriple());
+  setDataLayoutAndTriple(*llvmModule);
 
   // Link bitcode files.
-  handleModulePreLink(*llvmModule, *targetMachine);
+  handleModulePreLink(*llvmModule);
   {
-    auto libs = loadBitcodeFiles(*llvmModule, *targetMachine);
+    auto libs = loadBitcodeFiles(*llvmModule);
     if (!libs)
       return std::nullopt;
     if (!libs->empty())
       if (failed(linkFiles(*llvmModule, std::move(*libs))))
         return std::nullopt;
-    handleModulePostLink(*llvmModule, *targetMachine);
+    handleModulePostLink(*llvmModule);
   }
 
   // Optimize the module.
-  if (failed(optimizeModule(*llvmModule, *targetMachine, optLevel)))
+  if (failed(optimizeModule(*llvmModule, optLevel)))
     return std::nullopt;
 
   // Return the serialized object.
-  return moduleToObject(*llvmModule, *targetMachine);
+  return moduleToObject(*llvmModule);
 }
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index 7f263627db54f..eaf94147e2a6f 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -106,7 +106,7 @@ void SerializeGPUModuleBase::init() {
   static llvm::once_flag initializeBackendOnce;
   llvm::call_once(initializeBackendOnce, []() {
   // If the `NVPTX` LLVM target was built, initialize it.
-#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
+#if LLVM_HAS_NVPTX_TARGET
     LLVMInitializeNVPTXTarget();
     LLVMInitializeNVPTXTargetInfo();
     LLVMInitializeNVPTXTargetMC();
@@ -148,11 +148,10 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
 }
 
 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
-SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module,
-                                         llvm::TargetMachine &targetMachine) {
+SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
   SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
-  if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine,
-                                      fileList, bcFiles, true)))
+  if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
+                                      /*failureOnError=*/true)))
     return std::nullopt;
   return std::move(bcFiles);
 }
@@ -175,8 +174,7 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
   compileToBinaryNVPTX(const std::string &ptxCode);
 
   std::optional<SmallVector<char, 0>>
-  moduleToObject(llvm::Module &llvmModule,
-                 llvm::TargetMachine &targetMachine) override;
+  moduleToObject(llvm::Module &llvmModule) override;
 
 private:
   using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
@@ -514,8 +512,7 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
 #endif // MLIR_NVPTXCOMPILER_ENABLED == 1
 
 std::optional<SmallVector<char, 0>>
-NVPTXSerializer::moduleToObject(llvm::Module &llvmModule,
-                                llvm::TargetMachine &targetMachine) {
+NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
   // Return LLVM IR if the compilation target is offload.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
@@ -526,11 +523,18 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule,
   });
 #undef DEBUG_TYPE
   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
-    return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine);
+    return SerializeGPUModuleBase::moduleToObject(llvmModule);
 
   // Emit PTX code.
+  std::optional<llvm::TargetMachine *> targetMachine =
+      getOrCreateTargetMachine();
+  if (!targetMachine) {
+    getOperation().emitError() << "Target Machine unavailable for triple "
+                               << triple << ", can't compile with LLVM\n";
+    return std::nullopt;
+  }
   std::optional<std::string> serializedISA =
-      translateToISA(llvmModule, targetMachine);
+      translateToISA(llvmModule, **targetMachine);
   if (!serializedISA) {
     getOperation().emitError() << "Failed translating the module to ISA.";
     return std::nullopt;
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index 23e9a4a52b435..709275c7ddef2 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -44,6 +44,7 @@
 #include "llvm/TargetParser/TargetParser.h"
 
 #include <cstdlib>
+#include <optional>
 
 using namespace mlir;
 using namespace mlir::ROCDL;
@@ -158,18 +159,15 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
 }
 
 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
-SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module,
-                                         llvm::TargetMachine &targetMachine) {
+SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
   SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
-  if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine,
-                                      fileList, bcFiles, true)))
+  if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
+                                      /*failureOnError=*/true)))
     return std::nullopt;
   return std::move(bcFiles);
 }
 
-LogicalResult
-SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module,
-                                          llvm::TargetMachine &targetMachine) {
+LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
   // Some ROCM builds don't strip this like they should
   if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version"))
     module.eraseNamedMetadata(openclVersion);
@@ -179,8 +177,10 @@ SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module,
   return success();
 }
 
-void SerializeGPUModuleBase::handleModulePreLink(
-    llvm::Module &module, llvm::TargetMachine &targetMachine) {
+void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
+  [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
+      getOrCreateTargetMachine();
+  assert(targetMachine && "expect a TargetMachine");
   addControlVariables(module, target.hasWave64(), target.hasDaz(),
                       target.hasFiniteOnly(), target.hasUnsafeMath(),
                       target.hasFastMath(), target.hasCorrectSqrt(),
@@ -332,8 +332,7 @@ class AMDGPUSerializer : public SerializeGPUModuleBase {
   compileToBinary(const std::string &serializedISA);
 
   std::optional<SmallVector<char, 0>>
-  moduleToObject(llvm::Module &llvmModule,
-                 llvm::TargetMachine &targetMachine) override;
+  moduleToObject(llvm::Module &llvmModule) override;
 
 private:
   // Target options.
@@ -411,8 +410,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
 }
 
 std::optional<SmallVector<char, 0>>
-AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule,
-                                 llvm::TargetMachine &targetMachine) {
+AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
   // Return LLVM IR if the compilation target is offload.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
@@ -422,11 +420,19 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule,
   });
 #undef DEBUG_TYPE
   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
-    return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine);
+    return SerializeGPUModuleBase::moduleToObject(llvmModule);
+
+  std::optional<llvm::TargetMachine *> targetMachine =
+      getOrCreateTargetMachine();
+  if (!targetMachine) {
+    getOperation().emitError() << "Target Machine unavailable for triple "
+                               << triple << ", can't compile with LLVM\n";
+    return std::nullopt;
+  }
 
   // Translate the Module to ISA.
   std::optional<std::string> serializedISA =
-      translateToISA(llvmModule, targetMachine);
+      translateToISA(llvmModule, **targetMachine);
   if (!serializedISA) {
     getOperation().emitError() << "Failed translating the module to ISA.";
     return std::nullopt;