diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 90c9166282703..00888b7f7a738 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -435,6 +435,14 @@ enum class CXXSpecialMemberKind {
   Invalid
 };
 
+enum class CUDAFunctionTarget {
+  Device,       // CUDADeviceAttr without CUDAHostAttr.
+  Global,       // CUDAGlobalAttr.
+  Host,         // CUDAHostAttr only, or no target attribute on a user decl.
+  HostDevice,   // Both attributes, or an unmarked implicit declaration.
+  InvalidTarget // CUDAInvalidTargetAttr.
+};
+
 /// Sema - This implements semantic analysis and AST building for C.
 /// \nosubgrouping
 class Sema final : public SemaBase {
@@ -3663,20 +3671,12 @@ class Sema final : public SemaBase {
   InternalLinkageAttr *mergeInternalLinkageAttr(Decl *D,
                                                 const InternalLinkageAttr &AL);
 
-  enum CUDAFunctionTarget {
-    CFT_Device,
-    CFT_Global,
-    CFT_Host,
-    CFT_HostDevice,
-    CFT_InvalidTarget
-  };
-
   /// Check validaty of calling convention attribute \p attr. If \p FD
   /// is not null pointer, use \p FD to determine the CUDA/HIP host/device
   /// target. Otherwise, it is specified by \p CFT.
-  bool CheckCallingConvAttr(const ParsedAttr &attr, CallingConv &CC,
-                            const FunctionDecl *FD = nullptr,
-                            CUDAFunctionTarget CFT = CFT_InvalidTarget);
+  bool CheckCallingConvAttr(
+      const ParsedAttr &attr, CallingConv &CC, const FunctionDecl *FD = nullptr,
+      CUDAFunctionTarget CFT = CUDAFunctionTarget::InvalidTarget);
 
   void AddParameterABIAttr(Decl *D, const AttributeCommonInfo &CI,
                            ParameterABI ABI);
@@ -12967,7 +12967,8 @@ class Sema final : public SemaBase {
   /// Example usage:
   ///
   ///  // Variable-length arrays are not allowed in CUDA device code.
-  ///  if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget())
+  ///  if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla)
+  ///     << llvm::to_underlying(CurrentCUDATarget()))
   ///    return ExprError();
   ///  // Otherwise, continue parsing as normal.
   SemaDiagnosticBuilder CUDADiagIfDeviceCode(SourceLocation Loc,
@@ -12983,7 +12984,7 @@ class Sema final : public SemaBase {
   /// function.
   ///
   /// Use this rather than examining the function's attributes yourself -- you
-  /// will get it wrong.  Returns CFT_Host if D is null.
+  /// will get it wrong.  Returns CurCUDATargetCtx.Target if D is null.
   CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
                                         bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
@@ -13008,7 +13009,7 @@ class Sema final : public SemaBase {
   /// Define the current global CUDA host/device context where a function may be
   /// called. Only used when a function is called outside of any functions.
   struct CUDATargetContext {
-    CUDAFunctionTarget Target = CFT_HostDevice;
+    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
     CUDATargetContextKind Kind = CTCK_Unknown;
     Decl *D = nullptr;
   } CurCUDATargetCtx;
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 1596222e3d1da..9d6d709e262ad 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -22,6 +22,7 @@
 #include "clang/Sema/SemaDiagnostic.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallVector.h"
 #include <optional>
 using namespace clang;
@@ -64,8 +65,7 @@ ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                        /*IsExecConfig=*/true);
 }
 
-Sema::CUDAFunctionTarget
-Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
+CUDAFunctionTarget Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
   bool HasHostAttr = false;
   bool HasDeviceAttr = false;
   bool HasGlobalAttr = false;
@@ -90,18 +90,18 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
   }
 
   if (HasInvalidTargetAttr)
-    return CFT_InvalidTarget;
+    return CUDAFunctionTarget::InvalidTarget;
 
   if (HasGlobalAttr)
-    return CFT_Global;
+    return CUDAFunctionTarget::Global;
 
   if (HasHostAttr && HasDeviceAttr)
-    return CFT_HostDevice;
+    return CUDAFunctionTarget::HostDevice;
 
   if (HasDeviceAttr)
-    return CFT_Device;
+    return CUDAFunctionTarget::Device;
 
-  return CFT_Host;
+  return CUDAFunctionTarget::Host;
 }
 
 template <typename A>
@@ -120,43 +120,43 @@ Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
   assert(K == CTCK_InitGlobalVar);
   auto *VD = dyn_cast_or_null<VarDecl>(D);
   if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
-    auto Target = CFT_Host;
+    auto Target = CUDAFunctionTarget::Host;
     if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
          !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
         hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
         hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
-      Target = CFT_Device;
+      Target = CUDAFunctionTarget::Device;
     S.CurCUDATargetCtx = {Target, K, VD};
   }
 }
 
 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
-                                                  bool IgnoreImplicitHDAttr) {
+CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+                                            bool IgnoreImplicitHDAttr) {
   // Code that lives outside a function gets the target from CurCUDATargetCtx.
   if (D == nullptr)
     return CurCUDATargetCtx.Target;
 
   if (D->hasAttr<CUDAInvalidTargetAttr>())
-    return CFT_InvalidTarget;
+    return CUDAFunctionTarget::InvalidTarget;
 
   if (D->hasAttr<CUDAGlobalAttr>())
-    return CFT_Global;
+    return CUDAFunctionTarget::Global;
 
   if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
     if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
-      return CFT_HostDevice;
-    return CFT_Device;
+      return CUDAFunctionTarget::HostDevice;
+    return CUDAFunctionTarget::Device;
   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
-    return CFT_Host;
+    return CUDAFunctionTarget::Host;
   } else if ((D->isImplicit() || !D->isUserProvided()) &&
              !IgnoreImplicitHDAttr) {
     // Some implicit declarations (like intrinsic functions) are not marked.
     // Set the most lenient target on them for maximal flexibility.
-    return CFT_HostDevice;
+    return CUDAFunctionTarget::HostDevice;
   }
 
-  return CFT_Host;
+  return CUDAFunctionTarget::Host;
 }
 
 /// IdentifyTarget - Determine the CUDA compilation target for this variable.
@@ -181,10 +181,10 @@ Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
   //  - on device side in device or global functions
   if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
     switch (IdentifyCUDATarget(FD)) {
-    case CFT_HostDevice:
+    case CUDAFunctionTarget::HostDevice:
       return CVT_Both;
-    case CFT_Device:
-    case CFT_Global:
+    case CUDAFunctionTarget::Device:
+    case CUDAFunctionTarget::Global:
       return CVT_Device;
     default:
       return CVT_Host;
@@ -230,7 +230,7 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
   // will be diagnosed by checkAllowedCUDAInitializer.
   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
-      CurCUDATargetCtx.Target == CFT_Device &&
+      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
       (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
     return CFP_HostDevice;
 
@@ -239,40 +239,47 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
 
   // If one of the targets is invalid, the check always fails, no matter what
   // the other target is.
-  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
+  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
+      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
     return CFP_Never;
 
   // (a) Can't call global from some contexts until we support CUDA's
   // dynamic parallelism.
-  if (CalleeTarget == CFT_Global &&
-      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
+  if (CalleeTarget == CUDAFunctionTarget::Global &&
+      (CallerTarget == CUDAFunctionTarget::Global ||
+       CallerTarget == CUDAFunctionTarget::Device))
     return CFP_Never;
 
   // (b) Calling HostDevice is OK for everyone.
-  if (CalleeTarget == CFT_HostDevice)
+  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
     return CFP_HostDevice;
 
   // (c) Best case scenarios
   if (CalleeTarget == CallerTarget ||
-      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
-      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
+      (CallerTarget == CUDAFunctionTarget::Host &&
+       CalleeTarget == CUDAFunctionTarget::Global) ||
+      (CallerTarget == CUDAFunctionTarget::Global &&
+       CalleeTarget == CUDAFunctionTarget::Device))
     return CFP_Native;
 
   // HipStdPar mode is special, in that assessing whether a device side call to
   // a host target is deferred to a subsequent pass, and cannot unambiguously be
   // adjudicated in the AST, hence we optimistically allow them to pass here.
   if (getLangOpts().HIPStdPar &&
-      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
-       CallerTarget == CFT_HostDevice) &&
-      CalleeTarget == CFT_Host)
+      (CallerTarget == CUDAFunctionTarget::Global ||
+       CallerTarget == CUDAFunctionTarget::Device ||
+       CallerTarget == CUDAFunctionTarget::HostDevice) &&
+      CalleeTarget == CUDAFunctionTarget::Host)
     return CFP_HostDevice;
 
   // (d) HostDevice behavior depends on compilation mode.
-  if (CallerTarget == CFT_HostDevice) {
+  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
     // It's OK to call a compilation-mode matching function from an HD one.
-    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
+    if ((getLangOpts().CUDAIsDevice &&
+         CalleeTarget == CUDAFunctionTarget::Device) ||
         (!getLangOpts().CUDAIsDevice &&
-         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
+         (CalleeTarget == CUDAFunctionTarget::Host ||
+          CalleeTarget == CUDAFunctionTarget::Global)))
       return CFP_SameSide;
 
     // Calls from HD to non-mode-matching functions (i.e., to host functions
@@ -283,9 +290,12 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
   }
 
   // (e) Calling across device/host boundary is not something you should do.
-  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
-      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
-      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
+  if ((CallerTarget == CUDAFunctionTarget::Host &&
+       CalleeTarget == CUDAFunctionTarget::Device) ||
+      (CallerTarget == CUDAFunctionTarget::Device &&
+       CalleeTarget == CUDAFunctionTarget::Host) ||
+      (CallerTarget == CUDAFunctionTarget::Global &&
+       CalleeTarget == CUDAFunctionTarget::Host))
     return CFP_Never;
 
   llvm_unreachable("All cases should've been handled by now.");
@@ -337,16 +347,16 @@ void Sema::EraseUnwantedCUDAMatches(
 /// \param ResolvedTarget with a target that resolves for both calls.
 /// \return true if there's a conflict, false otherwise.
 static bool
-resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
-                                Sema::CUDAFunctionTarget Target2,
-                                Sema::CUDAFunctionTarget *ResolvedTarget) {
+resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
+                                CUDAFunctionTarget Target2,
+                                CUDAFunctionTarget *ResolvedTarget) {
   // Only free functions and static member functions may be global.
-  assert(Target1 != Sema::CFT_Global);
-  assert(Target2 != Sema::CFT_Global);
+  assert(Target1 != CUDAFunctionTarget::Global);
+  assert(Target2 != CUDAFunctionTarget::Global);
 
-  if (Target1 == Sema::CFT_HostDevice) {
+  if (Target1 == CUDAFunctionTarget::HostDevice) {
     *ResolvedTarget = Target2;
-  } else if (Target2 == Sema::CFT_HostDevice) {
+  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
     *ResolvedTarget = Target1;
   } else if (Target1 != Target2) {
     return true;
@@ -422,7 +432,8 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
         if (Diagnose) {
           Diag(ClassDecl->getLocation(),
                diag::note_implicit_member_target_infer_collision)
-              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
+              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
+              << llvm::to_underlying(BaseMethodTarget);
         }
         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
         return true;
@@ -465,7 +476,8 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
         if (Diagnose) {
           Diag(ClassDecl->getLocation(),
                diag::note_implicit_member_target_infer_collision)
-              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
+              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
+              << llvm::to_underlying(FieldMethodTarget);
         }
         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
         return true;
@@ -478,9 +490,9 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
   // it's the least restrictive option that can be invoked from any target.
   bool NeedsH = true, NeedsD = true;
   if (InferredTarget) {
-    if (*InferredTarget == CFT_Device)
+    if (*InferredTarget == CUDAFunctionTarget::Device)
       NeedsH = false;
-    else if (*InferredTarget == CFT_Host)
+    else if (*InferredTarget == CUDAFunctionTarget::Host)
       NeedsD = false;
   }
 
@@ -677,9 +689,10 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
     }
     if (InitFn) {
       CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
-      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
+      if (InitFnTarget != CUDAFunctionTarget::Host &&
+          InitFnTarget != CUDAFunctionTarget::HostDevice) {
         Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
-            << InitFnTarget << InitFn;
+            << llvm::to_underlying(InitFnTarget) << InitFn;
         Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
         VD->setInvalidDecl();
       }
@@ -699,8 +712,9 @@ void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
 
   // Record whether an implicit host device function is used on device side.
-  if (CallerTarget != CFT_Device && CallerTarget != CFT_Global &&
-      (CallerTarget != CFT_HostDevice ||
+  if (CallerTarget != CUDAFunctionTarget::Device &&
+      CallerTarget != CUDAFunctionTarget::Global &&
+      (CallerTarget != CUDAFunctionTarget::HostDevice ||
        (isCUDAImplicitHostDeviceFunction(Caller) &&
         !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
     return;
@@ -806,10 +820,10 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
     if (!CurFunContext)
       return SemaDiagnosticBuilder::K_Nop;
     switch (CurrentCUDATarget()) {
-    case CFT_Global:
-    case CFT_Device:
+    case CUDAFunctionTarget::Global:
+    case CUDAFunctionTarget::Device:
       return SemaDiagnosticBuilder::K_Immediate;
-    case CFT_HostDevice:
+    case CUDAFunctionTarget::HostDevice:
       // An HD function counts as host code if we're compiling for host, and
       // device code if we're compiling for device.  Defer any errors in device
       // mode until the function is known-emitted.
@@ -836,9 +850,9 @@ Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
     if (!CurFunContext)
       return SemaDiagnosticBuilder::K_Nop;
     switch (CurrentCUDATarget()) {
-    case CFT_Host:
+    case CUDAFunctionTarget::Host:
       return SemaDiagnosticBuilder::K_Immediate;
-    case CFT_HostDevice:
+    case CUDAFunctionTarget::HostDevice:
       // An HD function counts as host code if we're compiling for host, and
       // device code if we're compiling for device.  Defer any errors in device
       // mode until the function is known-emitted.
@@ -911,8 +925,8 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
     return true;
 
   SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
-      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
-      << IdentifyCUDATarget(Caller);
+      << llvm::to_underlying(IdentifyCUDATarget(Callee)) << /*function*/ 0
+      << Callee << llvm::to_underlying(IdentifyCUDATarget(Caller));
   if (!Callee->getBuiltinID())
     SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                           diag::note_previous_decl, Caller, *this)
@@ -995,19 +1009,21 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
     // HD/global functions "exist" in some sense on both the host and device, so
     // should have the same implementation on both sides.
     if (NewTarget != OldTarget &&
-        ((NewTarget == CFT_HostDevice &&
+        ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(LangOpts.OffloadImplicitHostDeviceTemplates &&
             isCUDAImplicitHostDeviceFunction(NewFD) &&
-            OldTarget == CFT_Device)) ||
-         (OldTarget == CFT_HostDevice &&
+            OldTarget == CUDAFunctionTarget::Device)) ||
+         (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(LangOpts.OffloadImplicitHostDeviceTemplates &&
             isCUDAImplicitHostDeviceFunction(OldFD) &&
-            NewTarget == CFT_Device)) ||
-         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
+            NewTarget == CUDAFunctionTarget::Device)) ||
+         (NewTarget == CUDAFunctionTarget::Global) ||
+         (OldTarget == CUDAFunctionTarget::Global)) &&
         !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                     /* ConsiderCudaAttrs = */ false)) {
       Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
-          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
+          << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
+          << llvm::to_underlying(OldTarget) << OldFD;
       Diag(OldFD->getLocation(), diag::note_previous_declaration);
       NewFD->setInvalidDecl();
       break;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index b448bacd49f32..720e56692359b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -20666,11 +20666,11 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
     // when compiling for host, device and global functions are never emitted.
     // (Technically, we do emit a host-side stub for global functions, but this
     // doesn't count for our purposes here.)
-    Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD);
-    if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host)
+    CUDAFunctionTarget T = IdentifyCUDATarget(FD);
+    if (LangOpts.CUDAIsDevice && T == CUDAFunctionTarget::Host)
       return FunctionEmissionStatus::CUDADiscarded;
     if (!LangOpts.CUDAIsDevice &&
-        (T == Sema::CFT_Device || T == Sema::CFT_Global))
+        (T == CUDAFunctionTarget::Device || T == CUDAFunctionTarget::Global))
       return FunctionEmissionStatus::CUDADiscarded;
 
     if (IsEmittedForExternalSymbol())
@@ -20691,5 +20691,5 @@ bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) {
   // for host, only HD functions actually called from the host get marked as
   // known-emitted.
   return LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
-         IdentifyCUDATarget(Callee) == CFT_Global;
+         IdentifyCUDATarget(Callee) == CUDAFunctionTarget::Global;
 }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index b91064e28e415..56c9d90c9b52b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -42,6 +42,7 @@
 #include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Assumptions.h"
 #include "llvm/MC/MCSectionMachO.h"
@@ -5099,7 +5100,7 @@ static void handleSharedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   }
   if (S.getLangOpts().CUDA && VD->hasLocalStorage() &&
       S.CUDADiagIfHostCode(AL.getLoc(), diag::err_cuda_host_shared)
-          << S.CurrentCUDATarget())
+          << llvm::to_underlying(S.CurrentCUDATarget()))
     return;
   D->addAttr(::new (S.Context) CUDASharedAttr(S.Context, AL));
 }
@@ -5493,22 +5494,22 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
   // on their host/device attributes.
   if (LangOpts.CUDA) {
     auto *Aux = Context.getAuxTargetInfo();
-    assert(FD || CFT != CFT_InvalidTarget);
+    assert(FD || CFT != CUDAFunctionTarget::InvalidTarget);
     auto CudaTarget = FD ? IdentifyCUDATarget(FD) : CFT;
     bool CheckHost = false, CheckDevice = false;
     switch (CudaTarget) {
-    case CFT_HostDevice:
+    case CUDAFunctionTarget::HostDevice:
       CheckHost = true;
       CheckDevice = true;
       break;
-    case CFT_Host:
+    case CUDAFunctionTarget::Host:
       CheckHost = true;
       break;
-    case CFT_Device:
-    case CFT_Global:
+    case CUDAFunctionTarget::Device:
+    case CUDAFunctionTarget::Global:
       CheckDevice = true;
       break;
-    case CFT_InvalidTarget:
+    case CUDAFunctionTarget::InvalidTarget:
       llvm_unreachable("unexpected cuda target");
     }
     auto *HostTI = LangOpts.CUDAIsDevice ? Aux : &TI;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ec5ca2b9352ed..18284e0c3e987 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -53,6 +53,7 @@
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ConvertUTF.h"
@@ -17307,7 +17308,8 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
   if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
     if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) {
       CUDAFunctionTarget T = IdentifyCUDATarget(F);
-      if (T == CFT_Global || T == CFT_Device || T == CFT_HostDevice)
+      if (T == CUDAFunctionTarget::Global || T == CUDAFunctionTarget::Device ||
+          T == CUDAFunctionTarget::HostDevice)
         return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device));
     }
   }
@@ -19182,14 +19184,16 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
     auto VarTarget = SemaRef.IdentifyCUDATarget(Var);
     auto UserTarget = SemaRef.IdentifyCUDATarget(FD);
     if (VarTarget == Sema::CVT_Host &&
-        (UserTarget == Sema::CFT_Device || UserTarget == Sema::CFT_HostDevice ||
-         UserTarget == Sema::CFT_Global)) {
+        (UserTarget == CUDAFunctionTarget::Device ||
+         UserTarget == CUDAFunctionTarget::HostDevice ||
+         UserTarget == CUDAFunctionTarget::Global)) {
       // Diagnose ODR-use of host global variables in device functions.
       // Reference of device global variables in host functions is allowed
       // through shadow variables therefore it is not diagnosed.
       if (SemaRef.LangOpts.CUDAIsDevice && !SemaRef.LangOpts.HIPStdPar) {
         SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
-            << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget;
+            << /*host*/ 2 << /*variable*/ 1 << Var
+            << llvm::to_underlying(UserTarget);
         SemaRef.targetDiag(Var->getLocation(),
                            Var->getType().isConstQualified()
                                ? diag::note_cuda_const_var_unpromoted
@@ -19197,8 +19201,8 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
       }
     } else if (VarTarget == Sema::CVT_Device &&
                !Var->hasAttr<CUDASharedAttr>() &&
-               (UserTarget == Sema::CFT_Host ||
-                UserTarget == Sema::CFT_HostDevice)) {
+               (UserTarget == CUDAFunctionTarget::Host ||
+                UserTarget == CUDAFunctionTarget::HostDevice)) {
       // Record a CUDA/HIP device side variable if it is ODR-used
       // by host code. This is done conservatively, when the variable is
       // referenced in any of the following contexts:
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index ac39cb99fc934..ce9d5c26e2185 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -44,6 +44,7 @@
 #include "clang/Sema/TemplateDeduction.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TypeSize.h"
@@ -884,7 +885,7 @@ ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex,
   // Exceptions aren't allowed in CUDA device code.
   if (getLangOpts().CUDA)
     CUDADiagIfDeviceCode(OpLoc, diag::err_cuda_device_exceptions)
-        << "throw" << CurrentCUDATarget();
+        << "throw" << llvm::to_underlying(CurrentCUDATarget());
 
   if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope())
     Diag(OpLoc, diag::err_omp_simd_region_cannot_use_stmt) << "throw";
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 1f674dd4bb0fb..e1155dc2d5d28 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1549,10 +1549,10 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
     // Don't allow overloading of destructors.  (In theory we could, but it
     // would be a giant change to clang.)
     if (!isa<CXXDestructorDecl>(New)) {
-      Sema::CUDAFunctionTarget NewTarget = SemaRef.IdentifyCUDATarget(New),
-                               OldTarget = SemaRef.IdentifyCUDATarget(Old);
-      if (NewTarget != Sema::CFT_InvalidTarget) {
-        assert((OldTarget != Sema::CFT_InvalidTarget) &&
+      CUDAFunctionTarget NewTarget = SemaRef.IdentifyCUDATarget(New),
+                         OldTarget = SemaRef.IdentifyCUDATarget(Old);
+      if (NewTarget != CUDAFunctionTarget::InvalidTarget) {
+        assert((OldTarget != CUDAFunctionTarget::InvalidTarget) &&
                "Unexpected invalid target.");
 
         // Allow overloading of functions with same signature and different CUDA
@@ -11938,8 +11938,8 @@ static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) {
   FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true);
   FunctionDecl *Callee = Cand->Function;
 
-  Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(Caller),
-                           CalleeTarget = S.IdentifyCUDATarget(Callee);
+  CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(Caller),
+                     CalleeTarget = S.IdentifyCUDATarget(Callee);
 
   std::string FnDesc;
   std::pair<OverloadCandidateKind, OverloadCandidateSelect> FnKindPair =
@@ -11949,7 +11949,7 @@ static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) {
   S.Diag(Callee->getLocation(), diag::note_ovl_candidate_bad_target)
       << (unsigned)FnKindPair.first << (unsigned)ocs_non_template
       << FnDesc /* Ignored */
-      << CalleeTarget << CallerTarget;
+      << llvm::to_underlying(CalleeTarget) << llvm::to_underlying(CallerTarget);
 
   // This could be an implicit constructor for which we could not infer the
   // target due to a collsion. Diagnose that case.
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index e53c76e65b03d..1c2f6120f6218 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -37,6 +37,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -4574,7 +4575,7 @@ StmtResult Sema::ActOnCXXTryBlock(SourceLocation TryLoc, Stmt *TryBlock,
   // Exceptions aren't allowed in CUDA device code.
   if (getLangOpts().CUDA)
     CUDADiagIfDeviceCode(TryLoc, diag::err_cuda_device_exceptions)
-        << "try" << CurrentCUDATarget();
+        << "try" << llvm::to_underlying(CurrentCUDATarget());
 
   if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope())
     Diag(TryLoc, diag::err_omp_simd_region_cannot_use_stmt) << "try";
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index b8a1518fbe00a..b3f6078952f6e 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -37,6 +37,7 @@
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateInstCallback.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
@@ -376,11 +377,10 @@ enum TypeAttrLocation {
 static void
 processTypeAttrs(TypeProcessingState &state, QualType &type,
                  TypeAttrLocation TAL, const ParsedAttributesView &attrs,
-                 Sema::CUDAFunctionTarget CFT = Sema::CFT_HostDevice);
+                 CUDAFunctionTarget CFT = CUDAFunctionTarget::HostDevice);
 
 static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
-                                   QualType &type,
-                                   Sema::CUDAFunctionTarget CFT);
+                                   QualType &type, CUDAFunctionTarget CFT);
 
 static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state,
                                              ParsedAttr &attr, QualType &type);
@@ -627,7 +627,7 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state,
 static bool distributeFunctionTypeAttrToInnermost(
     TypeProcessingState &state, ParsedAttr &attr,
     ParsedAttributesView &attrList, QualType &declSpecType,
-    Sema::CUDAFunctionTarget CFT) {
+    CUDAFunctionTarget CFT) {
   Declarator &declarator = state.getDeclarator();
 
   // Put it on the innermost function chunk, if there is one.
@@ -644,10 +644,10 @@ static bool distributeFunctionTypeAttrToInnermost(
 
 /// A function type attribute was written in the decl spec.  Try to
 /// apply it somewhere.
-static void
-distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
-                                       ParsedAttr &attr, QualType &declSpecType,
-                                       Sema::CUDAFunctionTarget CFT) {
+static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
+                                                   ParsedAttr &attr,
+                                                   QualType &declSpecType,
+                                                   CUDAFunctionTarget CFT) {
   state.saveDeclSpecAttrs();
 
   // Try to distribute to the innermost.
@@ -664,9 +664,10 @@ distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
 /// Try to apply it somewhere.
 /// `Attrs` is the attribute list containing the declaration (either of the
 /// declarator or the declaration).
-static void distributeFunctionTypeAttrFromDeclarator(
-    TypeProcessingState &state, ParsedAttr &attr, QualType &declSpecType,
-    Sema::CUDAFunctionTarget CFT) {
+static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
+                                                     ParsedAttr &attr,
+                                                     QualType &declSpecType,
+                                                     CUDAFunctionTarget CFT) {
   Declarator &declarator = state.getDeclarator();
 
   // Try to distribute to the innermost.
@@ -694,7 +695,7 @@ static void distributeFunctionTypeAttrFromDeclarator(
 /// declarator or the declaration).
 static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
                                               QualType &declSpecType,
-                                              Sema::CUDAFunctionTarget CFT) {
+                                              CUDAFunctionTarget CFT) {
   // The called functions in this loop actually remove things from the current
   // list, so iterating over the existing list isn't possible.  Instead, make a
   // non-owning copy and iterate over that.
@@ -2734,7 +2735,7 @@ QualType Sema::BuildArrayType(QualType T, ArraySizeModifier ASM,
       bool IsCUDADevice = (getLangOpts().CUDA && getLangOpts().CUDAIsDevice);
       targetDiag(Loc,
                  IsCUDADevice ? diag::err_cuda_vla : diag::err_vla_unsupported)
-          << (IsCUDADevice ? CurrentCUDATarget() : 0);
+          << (IsCUDADevice ? llvm::to_underlying(CurrentCUDATarget()) : 0);
     } else if (sema::FunctionScopeInfo *FSI = getCurFunction()) {
       // VLAs are supported on this target, but we may need to do delayed
       // checking that the VLA is not being used within a coroutine.
@@ -8028,8 +8029,7 @@ static bool handleArmStateAttribute(Sema &S,
 /// Process an individual function attribute.  Returns true to
 /// indicate that the attribute was handled, false if it wasn't.
 static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
-                                   QualType &type,
-                                   Sema::CUDAFunctionTarget CFT) {
+                                   QualType &type, CUDAFunctionTarget CFT) {
   Sema &S = state.getSema();
 
   FunctionTypeUnwrapper unwrapped(S, type);
@@ -8863,7 +8863,7 @@ static void HandleHLSLParamModifierAttr(QualType &CurType,
 static void processTypeAttrs(TypeProcessingState &state, QualType &type,
                              TypeAttrLocation TAL,
                              const ParsedAttributesView &attrs,
-                             Sema::CUDAFunctionTarget CFT) {
+                             CUDAFunctionTarget CFT) {
 
   state.setParsedNoDeref(false);
   if (attrs.empty())