diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d2e5bd284d350..da391082eba5b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -401,6 +401,7 @@ Bug Fixes to Attribute Support
 - Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
   ``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
 - Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075)
+- Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905)
 
 Bug Fixes to C++ Support
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 3c697ed8dd882..6ec0eac529245 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1599,7 +1599,7 @@ def CUDAShared : InheritableAttr {
 }
 def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
 
-def DeviceKernel : DeclOrTypeAttr {
+def DeviceKernel : InheritableAttr {
   let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
                    Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
                    CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 66a1b684ec68b..568d56ab0b911 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   }
   case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
   case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
-  case attr::DeviceKernel:
-    OS << T->getAttr()->getSpelling();
-    break;
   case attr::IntelOclBicc:
     OS << "inteloclbicc";
     break;
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 33c29586359be..f5c8396f398aa 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
     // a host function.
     if (HostTarget)
       return HostTarget->checkCallingConvention(CC);
-    return CCCR_Warning;
+    return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning;
   }
 
   bool hasBitIntType() const override { return true; }
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..d9e3c9969686d 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -419,9 +419,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     return;
 
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
-  if (FD)
+  if (FD) {
     setFunctionDeclAttributes(FD, F, M);
-
+    if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
+      F->setCallingConv(getDeviceKernelCallingConv());
+  }
   if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
     F->addFnAttr("amdgpu-ieee", "false");
 }
@@ -634,7 +636,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
   // kernel address (only the kernel descriptor).
   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
                                    &Mod);
-  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+  F->setCallingConv(getDeviceKernelCallingConv());
 
   llvm::AttrBuilder KernelAttrs(C);
   // FIXME: The invoke isn't applying the right attributes either
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 53f2fc4a040a7..f6715861d91bc 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -264,7 +264,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
       // And kernel functions are not subject to inlining
       F->addFnAttr(llvm::Attribute::NoInline);
       if (FD->hasAttr<CUDAGlobalAttr>()) {
-        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+        F->setCallingConv(getDeviceKernelCallingConv());
 
         for (auto IV : llvm::enumerate(FD->parameters()))
           if (IV.value()->hasAttr<CUDAGridConstantAttr>())
@@ -278,7 +278,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
   }
   // Attach kernel metadata directly if compiling for NVPTX.
   if (FD->hasAttr<DeviceKernelAttr>())
-    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+    F->setCallingConv(getDeviceKernelCallingConv());
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..b3b688891bf07 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -61,6 +61,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
       QualType SampledType, CodeGenModule &CGM) const;
   void
   setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override;
 };
 class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
 public:
@@ -240,6 +242,22 @@ void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
       FT, FT->getExtInfo().withCallingConv(CC_SpirFunction));
 }
 
+void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (M.getLangOpts().OpenCL || GV->isDeclaration())
+    return;
+
+  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD)
+    return;
+
+  llvm::Function *F = dyn_cast<llvm::Function>(GV);
+  assert(F && "Expected GlobalValue to be a Function");
+
+  if (FD->hasAttr<DeviceKernelAttr>())
+    F->setCallingConv(getDeviceKernelCallingConv());
+}
+
 LangAS
 SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
@@ -264,19 +282,23 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
 
 void SPIRVTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
-  if (!M.getLangOpts().HIP ||
-      M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
-    return;
   if (GV->isDeclaration())
     return;
 
-  auto F = dyn_cast<llvm::Function>(GV);
-  if (!F)
+  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+  if (!FD)
     return;
 
-  auto FD = dyn_cast_or_null<FunctionDecl>(D);
-  if (!FD)
+  llvm::Function *F = dyn_cast<llvm::Function>(GV);
+  assert(F && "Expected GlobalValue to be a Function");
+
+  if (FD->hasAttr<DeviceKernelAttr>())
+    F->setCallingConv(getDeviceKernelCallingConv());
+
+  if (!M.getLangOpts().HIP ||
+      M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
     return;
+
   if (!FD->hasAttr<CUDAGlobalAttr>())
     return;
 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 328ccf6694073..f08e1da400459 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5204,25 +5204,53 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
 static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
   bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
-  if (S.getLangOpts().SYCLIsDevice) {
+  llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple();
+  const LangOptions &LangOpts = S.getLangOpts();
+
+  if (LangOpts.SYCLIsDevice) {
     if (!IsFunctionTemplate) {
       S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
           << AL << AL.isRegularKeywordAttribute() << "function templates";
-    } else {
-      S.SYCL().handleKernelAttr(D, AL);
+      AL.setInvalid();
+      return;
     }
+    S.SYCL().handleKernelAttr(D, AL);
   } else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
     S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
-  } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
+    AL.setInvalid();
+    return;
+  } else if (Triple.isNVPTX()) {
     handleGlobalAttr(S, D, AL);
   } else {
     // OpenCL C++ will throw a more specific error.
-    if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
+    if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
       S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
           << AL << AL.isRegularKeywordAttribute() << "functions";
+      AL.setInvalid();
+      return;
     }
     handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
   }
+  // TODO: isGPU() should probably return true for SPIR.
+  bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() ||
+                                 LangOpts.isTargetDevice() || LangOpts.OpenCL;
+  bool IsAMDGPUMismatch =
+      DeviceKernelAttr::isAMDGPUSpelling(AL) && !Triple.isAMDGPU();
+  bool IsNVPTXMismatch =
+      DeviceKernelAttr::isNVPTXSpelling(AL) && !Triple.isNVPTX();
+  if (IsAMDGPUMismatch || IsNVPTXMismatch || !TargetDeviceEnvironment) {
+    // While both are just different spellings of the same underlying
+    // attribute, it makes more sense to the user if amdgpu_kernel can only
+    // be used on AMDGPU and the equivalent for NVPTX, so warn and ignore
+    // the attribute if there's a mismatch.
+    // Also warn if this is not an environment where a device kernel makes
+    // sense.
+    S.Diag(AL.getLoc(), diag::warn_cconv_unsupported)
+        << AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget;
+    AL.setInvalid();
+    return;
+  }
+
   // Make sure we validate the CC with the target
   // and warn/error if necessary.
   handleCallConvAttr(S, D, AL);
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index bee613aa5f1c5..0d5b0e7e842b3 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_VectorCall:                                              \
   case ParsedAttr::AT_AArch64VectorPcs:                                        \
   case ParsedAttr::AT_AArch64SVEPcs:                                           \
-  case ParsedAttr::AT_DeviceKernel:                                            \
   case ParsedAttr::AT_MSABI:                                                   \
   case ParsedAttr::AT_SysVABI:                                                 \
   case ParsedAttr::AT_Pcs:                                                     \
@@ -3781,7 +3780,8 @@ static CallingConv getCCForDeclaratorChunk(
     }
   }
   if (!S.getLangOpts().isSYCL()) {
-    for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
+    for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
+             D.getDeclSpec().getAttributes(), D.getAttributes())) {
       if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
         CC = CC_DeviceKernel;
         break;
@@ -7565,8 +7565,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
     return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
   case ParsedAttr::AT_ArmStreaming:
     return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
-  case ParsedAttr::AT_DeviceKernel:
-    return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
   case ParsedAttr::AT_Pcs: {
     // The attribute may have had a fixit applied where we treated an
     // identifier as a string literal.  The contents of the string are valid,
@@ -8805,16 +8803,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
   }
 }
 
-static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
-  // The DeviceKernel attribute is shared for many targets, and
-  // it is only allowed to be a type attribute with the AMDGPU
-  // spelling, so skip processing the attr as a type attr
-  // unless it has that spelling.
-  if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
-    return true;
-  return DeviceKernelAttr::isAMDGPUSpelling(Attr);
-}
-
 static void processTypeAttrs(TypeProcessingState &state, QualType &type,
                              TypeAttrLocation TAL,
                              const ParsedAttributesView &attrs,
@@ -9068,8 +9056,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
         break;
       [[fallthrough]];
     FUNCTION_TYPE_ATTRS_CASELIST:
-      if (!isMultiSubjectAttrAllowedOnType(attr))
-        break;
 
       attr.setUsedAsTypeAttr();
 
diff --git a/clang/test/Sema/callingconv-devicekernel.c b/clang/test/Sema/callingconv-devicekernel.c
new file mode 100644
index 0000000000000..869687f8ca65d
--- /dev/null
+++ b/clang/test/Sema/callingconv-devicekernel.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-AMDGPU %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-NVPTX %s
+// RUN: %clang_cc1 -triple spir64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s
+// RUN: %clang_cc1 -triple spirv64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s
+
+// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel1()
+// CHECK-NVPTX-DAG: ptx_kernel void @kernel1()
+// CHECK-SPIR-DAG: spir_kernel void @kernel1()
+[[clang::device_kernel]] void kernel1() {}
+
+// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel2()
+// CHECK-NVPTX-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target
+// CHECK-SPIR-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target
+[[clang::amdgpu_kernel]] void kernel2() {}
+
+// CHECK-AMDGPU-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target
+// CHECK-NVPTX-DAG: ptx_kernel void @kernel3()
+// CHECK-SPIR-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target
+[[clang::nvptx_kernel]] void kernel3() {}
+
+// CHECK-AMDGPU-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+// CHECK-NVPTX-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+// CHECK-SPIR-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+[[clang::sycl_kernel]] void kernel4() {}
diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c
index f0b8b80a32974..28342b56af39a 100644
--- a/clang/test/Sema/callingconv.c
+++ b/clang/test/Sema/callingconv.c
@@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aar
 int __attribute__((aarch64_sve_pcs)) aasvepcs(void);  // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}}
 
 int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}}
+int __attribute__((device_kernel)) device_kernel(void) { // expected-warning {{'device_kernel' calling convention is not supported for this target}}
+}
+int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning {{'sycl_kernel' attribute ignored}}
+}
 
 // PR6361
 void ctest3();