From 46fe5f599666f59476ab351e3f11893bbaa6b844 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he@intel.com>
Date: Wed, 19 Nov 2025 06:22:21 +0100
Subject: [PATCH 1/2] [Clang] Add __scoped_atomic_uinc_wrap and
 __scoped_atomic_udec_wrap builtins

This PR extends __scoped_atomic builtins with inc and dec functions.
They map to LLVM IR `atomicrmw uinc_wrap` and `atomicrmw udec_wrap`.
These enable implementation of OpenCL-style atomic_inc / atomic_dec with
wrap semantics on targets supporting scoped atomics (e.g. GPUs).
---
 clang/docs/LanguageExtensions.rst      |   8 ++
 clang/docs/ReleaseNotes.rst            |   2 +
 clang/include/clang/Basic/Builtins.td  |  12 +++
 clang/lib/AST/Expr.cpp                 |   2 +
 clang/lib/CodeGen/CGAtomic.cpp         |  11 +++
 clang/lib/Sema/SemaChecking.cpp        |   2 +
 clang/test/CodeGen/scoped-atomic-ops.c | 105 +++++++++++++++++++++++++
 clang/test/Sema/scoped-atomic-ops.c    |  17 ++++
 8 files changed, 159 insertions(+)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a3db3e5d356b3..103383788601b 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4854,6 +4854,14 @@ memory scope argument. These are designed to be a generic alternative to the
 ``__opencl_atomic_*`` builtin functions for targets that support atomic memory
 scopes.
 
+Clang privides two additional __scoped_atomic builtins:
+
+* ``__scoped_atomic_uinc_wrap``
+* ``__scoped_atomic_udec_wrap``
+
+See LLVM IR `atomicrmw <https://llvm.org/docs/LangRef.html#atomicrmw-instruction>`_
+instruction for the semantics of uinc_wrap and udec_wrap.
+
 Atomic memory scopes are designed to assist optimizations for systems with
 several levels of memory hierarchy like GPUs. The following memory scopes are
 currently supported:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c2da61e4d066a..c513f909b81e0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -236,6 +236,8 @@ C23 Feature Support
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------
+- Added ``__scoped_atomic_uinc_wrap`` and ``__scoped_atomic_udec_wrap``.
+
 - Added ``__builtin_elementwise_ldexp``.
 
 - Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``.
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 47da17e5cfe83..ea9af3f53bf99 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -2290,6 +2290,18 @@ def ScopedAtomicMaxFetch : AtomicBuiltin {
   let Prototype = "void(...)";
 }
 
+def ScopedAtomicUIncWrap : AtomicBuiltin {
+  let Spellings = ["__scoped_atomic_uinc_wrap"];
+  let Attributes = [CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def ScopedAtomicUDecWrap : AtomicBuiltin {
+  let Spellings = ["__scoped_atomic_udec_wrap"];
+  let Attributes = [CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // OpenCL 2.0 atomic builtins.
 def OpenCLAtomicInit : AtomicBuiltin {
   let Spellings = ["__opencl_atomic_init"];
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 1d914fa876759..e29967adbb09f 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -5216,6 +5216,8 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   case AO__scoped_atomic_fetch_min:
   case AO__scoped_atomic_fetch_max:
   case AO__scoped_atomic_exchange_n:
+  case AO__scoped_atomic_uinc_wrap:
+  case AO__scoped_atomic_udec_wrap:
   case AO__hip_atomic_exchange:
   case AO__hip_atomic_fetch_add:
   case AO__hip_atomic_fetch_sub:
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index a0125817df493..ce97b7f726619 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -767,6 +767,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     Op = llvm::AtomicRMWInst::Nand;
     break;
 
+  case AtomicExpr::AO__scoped_atomic_uinc_wrap:
+    Op = llvm::AtomicRMWInst::UIncWrap;
+    break;
+  case AtomicExpr::AO__scoped_atomic_udec_wrap:
+    Op = llvm::AtomicRMWInst::UDecWrap;
+    break;
+
   case AtomicExpr::AO__atomic_test_and_set: {
     llvm::AtomicRMWInst *RMWI =
         CGF.emitAtomicRMWInst(llvm::AtomicRMWInst::Xchg, Ptr,
@@ -1071,6 +1078,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   case AtomicExpr::AO__scoped_atomic_xor_fetch:
   case AtomicExpr::AO__scoped_atomic_store_n:
   case AtomicExpr::AO__scoped_atomic_exchange_n:
+  case AtomicExpr::AO__scoped_atomic_uinc_wrap:
+  case AtomicExpr::AO__scoped_atomic_udec_wrap:
     Val1 = EmitValToTemp(*this, E->getVal1());
     break;
   }
@@ -1269,6 +1278,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     case AtomicExpr::AO__opencl_atomic_fetch_max:
     case AtomicExpr::AO__scoped_atomic_fetch_max:
     case AtomicExpr::AO__scoped_atomic_max_fetch:
+    case AtomicExpr::AO__scoped_atomic_uinc_wrap:
+    case AtomicExpr::AO__scoped_atomic_udec_wrap:
     case AtomicExpr::AO__atomic_test_and_set:
     case AtomicExpr::AO__atomic_clear:
       llvm_unreachable("Integral atomic operations always become atomicrmw!");
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cf407f7279c46..49316ad94480c 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4479,6 +4479,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
   case AtomicExpr::AO__scoped_atomic_or_fetch:
   case AtomicExpr::AO__scoped_atomic_xor_fetch:
   case AtomicExpr::AO__scoped_atomic_nand_fetch:
+  case AtomicExpr::AO__scoped_atomic_uinc_wrap:
+  case AtomicExpr::AO__scoped_atomic_udec_wrap:
     Form = Arithmetic;
     break;
 
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index c39048120a457..1f9eb52e72ce9 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -4539,6 +4539,111 @@ _Bool fi7e(_Bool *c) {
   return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
                                     __MEMORY_SCOPE_SINGLE);
 }
+
+// AMDGCN_CL_DEF-LABEL: define hidden void @fi8a(
+// AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
+// AMDGCN_CL_DEF-NEXT:  [[ENTRY:.*:]]
+// AMDGCN_CL_DEF-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// AMDGCN_CL_DEF-NEXT:    store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    store i32 -1, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP2:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    store i32 -1, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP5]], i32 [[TMP6]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// AMDGCN_CL_DEF-NEXT:    ret void
+//
+// AMDGCN_CL_20-LABEL: define hidden void @fi8a(
+// AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
+// AMDGCN_CL_20-NEXT:  [[ENTRY:.*:]]
+// AMDGCN_CL_20-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// AMDGCN_CL_20-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// AMDGCN_CL_20-NEXT:    [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
+// AMDGCN_CL_20-NEXT:    [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
+// AMDGCN_CL_20-NEXT:    [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
+// AMDGCN_CL_20-NEXT:    [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
+// AMDGCN_CL_20-NEXT:    store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    store i32 -1, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP2:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    store i32 -1, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP5]], i32 [[TMP6]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// AMDGCN_CL_20-NEXT:    ret void
+//
+// SPIRV-LABEL: define hidden spir_func void @fi8a(
+// SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// SPIRV-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// SPIRV-NEXT:    [[DOTATOMICTMP:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    [[DOTATOMICTMP1:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// SPIRV-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SPIRV-NEXT:    store i32 -1, ptr [[DOTATOMICTMP]], align 4
+// SPIRV-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4
+// SPIRV-NEXT:    [[TMP2:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP1]] syncscope("device") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
+// SPIRV-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// SPIRV-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SPIRV-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// SPIRV-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// SPIRV-NEXT:    store i32 -1, ptr [[DOTATOMICTMP1]], align 4
+// SPIRV-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4
+// SPIRV-NEXT:    [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP5]], i32 [[TMP6]] syncscope("device") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4
+// SPIRV-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
+// SPIRV-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// SPIRV-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// SPIRV-NEXT:    ret void
+//
+void fi8a(int *a, int *b) {
+  *b = __scoped_atomic_uinc_wrap(b, ~0U, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
+  *a = __scoped_atomic_udec_wrap(a, ~0U, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
+}
+
 //.
 // AMDGCN_CL_DEF: [[META3]] = !{}
 //.
diff --git a/clang/test/Sema/scoped-atomic-ops.c b/clang/test/Sema/scoped-atomic-ops.c
index 33044aa256cb0..fce856c02177b 100644
--- a/clang/test/Sema/scoped-atomic-ops.c
+++ b/clang/test/Sema/scoped-atomic-ops.c
@@ -40,6 +40,8 @@ void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
   *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
   *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
   *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+  *h = __scoped_atomic_uinc_wrap(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
+  *g = __scoped_atomic_udec_wrap(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
 }
 
 void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
@@ -51,6 +53,8 @@ void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
   *f = __scoped_atomic_fetch_nand(1, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
   *g = __scoped_atomic_fetch_min(1, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
   *h = __scoped_atomic_fetch_max(1, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  *h = __scoped_atomic_uinc_wrap(1, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  *g = __scoped_atomic_udec_wrap(1, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
 }
 
 void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
@@ -62,6 +66,8 @@ void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
   *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED); // expected-error {{too few arguments to function call, expected 4, have 3}}
   *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED); // expected-error {{too few arguments to function call, expected 4, have 3}}
   *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED); // expected-error {{too few arguments to function call, expected 4, have 3}}
+  *h = __scoped_atomic_uinc_wrap(h, 1, __ATOMIC_RELAXED); // expected-error {{too few arguments to function call, expected 4, have 3}}
+  *g = __scoped_atomic_udec_wrap(g, 1, __ATOMIC_RELAXED); // expected-error {{too few arguments to function call, expected 4, have 3}}
 }
 
 void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
@@ -73,6 +79,17 @@ void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
   *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, 42); // expected-error {{synchronization scope argument to atomic operation is invalid}}
   *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, 42); // expected-error {{synchronization scope argument to atomic operation is invalid}}
   *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, 42); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+  *h = __scoped_atomic_uinc_wrap(h, 1, __ATOMIC_RELAXED, 42); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+  *g = __scoped_atomic_udec_wrap(g, 1, __ATOMIC_RELAXED, 42); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+}
+
+void fi3e(float *a, float *b, float *c, float *d, float *e, float *f) {
+  *a = __scoped_atomic_fetch_and(a, 1, __ATOMIC_RELAXED, 42); // expected-error {{address argument to atomic operation must be a pointer to integer ('float *' invalid)}}
+  *b = __scoped_atomic_fetch_or(b, 1, __ATOMIC_RELAXED, 42); // expected-error {{address argument to atomic operation must be a pointer to integer ('float *' invalid)}}
+  *c = __scoped_atomic_fetch_xor(c, 1, __ATOMIC_RELAXED, 42); // expected-error {{address argument to atomic operation must be a pointer to integer ('float *' invalid)}}
+  *d = __scoped_atomic_fetch_nand(d, 1, __ATOMIC_RELAXED, 42); // expected-error {{address argument to atomic operation must be a pointer to integer ('float *' invalid)}}
+  *f = __scoped_atomic_uinc_wrap(f, 1, __ATOMIC_RELAXED, 42); // expected-error {{address argument to atomic operation must be a pointer to integer ('float *' invalid)}}
+  *e = __scoped_atomic_udec_wrap(e, 1, __ATOMIC_RELAXED, 42); // expected-error {{address argument to atomic operation must be a pointer to integer ('float *' invalid)}}
 }
 
 int fi4a(int *i) {

From 702033783cefb10b86dbf595ce92340cf6c0fc51 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he@intel.com>
Date: Wed, 19 Nov 2025 13:25:38 +0800
Subject: [PATCH 2/2] Update clang/docs/LanguageExtensions.rst

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 clang/docs/LanguageExtensions.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 103383788601b..80cea2166bc83 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4854,7 +4854,7 @@ memory scope argument. These are designed to be a generic alternative to the
 ``__opencl_atomic_*`` builtin functions for targets that support atomic memory
 scopes.
 
-Clang privides two additional __scoped_atomic builtins:
+Clang provides two additional __scoped_atomic builtins:
 
 * ``__scoped_atomic_uinc_wrap``
 * ``__scoped_atomic_udec_wrap``