Skip to content

Commit

Permalink
[clang][CodeGen] Emit atomic IR in place of optimized libcalls. (#73176)
Browse files Browse the repository at this point in the history
In the beginning, Clang only emitted atomic IR for operations it knew the
underlying microarchitecture had instructions for, meaning it required
significant knowledge of the target. Later, the backend acquired the
ability to lower IR to libcalls. To avoid duplicating logic and improve
logic locality, we'd like to move as much of this decision-making as
possible to the backend.

There are many ways to describe this change. For example, it reduces the
set of variables Clang consults when deciding whether to emit libcalls or
atomic IR down to a single one: the atomic operation's size.
  • Loading branch information
Logikable committed Feb 12, 2024
1 parent 20948df commit 5fdd094
Show file tree
Hide file tree
Showing 13 changed files with 497 additions and 569 deletions.
321 changes: 46 additions & 275 deletions clang/lib/CodeGen/CGAtomic.cpp

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions clang/test/CodeGen/LoongArch/atomics.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
// LA32: load atomic i8, ptr %a seq_cst, align 1
// LA32: store atomic i8 %b, ptr %a seq_cst, align 1
// LA32: atomicrmw add ptr %a, i8 %b seq_cst
// LA32: atomicrmw add ptr %a, i8 %b seq_cst, align 1
// LA64: load atomic i8, ptr %a seq_cst, align 1
// LA64: store atomic i8 %b, ptr %a seq_cst, align 1
// LA64: atomicrmw add ptr %a, i8 %b seq_cst
// LA64: atomicrmw add ptr %a, i8 %b seq_cst, align 1
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
Expand All @@ -23,22 +23,22 @@ void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
// LA32: load atomic i32, ptr %a seq_cst, align 4
// LA32: store atomic i32 %b, ptr %a seq_cst, align 4
// LA32: atomicrmw add ptr %a, i32 %b seq_cst
// LA32: atomicrmw add ptr %a, i32 %b seq_cst, align 4
// LA64: load atomic i32, ptr %a seq_cst, align 4
// LA64: store atomic i32 %b, ptr %a seq_cst, align 4
// LA64: atomicrmw add ptr %a, i32 %b seq_cst
// LA64: atomicrmw add ptr %a, i32 %b seq_cst, align 4
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
}

void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
// LA32: call i64 @__atomic_load_8
// LA32: call void @__atomic_store_8
// LA32: call i64 @__atomic_fetch_add_8
// LA32: load atomic i64, ptr %a seq_cst, align 8
// LA32: store atomic i64 %b, ptr %a seq_cst, align 8
// LA32: atomicrmw add ptr %a, i64 %b seq_cst, align 8
// LA64: load atomic i64, ptr %a seq_cst, align 8
// LA64: store atomic i64 %b, ptr %a seq_cst, align 8
// LA64: atomicrmw add ptr %a, i64 %b seq_cst
// LA64: atomicrmw add ptr %a, i64 %b seq_cst, align 8
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
Expand Down
50 changes: 18 additions & 32 deletions clang/test/CodeGen/PowerPC/quadword-atomics.c
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-QUADWORD-ATOMICS
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -mabi=quadword-atomics -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefix=PPC64-QUADWORD-ATOMICS
// RUN: -mabi=quadword-atomics -target-cpu pwr8 -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS


typedef struct {
Expand All @@ -19,66 +23,48 @@ typedef _Atomic(Q) AtomicQ;

typedef __int128_t int128_t;

// PPC64-QUADWORD-ATOMICS-LABEL: @test_load(
// PPC64-QUADWORD-ATOMICS: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
//
// PPC64-LABEL: @test_load(
// PPC64: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP3:%.*]], ptr noundef [[TMP4:%.*]], i32 noundef signext 2)
// PPC64: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
//
Q test_load(AtomicQ *ptr) {
// expected-no-diagnostics
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_store(
// PPC64-QUADWORD-ATOMICS: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
//
// PPC64-LABEL: @test_store(
// PPC64: call void @__atomic_store(i64 noundef 16, ptr noundef [[TMP6:%.*]], ptr noundef [[TMP7:%.*]], i32 noundef signext 3)
// PPC64: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
//
void test_store(Q val, AtomicQ *ptr) {
// expected-no-diagnostics
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_add(
// PPC64-QUADWORD-ATOMICS: [[TMP3:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
//
// PPC64-LABEL: @test_add(
// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(ptr noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
// PPC64: [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
//
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
// expected-no-diagnostics
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_xchg(
// PPC64-QUADWORD-ATOMICS: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
//
// PPC64-LABEL: @test_xchg(
// PPC64: call void @__atomic_exchange(i64 noundef 16, ptr noundef [[TMP7:%.*]], ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], i32 noundef signext 5)
// PPC64: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
//
Q test_xchg(AtomicQ *ptr, Q new) {
// expected-no-diagnostics
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_cmpxchg(
// PPC64-QUADWORD-ATOMICS: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
// PPC64-LABEL: @test_cmpxchg(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
// PPC64: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_cmpxchg_weak(
// PPC64-QUADWORD-ATOMICS: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
// PPC64-LABEL: @test_cmpxchg_weak(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
// PPC64: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
Expand All @@ -88,8 +74,8 @@ int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// PPC64-QUADWORD-ATOMICS-LABEL: @is_lock_free(
// PPC64-QUADWORD-ATOMICS: ret i32 1
//
// PPC64-LABEL: @is_lock_free(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
// PPC64-NO-QUADWORD-ATOMICS-LABEL: @is_lock_free(
// PPC64-NO-QUADWORD-ATOMICS: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
//
int is_lock_free() {
AtomicQ q;
Expand Down
68 changes: 17 additions & 51 deletions clang/test/CodeGen/RISCV/riscv-atomics.c
Original file line number Diff line number Diff line change
@@ -1,68 +1,34 @@
// RUN: %clang_cc1 -triple riscv32 -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV32I
// RUN: -verify=no-atomics
// RUN: %clang_cc1 -triple riscv32 -target-feature +a -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV32IA
// RUN: -verify=small-atomics
// RUN: %clang_cc1 -triple riscv64 -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64I
// RUN: -verify=no-atomics
// RUN: %clang_cc1 -triple riscv64 -target-feature +a -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64IA
// RUN: -verify=all-atomics

// This test demonstrates that MaxAtomicInlineWidth is set appropriately when
// the atomics instruction set extension is enabled.
// all-atomics-no-diagnostics

#include <stdatomic.h>
#include <stdint.h>

void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
// RV32I: call zeroext i8 @__atomic_load_1
// RV32I: call void @__atomic_store_1
// RV32I: call zeroext i8 @__atomic_fetch_add_1
// RV32IA: load atomic i8, ptr %a seq_cst, align 1
// RV32IA: store atomic i8 %b, ptr %a seq_cst, align 1
// RV32IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
// RV64I: call zeroext i8 @__atomic_load_1
// RV64I: call void @__atomic_store_1
// RV64I: call zeroext i8 @__atomic_fetch_add_1
// RV64IA: load atomic i8, ptr %a seq_cst, align 1
// RV64IA: store atomic i8 %b, ptr %a seq_cst, align 1
// RV64IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
}

void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
// RV32I: call i32 @__atomic_load_4
// RV32I: call void @__atomic_store_4
// RV32I: call i32 @__atomic_fetch_add_4
// RV32IA: load atomic i32, ptr %a seq_cst, align 4
// RV32IA: store atomic i32 %b, ptr %a seq_cst, align 4
// RV32IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
// RV64I: call signext i32 @__atomic_load_4
// RV64I: call void @__atomic_store_4
// RV64I: call signext i32 @__atomic_fetch_add_4
// RV64IA: load atomic i32, ptr %a seq_cst, align 4
// RV64IA: store atomic i32 %b, ptr %a seq_cst, align 4
// RV64IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
}

void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
// RV32I: call i64 @__atomic_load_8
// RV32I: call void @__atomic_store_8
// RV32I: call i64 @__atomic_fetch_add_8
// RV32IA: call i64 @__atomic_load_8
// RV32IA: call void @__atomic_store_8
// RV32IA: call i64 @__atomic_fetch_add_8
// RV64I: call i64 @__atomic_load_8
// RV64I: call void @__atomic_store_8
// RV64I: call i64 @__atomic_fetch_add_8
// RV64IA: load atomic i64, ptr %a seq_cst, align 8
// RV64IA: store atomic i64 %b, ptr %a seq_cst, align 8
// RV64IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
// small-atomics-warning@28 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
// small-atomics-warning@30 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
// small-atomics-warning@32 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
}

0 comments on commit 5fdd094

Please sign in to comment.