Skip to content

Commit

Permalink
[clang][CodeGen] Emit atomic IR in place of optimized libcalls. (#73176)
Browse files Browse the repository at this point in the history
In the beginning, Clang only emitted atomic IR for operations it knew the
underlying microarchitecture had instructions for, meaning it required
significant knowledge of the target. Later, the backend acquired the
ability to lower IR to libcalls. To avoid duplicating logic and improve
logic locality, we'd like to move as much of this decision-making as
possible to the backend.

There are many ways to describe this change. For example, it reduces the
set of variables Clang consults when deciding whether to emit libcalls or
atomic IR down to a single one: the atomic operation's size.
  • Loading branch information
Logikable committed Feb 12, 2024
1 parent 20948df commit 5fdd094
Show file tree
Hide file tree
Showing 13 changed files with 497 additions and 569 deletions.
321 changes: 46 additions & 275 deletions clang/lib/CodeGen/CGAtomic.cpp

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions clang/test/CodeGen/LoongArch/atomics.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
// LA32: load atomic i8, ptr %a seq_cst, align 1
// LA32: store atomic i8 %b, ptr %a seq_cst, align 1
// LA32: atomicrmw add ptr %a, i8 %b seq_cst
// LA32: atomicrmw add ptr %a, i8 %b seq_cst, align 1
// LA64: load atomic i8, ptr %a seq_cst, align 1
// LA64: store atomic i8 %b, ptr %a seq_cst, align 1
// LA64: atomicrmw add ptr %a, i8 %b seq_cst
// LA64: atomicrmw add ptr %a, i8 %b seq_cst, align 1
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
Expand All @@ -23,22 +23,22 @@ void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
// LA32: load atomic i32, ptr %a seq_cst, align 4
// LA32: store atomic i32 %b, ptr %a seq_cst, align 4
// LA32: atomicrmw add ptr %a, i32 %b seq_cst
// LA32: atomicrmw add ptr %a, i32 %b seq_cst, align 4
// LA64: load atomic i32, ptr %a seq_cst, align 4
// LA64: store atomic i32 %b, ptr %a seq_cst, align 4
// LA64: atomicrmw add ptr %a, i32 %b seq_cst
// LA64: atomicrmw add ptr %a, i32 %b seq_cst, align 4
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
}

void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
// LA32: call i64 @__atomic_load_8
// LA32: call void @__atomic_store_8
// LA32: call i64 @__atomic_fetch_add_8
// LA32: load atomic i64, ptr %a seq_cst, align 8
// LA32: store atomic i64 %b, ptr %a seq_cst, align 8
// LA32: atomicrmw add ptr %a, i64 %b seq_cst, align 8
// LA64: load atomic i64, ptr %a seq_cst, align 8
// LA64: store atomic i64 %b, ptr %a seq_cst, align 8
// LA64: atomicrmw add ptr %a, i64 %b seq_cst
// LA64: atomicrmw add ptr %a, i64 %b seq_cst, align 8
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
Expand Down
50 changes: 18 additions & 32 deletions clang/test/CodeGen/PowerPC/quadword-atomics.c
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-QUADWORD-ATOMICS
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
// RUN: -mabi=quadword-atomics -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
// RUN: --check-prefix=PPC64-QUADWORD-ATOMICS
// RUN: -mabi=quadword-atomics -target-cpu pwr8 -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS


typedef struct {
Expand All @@ -19,66 +23,48 @@ typedef _Atomic(Q) AtomicQ;

typedef __int128_t int128_t;

// PPC64-QUADWORD-ATOMICS-LABEL: @test_load(
// PPC64-QUADWORD-ATOMICS: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
//
// PPC64-LABEL: @test_load(
// PPC64: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP3:%.*]], ptr noundef [[TMP4:%.*]], i32 noundef signext 2)
// PPC64: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
//
Q test_load(AtomicQ *ptr) {
// expected-no-diagnostics
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_store(
// PPC64-QUADWORD-ATOMICS: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
//
// PPC64-LABEL: @test_store(
// PPC64: call void @__atomic_store(i64 noundef 16, ptr noundef [[TMP6:%.*]], ptr noundef [[TMP7:%.*]], i32 noundef signext 3)
// PPC64: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
//
void test_store(Q val, AtomicQ *ptr) {
// expected-no-diagnostics
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_add(
// PPC64-QUADWORD-ATOMICS: [[TMP3:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
//
// PPC64-LABEL: @test_add(
// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(ptr noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
// PPC64: [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
//
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
// expected-no-diagnostics
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_xchg(
// PPC64-QUADWORD-ATOMICS: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
//
// PPC64-LABEL: @test_xchg(
// PPC64: call void @__atomic_exchange(i64 noundef 16, ptr noundef [[TMP7:%.*]], ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], i32 noundef signext 5)
// PPC64: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
//
Q test_xchg(AtomicQ *ptr, Q new) {
// expected-no-diagnostics
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_cmpxchg(
// PPC64-QUADWORD-ATOMICS: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
// PPC64-LABEL: @test_cmpxchg(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
// PPC64: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}

// PPC64-QUADWORD-ATOMICS-LABEL: @test_cmpxchg_weak(
// PPC64-QUADWORD-ATOMICS: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
// PPC64-LABEL: @test_cmpxchg_weak(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
// PPC64: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
//
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// expected-no-diagnostics
Expand All @@ -88,8 +74,8 @@ int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
// PPC64-QUADWORD-ATOMICS-LABEL: @is_lock_free(
// PPC64-QUADWORD-ATOMICS: ret i32 1
//
// PPC64-LABEL: @is_lock_free(
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
// PPC64-NO-QUADWORD-ATOMICS-LABEL: @is_lock_free(
// PPC64-NO-QUADWORD-ATOMICS: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
//
int is_lock_free() {
AtomicQ q;
Expand Down
68 changes: 17 additions & 51 deletions clang/test/CodeGen/RISCV/riscv-atomics.c
Original file line number Diff line number Diff line change
@@ -1,68 +1,34 @@
// RUN: %clang_cc1 -triple riscv32 -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV32I
// RUN: -verify=no-atomics
// RUN: %clang_cc1 -triple riscv32 -target-feature +a -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV32IA
// RUN: -verify=small-atomics
// RUN: %clang_cc1 -triple riscv64 -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64I
// RUN: -verify=no-atomics
// RUN: %clang_cc1 -triple riscv64 -target-feature +a -O1 -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64IA
// RUN: -verify=all-atomics

// This test demonstrates that MaxAtomicInlineWidth is set appropriately when
// the atomics instruction set extension is enabled.
// all-atomics-no-diagnostics

#include <stdatomic.h>
#include <stdint.h>

void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
// RV32I: call zeroext i8 @__atomic_load_1
// RV32I: call void @__atomic_store_1
// RV32I: call zeroext i8 @__atomic_fetch_add_1
// RV32IA: load atomic i8, ptr %a seq_cst, align 1
// RV32IA: store atomic i8 %b, ptr %a seq_cst, align 1
// RV32IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
// RV64I: call zeroext i8 @__atomic_load_1
// RV64I: call void @__atomic_store_1
// RV64I: call zeroext i8 @__atomic_fetch_add_1
// RV64IA: load atomic i8, ptr %a seq_cst, align 1
// RV64IA: store atomic i8 %b, ptr %a seq_cst, align 1
// RV64IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
}

void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
// RV32I: call i32 @__atomic_load_4
// RV32I: call void @__atomic_store_4
// RV32I: call i32 @__atomic_fetch_add_4
// RV32IA: load atomic i32, ptr %a seq_cst, align 4
// RV32IA: store atomic i32 %b, ptr %a seq_cst, align 4
// RV32IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
// RV64I: call signext i32 @__atomic_load_4
// RV64I: call void @__atomic_store_4
// RV64I: call signext i32 @__atomic_fetch_add_4
// RV64IA: load atomic i32, ptr %a seq_cst, align 4
// RV64IA: store atomic i32 %b, ptr %a seq_cst, align 4
// RV64IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
}

void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
// RV32I: call i64 @__atomic_load_8
// RV32I: call void @__atomic_store_8
// RV32I: call i64 @__atomic_fetch_add_8
// RV32IA: call i64 @__atomic_load_8
// RV32IA: call void @__atomic_store_8
// RV32IA: call i64 @__atomic_fetch_add_8
// RV64I: call i64 @__atomic_load_8
// RV64I: call void @__atomic_store_8
// RV64I: call i64 @__atomic_fetch_add_8
// RV64IA: load atomic i64, ptr %a seq_cst, align 8
// RV64IA: store atomic i64 %b, ptr %a seq_cst, align 8
// RV64IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
__c11_atomic_load(a, memory_order_seq_cst);
__c11_atomic_store(a, b, memory_order_seq_cst);
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
// small-atomics-warning@28 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
// small-atomics-warning@30 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
// small-atomics-warning@32 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
}

0 comments on commit 5fdd094

Please sign in to comment.