500 changes: 492 additions & 8 deletions llvm/lib/CodeGen/AtomicExpandPass.cpp

Large diffs are not rendered by default.
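(Note: since the AtomicExpandPass.cpp diff itself is omitted above -- broadly, the pass now compares each atomic operation's size and alignment against the target's new MaxAtomicSizeInBitsSupported setting, queried via getMaxAtomicSizeInBitsSupported(), and lowers the operation to one of the __atomic_* libcalls registered below when inline expansion is not possible. A rough sketch of that gating check follows; the helper name and the simplified signature are illustrative, not the code from the actual diff.)

    // Sketch only (hypothetical helper): the real pass handles loads, stores,
    // atomicrmw and cmpxchg separately, and also chooses between the sized
    // (__atomic_load_N) and generic (__atomic_load) libcall flavors.
    #include <cstdint>

    static bool shouldUseAtomicLibcall(uint64_t SizeInBytes, uint64_t AlignInBytes,
                                       uint64_t MaxAtomicSizeInBits) {
      // Too wide for the target's native atomics, or not naturally aligned:
      // call into the runtime (compiler-rt / libatomic) instead of expanding
      // the operation inline.
      return SizeInBytes * 8 > MaxAtomicSizeInBits || AlignInBytes < SizeInBytes;
    }

(In the SPARC test below, the sparc-unknown-unknown triple is a V8 target, so SparcISelLowering sets the maximum supported atomic size to 0 and every operation takes the libcall path, exercising both the sized and the generic calls.)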

64 changes: 63 additions & 1 deletion llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -405,7 +405,66 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";


Names[RTLIB::ATOMIC_LOAD] = "__atomic_load";
Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1";
Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2";
Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4";
Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8";
Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16";

Names[RTLIB::ATOMIC_STORE] = "__atomic_store";
Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1";
Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2";
Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4";
Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8";
Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16";

Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange";
Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1";
Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2";
Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4";
Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8";
Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16";

Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange";
Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1";
Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2";
Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4";
Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8";
Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16";

Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1";
Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2";
Names[RTLIB::ATOMIC_FETCH_ADD_4] = "__atomic_fetch_add_4";
Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8";
Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16";
Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1";
Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2";
Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4";
Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8";
Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16";
Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1";
Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2";
Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4";
Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8";
Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16";
Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1";
Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2";
Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4";
Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8";
Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16";
Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1";
Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2";
Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4";
Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8";
Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16";
Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1";
Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2";
Names[RTLIB::ATOMIC_FETCH_NAND_4] = "__atomic_fetch_nand_4";
Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8";
Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16";

if (TT.getEnvironment() == Triple::GNU) {
Names[RTLIB::SINCOS_F32] = "sincosf";
Names[RTLIB::SINCOS_F64] = "sincos";
@@ -777,6 +836,9 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
GatherAllAliasesMaxDepth = 6;
MinStackArgumentAlignment = 1;
MinimumJumpTableEntries = 4;
// TODO: the default will be switched to 0 in the next commit, along
// with the necessary target-specific changes.
MaxAtomicSizeInBitsSupported = 1024;

InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
InitCmpLibcallCCs(CmpLibcallCCs);
7 changes: 7 additions & 0 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1611,6 +1611,13 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
}

// ATOMICs.
// Atomics are only supported on SparcV9. (32-bit atomics are also
// supported by the Leon SparcV8 variant, but we don't support that
// yet.)
if (Subtarget->isV9())
setMaxAtomicSizeInBitsSupported(64);
else
setMaxAtomicSizeInBitsSupported(0);

setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Legal);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32,
257 changes: 257 additions & 0 deletions llvm/test/Transforms/AtomicExpand/SPARC/libcalls.ll
@@ -0,0 +1,257 @@
; RUN: opt -S %s -atomic-expand | FileCheck %s

;;; NOTE: this test is actually target-independent -- any target which
;;; doesn't support inline atomics can be used. (E.g. X86 i386 would
;;; work, if LLVM is properly taught about what it's missing vs i586.)

;target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
;target triple = "i386-unknown-unknown"
target datalayout = "e-m:e-p:32:32-i64:64-f128:64-n32-S64"
target triple = "sparc-unknown-unknown"

;; First, check the sized calls. Except for cmpxchg -- which has to pass
;; the expected value indirectly, through a stack temporary -- these are
;; fairly straightforward.

; CHECK-LABEL: @test_load_i16(
; CHECK: %1 = bitcast i16* %arg to i8*
; CHECK: %2 = call i16 @__atomic_load_2(i8* %1, i32 5)
; CHECK: ret i16 %2
define i16 @test_load_i16(i16* %arg) {
%ret = load atomic i16, i16* %arg seq_cst, align 4
ret i16 %ret
}

; CHECK-LABEL: @test_store_i16(
; CHECK: %1 = bitcast i16* %arg to i8*
; CHECK: call void @__atomic_store_2(i8* %1, i16 %val, i32 5)
; CHECK: ret void
define void @test_store_i16(i16* %arg, i16 %val) {
store atomic i16 %val, i16* %arg seq_cst, align 4
ret void
}

; CHECK-LABEL: @test_exchange_i16(
; CHECK: %1 = bitcast i16* %arg to i8*
; CHECK: %2 = call i16 @__atomic_exchange_2(i8* %1, i16 %val, i32 5)
; CHECK: ret i16 %2
define i16 @test_exchange_i16(i16* %arg, i16 %val) {
%ret = atomicrmw xchg i16* %arg, i16 %val seq_cst
ret i16 %ret
}

; CHECK-LABEL: @test_cmpxchg_i16(
; CHECK: %1 = bitcast i16* %arg to i8*
; CHECK: %2 = alloca i16, align 2
; CHECK: %3 = bitcast i16* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 2, i8* %3)
; CHECK: store i16 %old, i16* %2, align 2
; CHECK: %4 = call zeroext i1 @__atomic_compare_exchange_2(i8* %1, i8* %3, i16 %new, i32 5, i32 0)
; CHECK: %5 = load i16, i16* %2, align 2
; CHECK: call void @llvm.lifetime.end(i64 2, i8* %3)
; CHECK: %6 = insertvalue { i16, i1 } undef, i16 %5, 0
; CHECK: %7 = insertvalue { i16, i1 } %6, i1 %4, 1
; CHECK: %ret = extractvalue { i16, i1 } %7, 0
; CHECK: ret i16 %ret
define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) {
%ret_succ = cmpxchg i16* %arg, i16 %old, i16 %new seq_cst monotonic
%ret = extractvalue { i16, i1 } %ret_succ, 0
ret i16 %ret
}

; CHECK-LABEL: @test_add_i16(
; CHECK: %1 = bitcast i16* %arg to i8*
; CHECK: %2 = call i16 @__atomic_fetch_add_2(i8* %1, i16 %val, i32 5)
; CHECK: ret i16 %2
define i16 @test_add_i16(i16* %arg, i16 %val) {
%ret = atomicrmw add i16* %arg, i16 %val seq_cst
ret i16 %ret
}


;; Now, check the output for the unsized (generic) libcalls. i128 is used
;; for these tests because the "16"-suffixed functions aren't assumed to be
;; available on 32-bit targets (such as sparc here, or i386).

; CHECK-LABEL: @test_load_i128(
; CHECK: %1 = bitcast i128* %arg to i8*
; CHECK: %2 = alloca i128, align 8
; CHECK: %3 = bitcast i128* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK: call void @__atomic_load(i32 16, i8* %1, i8* %3, i32 5)
; CHECK: %4 = load i128, i128* %2, align 8
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK: ret i128 %4
define i128 @test_load_i128(i128* %arg) {
%ret = load atomic i128, i128* %arg seq_cst, align 16
ret i128 %ret
}

; CHECK-LABEL: @test_store_i128(
; CHECK: %1 = bitcast i128* %arg to i8*
; CHECK: %2 = alloca i128, align 8
; CHECK: %3 = bitcast i128* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK: store i128 %val, i128* %2, align 8
; CHECK: call void @__atomic_store(i32 16, i8* %1, i8* %3, i32 5)
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK: ret void
define void @test_store_i128(i128* %arg, i128 %val) {
store atomic i128 %val, i128* %arg seq_cst, align 16
ret void
}

; CHECK-LABEL: @test_exchange_i128(
; CHECK: %1 = bitcast i128* %arg to i8*
; CHECK: %2 = alloca i128, align 8
; CHECK: %3 = bitcast i128* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK: store i128 %val, i128* %2, align 8
; CHECK: %4 = alloca i128, align 8
; CHECK: %5 = bitcast i128* %4 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %5)
; CHECK: call void @__atomic_exchange(i32 16, i8* %1, i8* %3, i8* %5, i32 5)
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK: %6 = load i128, i128* %4, align 8
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %5)
; CHECK: ret i128 %6
define i128 @test_exchange_i128(i128* %arg, i128 %val) {
%ret = atomicrmw xchg i128* %arg, i128 %val seq_cst
ret i128 %ret
}

; CHECK-LABEL: @test_cmpxchg_i128(
; CHECK: %1 = bitcast i128* %arg to i8*
; CHECK: %2 = alloca i128, align 8
; CHECK: %3 = bitcast i128* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK: store i128 %old, i128* %2, align 8
; CHECK: %4 = alloca i128, align 8
; CHECK: %5 = bitcast i128* %4 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %5)
; CHECK: store i128 %new, i128* %4, align 8
; CHECK: %6 = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %1, i8* %3, i8* %5, i32 5, i32 0)
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %5)
; CHECK: %7 = load i128, i128* %2, align 8
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK: %8 = insertvalue { i128, i1 } undef, i128 %7, 0
; CHECK: %9 = insertvalue { i128, i1 } %8, i1 %6, 1
; CHECK: %ret = extractvalue { i128, i1 } %9, 0
; CHECK: ret i128 %ret
define i128 @test_cmpxchg_i128(i128* %arg, i128 %old, i128 %new) {
%ret_succ = cmpxchg i128* %arg, i128 %old, i128 %new seq_cst monotonic
%ret = extractvalue { i128, i1 } %ret_succ, 0
ret i128 %ret
}

; This one gets a verbose expansion: there is no generic
; __atomic_fetch_add libcall, so the atomicrmw expands to a cmpxchg
; loop, and the cmpxchg inside that loop is itself expanded into a libcall.
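;; At a high level, the expansion checked below first loads the current
;; value, then loops: compute %new = add %loaded, %val, spill %loaded and
;; %new to stack temporaries, call __atomic_compare_exchange on those
;; temporaries, and retry with the value the call wrote back until it
;; reports success.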

; CHECK-LABEL: @test_add_i128(
; CHECK: %1 = alloca i128, align 8
; CHECK: %2 = alloca i128, align 8
; CHECK: %3 = load i128, i128* %arg, align 16
; CHECK: br label %atomicrmw.start
; CHECK: atomicrmw.start:
; CHECK: %loaded = phi i128 [ %3, %0 ], [ %newloaded, %atomicrmw.start ]
; CHECK: %new = add i128 %loaded, %val
; CHECK: %4 = bitcast i128* %arg to i8*
; CHECK: %5 = bitcast i128* %1 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %5)
; CHECK: store i128 %loaded, i128* %1, align 8
; CHECK: %6 = bitcast i128* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %6)
; CHECK: store i128 %new, i128* %2, align 8
; CHECK: %7 = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %4, i8* %5, i8* %6, i32 5, i32 5)
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %6)
; CHECK: %8 = load i128, i128* %1, align 8
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %5)
; CHECK: %9 = insertvalue { i128, i1 } undef, i128 %8, 0
; CHECK: %10 = insertvalue { i128, i1 } %9, i1 %7, 1
; CHECK: %success = extractvalue { i128, i1 } %10, 1
; CHECK: %newloaded = extractvalue { i128, i1 } %10, 0
; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start
; CHECK: atomicrmw.end:
; CHECK: ret i128 %newloaded
define i128 @test_add_i128(i128* %arg, i128 %val) {
%ret = atomicrmw add i128* %arg, i128 %val seq_cst
ret i128 %ret
}

;; Ensure that non-integer types get bitcast correctly on the way in and out of a libcall:

; CHECK-LABEL: @test_load_double(
; CHECK: %1 = bitcast double* %arg to i8*
; CHECK: %2 = call i64 @__atomic_load_8(i8* %1, i32 5)
; CHECK: %3 = bitcast i64 %2 to double
; CHECK: ret double %3
define double @test_load_double(double* %arg, double %val) {
%1 = load atomic double, double* %arg seq_cst, align 16
ret double %1
}

; CHECK-LABEL: @test_store_double(
; CHECK: %1 = bitcast double* %arg to i8*
; CHECK: %2 = bitcast double %val to i64
; CHECK: call void @__atomic_store_8(i8* %1, i64 %2, i32 5)
; CHECK: ret void
define void @test_store_double(double* %arg, double %val) {
store atomic double %val, double* %arg seq_cst, align 16
ret void
}

; CHECK-LABEL: @test_cmpxchg_ptr(
; CHECK: %1 = bitcast i16** %arg to i8*
; CHECK: %2 = alloca i16*, align 4
; CHECK: %3 = bitcast i16** %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 4, i8* %3)
; CHECK: store i16* %old, i16** %2, align 4
; CHECK: %4 = ptrtoint i16* %new to i32
; CHECK: %5 = call zeroext i1 @__atomic_compare_exchange_4(i8* %1, i8* %3, i32 %4, i32 5, i32 2)
; CHECK: %6 = load i16*, i16** %2, align 4
; CHECK: call void @llvm.lifetime.end(i64 4, i8* %3)
; CHECK: %7 = insertvalue { i16*, i1 } undef, i16* %6, 0
; CHECK: %8 = insertvalue { i16*, i1 } %7, i1 %5, 1
; CHECK: %ret = extractvalue { i16*, i1 } %8, 0
; CHECK: ret i16* %ret
; CHECK: }
define i16* @test_cmpxchg_ptr(i16** %arg, i16* %old, i16* %new) {
%ret_succ = cmpxchg i16** %arg, i16* %old, i16* %new seq_cst acquire
%ret = extractvalue { i16*, i1 } %ret_succ, 0
ret i16* %ret
}

;; ...and for a non-integer type of large size too.

; CHECK-LABEL: @test_store_fp128
; CHECK: %1 = bitcast fp128* %arg to i8*
; CHECK: %2 = alloca fp128, align 8
; CHECK: %3 = bitcast fp128* %2 to i8*
; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK: store fp128 %val, fp128* %2, align 8
; CHECK: call void @__atomic_store(i32 16, i8* %1, i8* %3, i32 5)
; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK: ret void
define void @test_store_fp128(fp128* %arg, fp128 %val) {
store atomic fp128 %val, fp128* %arg seq_cst, align 16
ret void
}

;; Unaligned loads and stores should be expanded to the generic
;; libcall, just like large loads/stores, and not a specialized one.
;; NOTE: atomicrmw and cmpxchg don't yet support an align attribute;
;; when such support is added, they should also be tested here.

; CHECK-LABEL: @test_unaligned_load_i16(
; CHECK: __atomic_load(
define i16 @test_unaligned_load_i16(i16* %arg) {
%ret = load atomic i16, i16* %arg seq_cst, align 1
ret i16 %ret
}

; CHECK-LABEL: @test_unaligned_store_i16(
; CHECK: __atomic_store(
define void @test_unaligned_store_i16(i16* %arg, i16 %val) {
store atomic i16 %val, i16* %arg seq_cst, align 1
ret void
}
2 changes: 2 additions & 0 deletions llvm/test/Transforms/AtomicExpand/SPARC/lit.local.cfg
@@ -0,0 +1,2 @@
if not 'Sparc' in config.root.targets:
config.unsupported = True