Skip to content

Commit

Permalink
[SystemZ] Enable AtomicExpand pass
Browse files Browse the repository at this point in the history
The upcoming OpenMP support for SystemZ requires handling of IR insns
like `atomicrmw fadd`. Normally atomic float operations are expanded
by Clang and such insns do not occur, but OpenMP generates them
directly. Other architectures handle this using the AtomicExpand pass,
which SystemZ did not need so far. Enable it; implement atomicrmw
sub-operations uinc_wrap and udec_wrap by expanding them to a
compare-and-swap loop; add tests.

Currently AtomicExpand treats atomic loads and stores of floats
pessimistically: it casts them to integers, which SystemZ does not
need, since the floating point load and store instructions are already
atomic. However, the way Clang currently expands them is pessimistic
as well, so this change does not make things worse. Optimizing
operations on atomic floats can be a separate change in the future.

This change does not create any differences in the Linux kernel build.
  • Loading branch information
iii-i committed Oct 30, 2023
1 parent 77c6339 commit 89c0748
Show file tree
Hide file tree
Showing 26 changed files with 536 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,15 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
return false;
}

/// Tell the AtomicExpand pass how to handle an atomicrmw instruction.
///
/// Floating-point operations and the uinc_wrap / udec_wrap sub-operations
/// are expanded in IR to a compare-and-swap loop; every other operation is
/// left untouched.
///
/// \param RMW the atomicrmw instruction being queried.
/// \returns AtomicExpansionKind::CmpXChg for the cases listed above,
///          AtomicExpansionKind::None otherwise.
TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  // Any floating-point flavour goes through the compare-and-swap loop.
  if (RMW->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  switch (RMW->getOperation()) {
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    // Wrapping increment/decrement are expanded the same way.
    return AtomicExpansionKind::CmpXChg;
  default:
    // Everything else is not expanded in IR.
    return AtomicExpansionKind::None;
  }
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ class SystemZTargetLowering : public TargetLowering {
return VT != MVT::f64;
}
bool hasInlineStackProbe(const MachineFunction &MF) const override;
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ void SystemZPassConfig::addIRPasses() {
addPass(createLoopDataPrefetchPass());
}

addPass(createAtomicExpandPass());

TargetPassConfig::addIRPasses();
}

Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-load-06.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; Test float atomic loads.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomic load of a float from %src. The expected code fetches the
; value with an integer load (lgf), shifts it left by 32 (sllg) and moves it
; into %f0 with ldgr, i.e. the load goes through a general-purpose register
; rather than using a floating-point load directly.
define float @f1(ptr %src) {
; CHECK-LABEL: f1:
; CHECK: lgf [[R:%r[0-9]+]], 0(%r2)
; CHECK: sllg [[R]], [[R]], 32
; CHECK: ldgr %f0, [[R]]
; CHECK: br %r14
%val = load atomic float, ptr %src seq_cst, align 4
ret float %val
}
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-load-07.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; Test double atomic loads.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomic load of a double from %src. The expected code is a single
; ld from 0(%r2) into %f0 followed by the return.
define double @f1(ptr %src) {
; CHECK-LABEL: f1:
; CHECK: ld %f0, 0(%r2)
; CHECK: br %r14
%val = load atomic double, ptr %src seq_cst, align 8
ret double %val
}
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-load-08.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; Test long double atomic loads. Expect a libcall.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomic load of an fp128 from %src; the result is then stored
; non-atomically to %ret. The expected code saves %r2, calls __atomic_load
; with 16 in %r2, the address of a stack slot (160(%r15)) in %r4 and 5 in %r5
; (presumably the object size, the result buffer and the seq_cst memory
; order -- confirm against the libatomic interface), then copies the two
; doublewords from the stack slot to the saved %ret pointer.
define void @f1(ptr %ret, ptr %src) {
; CHECK-LABEL: f1:
; CHECK: lgr [[RET:%r[0-9]+]], %r2
; CHECK: la %r4, 160(%r15)
; CHECK: lghi %r2, 16
; CHECK: lhi %r5, 5
; CHECK: brasl %r14, __atomic_load@PLT
; CHECK: ld [[FL:%f[0-9]+]], 160(%r15)
; CHECK: ld [[FH:%f[0-9]+]], 168(%r15)
; CHECK: std [[FL]], 0([[RET]])
; CHECK: std [[FH]], 8([[RET]])
; CHECK: br %r14
%val = load atomic fp128, ptr %src seq_cst, align 8
store fp128 %val, ptr %ret, align 8
ret void
}
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-store-06.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; Test float atomic stores.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomic store of the float %val through the pointer argument (named
; %src here, although it is the store destination). The expected code moves
; %f0 into a general-purpose register (lgdr), shifts it right by 32 (srlg)
; and writes it with an integer store (st), i.e. the store goes through a
; general-purpose register rather than using a floating-point store directly.
define void @f1(ptr %src, float %val) {
; CHECK-LABEL: f1:
; CHECK: lgdr [[R:%r[0-9]+]], %f0
; CHECK: srlg [[R]], [[R]], 32
; CHECK: st [[R]], 0(%r2)
; CHECK: br %r14
store atomic float %val, ptr %src seq_cst, align 4
ret void
}
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-store-07.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; Test double atomic stores.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomic store of the double %val to %dst. The expected code is a
; single std of %f0 to 0(%r2) followed by the return.
define void @f1(ptr %dst, double %val) {
; CHECK-LABEL: f1:
; CHECK: std %f0, 0(%r2)
; CHECK: br %r14
store atomic double %val, ptr %dst seq_cst, align 8
ret void
}
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-store-08.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; Test long double atomic stores. Expect a libcall.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Non-atomic load of an fp128 from %src followed by a seq_cst atomic store of
; it to %dst. The expected code copies the value into a stack slot at
; 160(%r15), moves the incoming %r2 into %r3, and calls __atomic_store with
; 16 in %r2, the stack slot address in %r4 and 5 in %r5 (presumably the
; object size, the source buffer and the seq_cst memory order -- confirm
; against the libatomic interface).
define void @f1(ptr %dst, ptr %src) {
; CHECK-LABEL: f1:
; CHECK: ld [[FL:%f[0-9]+]], 0(%r3)
; CHECK: ld [[FH:%f[0-9]+]], 8(%r3)
; CHECK: lgr %r3, %r2
; CHECK: std [[FL]], 160(%r15)
; CHECK: std [[FH]], 168(%r15)
; CHECK: la %r4, 160(%r15)
; CHECK: lghi %r2, 16
; CHECK: lhi %r5, 5
; CHECK: brasl %r14, __atomic_store@PLT
; CHECK: br %r14
%val = load fp128, ptr %src, align 8
store atomic fp128 %val, ptr %dst seq_cst, align 8
ret void
}
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fadd-01.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; Test atomic float addition. Expect a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fadd of the float %b into the location %src; the previous
; value is returned. The expected code loads the initial value with le and
; then loops: it adds %f0 with aebr, moves both the old and the new value
; into general-purpose registers (lgdr followed by srlg by 32), attempts the
; update with cs, converts the register used as the cs comparand back into
; the float register (sllg + ldgr), and branches back to the loop label with
; jl (presumably taken when cs reports a mismatch). After the loop the
; value is copied to %f0 for the return.
define float @f1(ptr %src, float %b) {
; CHECK-LABEL: f1:
; CHECK: le [[F:%f[0-9]+]], 0(%r2)
; CHECK: [[L:\.L.+]]:
; CHECK: lgdr [[RI:%r[0-9]+]], [[F]]
; CHECK: aebr [[F]], %f0
; CHECK: lgdr [[RO:%r[0-9]+]], [[F]]
; CHECK: srlg [[RO]], [[RO]], 32
; CHECK: srlg [[RI]], [[RI]], 32
; CHECK: cs [[RI]], [[RO]], 0(%r2)
; CHECK: sllg [[RI]], [[RI]], 32
; CHECK: ldgr [[F]], [[RI]]
; CHECK: jl [[L]]
; CHECK: ler %f0, [[F]]
; CHECK: br %r14
%res = atomicrmw fadd ptr %src, float %b seq_cst
ret float %res
}
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fadd-02.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; Test atomic double addition. Expect a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fadd of the double %b into the location %src; the
; previous value is returned. The expected code loads the initial value with
; ld and then loops: it adds %f0 with adbr, moves the old and the new value
; into general-purpose registers with lgdr, attempts the update with csg,
; moves the csg comparand register back into the float register (ldgr), and
; branches back to the loop label with jl (presumably taken when csg reports
; a mismatch). No 32-bit shifts are expected, unlike the float variant.
define double @f1(ptr %src, double %b) {
; CHECK-LABEL: f1:
; CHECK: ld [[F:%f[0-9]+]], 0(%r2)
; CHECK: [[L:\.L.+]]:
; CHECK: lgdr [[RI:%r[0-9]+]], [[F]]
; CHECK: adbr [[F]], %f0
; CHECK: lgdr [[RO:%r[0-9]+]], [[F]]
; CHECK: csg [[RI]], [[RO]], 0(%r2)
; CHECK: ldgr [[F]], [[RI]]
; CHECK: jl [[L]]
; CHECK: ldr %f0, [[F]]
; CHECK: br %r14
%res = atomicrmw fadd ptr %src, double %b seq_cst
ret double %res
}
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fadd-03.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; Test atomic long double addition. Expect a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fadd of an fp128 (loaded from %b) into the location
; %src; the previous value is stored to %ret. The expected code keeps both
; fp128 values in pairs of floating-point registers and loops: it adds with
; axbr, moves the old and new halves into general-purpose registers (lgdr),
; attempts the update with cdsg, spills the cdsg comparand registers to the
; stack (stg) and reloads them into the float registers (ld), and branches
; back with jl (presumably taken when cdsg reports a mismatch).
; NOTE(review): the first four patterns after the label carry no mnemonic,
; so they match any instruction with those operands; the fmax/fmin variants
; of this test spell out "ld" -- confirm whether that is intentional.
define void @f1(ptr %ret, ptr %src, ptr %b) {
; CHECK-LABEL: f1:
; CHECK: [[FBL:%f[0-9]+]], 0(%r4)
; CHECK: [[FBH:%f[0-9]+]], 8(%r4)
; CHECK: [[FSL:%f[0-9]+]], 0(%r3)
; CHECK: [[FSH:%f[0-9]+]], 8(%r3)
; CHECK: [[LABEL:\.L.+]]:
; CHECK: lgdr [[RISH:%r[0-9]+]], [[FSH]]
; CHECK: lgdr [[RISL:%r[0-9]+]], [[FSL]]
; CHECK: axbr [[FSL]], [[FBL]]
; CHECK: lgdr [[ROSH:%r[0-9]+]], [[FSH]]
; CHECK: lgdr [[ROSL:%r[0-9]+]], [[FSL]]
; CHECK: cdsg [[RISL]], [[ROSL]], 0(%r3)
; CHECK: stg [[RISH]], 168(%r15)
; CHECK: stg [[RISL]], 160(%r15)
; CHECK: ld [[FSL]], 160(%r15)
; CHECK: ld [[FSH]], 168(%r15)
; CHECK: jl [[LABEL]]
; CHECK: std [[FSL]], 0(%r2)
; CHECK: std [[FSH]], 8(%r2)
; CHECK: br %r14
%val = load fp128, ptr %b
%res = atomicrmw fadd ptr %src, fp128 %val seq_cst
store fp128 %res, ptr %ret
ret void
}
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fmax-01.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; Test atomic float maximum.
; Expect a libcall in a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fmax of the float %b into the location %src; the previous
; value is returned. The expected code saves the pointer and %f0 in other
; registers (they are overwritten around the call), loads the initial value
; with le, and then loops: it passes the current value and the saved operand
; in %f0/%f2 to fmaxf, moves the call result and the current value into
; general-purpose registers (lgdr + srlg by 32), attempts the update with cs,
; converts the cs comparand register back into the float register (sllg +
; ldgr), and branches back with jl (presumably taken when cs reports a
; mismatch).
define float @f1(ptr %src, float %b) {
; CHECK-LABEL: f1:
; CHECK: lgr [[SRC:%r[0-9]+]], %r2
; CHECK: le [[FSRC:%f[0-9]+]], 0(%r2)
; CHECK: ler [[FB:%f[0-9]+]], %f0
; CHECK: [[L:\.L.+]]:
; CHECK: ler %f0, [[FSRC]]
; CHECK: ler %f2, [[FB]]
; CHECK: brasl %r14, fmaxf@PLT
; CHECK: lgdr [[RO:%r[0-9]+]], %f0
; CHECK: srlg [[RO]], [[RO]], 32
; CHECK: lgdr [[RI:%r[0-9]+]], [[FSRC]]
; CHECK: srlg [[RI]], [[RI]], 32
; CHECK: cs [[RI]], [[RO]], 0([[SRC]])
; CHECK: sllg [[RO]], [[RI]], 32
; CHECK: ldgr [[FSRC]], [[RO]]
; CHECK: jl [[L]]
; CHECK: ler %f0, [[FSRC]]
; CHECK: br %r14
%res = atomicrmw fmax ptr %src, float %b seq_cst
ret float %res
}
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fmax-02.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; Test atomic double maximum.
; Expect a libcall in a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fmax of the double %b into the location %src; the
; previous value is returned. The expected code saves the pointer and %f0 in
; other registers, loads the initial value with ld, and then loops: it
; passes the current value and the saved operand in %f0/%f2 to fmax, moves
; the call result and the current value into general-purpose registers
; (lgdr), attempts the update with csg, moves the csg comparand register
; back into the float register (ldgr), and branches back with jl (presumably
; taken when csg reports a mismatch).
; NOTE(review): the capture names look swapped relative to
; atomicrmw-fmin-02.ll -- here FB captures the value loaded from 0(%r2) and
; FSRC the copy of %f0, and RB the saved pointer. The patterns are used
; consistently, so only the names differ.
define double @f1(ptr %src, double %b) {
; CHECK-LABEL: f1:
; CHECK: lgr [[RB:%r[0-9]+]], %r2
; CHECK: ld [[FB:%f[0-9]+]], 0(%r2)
; CHECK: ldr [[FSRC:%f[0-9]+]], %f0
; CHECK: [[L:\.L.+]]:
; CHECK: ldr %f0, [[FB]]
; CHECK: ldr %f2, [[FSRC]]
; CHECK: brasl %r14, fmax@PLT
; CHECK: lgdr [[RO:%r[0-9]+]], %f0
; CHECK: lgdr [[RI:%r[0-9]+]], [[FB]]
; CHECK: csg [[RI]], [[RO]], 0([[RB]])
; CHECK: ldgr [[FB]], [[RI]]
; CHECK: jl [[L]]
; CHECK: ldr %f0, [[FB]]
; CHECK: br %r14
%res = atomicrmw fmax ptr %src, double %b seq_cst
ret double %res
}
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; Test atomic long double maximum.
; Expect a libcall in a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fmax of an fp128 (loaded from %b) into the location
; %src; the previous value is stored to %ret. The expected code saves the
; %src and %ret pointers, loads both fp128 values into pairs of
; floating-point registers, and then loops: it spills both values to stack
; slots (160 and 176 off %r15), calls fmaxl with %r2/%r3/%r4 pointing at
; stack slots 192, 176 and 160 (presumably result, first and second
; operand -- confirm against the fp128 calling convention), reloads the
; result from 192(%r15), moves the result and the current value into
; general-purpose registers (lgdr), attempts the update with cdsg, passes the
; cdsg comparand registers back into the float registers through the stack
; (stg + ld), and branches back with jl (presumably taken when cdsg reports
; a mismatch).
define void @f1(ptr %ret, ptr %src, ptr %b) {
; CHECK-LABEL: f1:
; CHECK: lgr [[SRC:%r[0-9]+]], %r3
; CHECK: ld [[FBL:%f[0-9]+]], 0(%r4)
; CHECK: ld [[FBH:%f[0-9]+]], 8(%r4)
; CHECK: ld [[FSL:%f[0-9]+]], 0(%r3)
; CHECK: ld [[FSH:%f[0-9]+]], 8(%r3)
; CHECK: lgr [[RET:%r[0-9]+]], %r2
; CHECK: [[L:\.L.+]]:
; CHECK: std [[FBL]], 160(%r15)
; CHECK: std [[FBH]], 168(%r15)
; CHECK: la %r2, 192(%r15)
; CHECK: la %r3, 176(%r15)
; CHECK: la %r4, 160(%r15)
; CHECK: std [[FSL]], 176(%r15)
; CHECK: std [[FSH]], 184(%r15)
; CHECK: brasl %r14, fmaxl@PLT
; CHECK: ld [[FL:%f[0-9]+]], 192(%r15)
; CHECK: ld [[FH:%f[0-9]+]], 200(%r15)
; CHECK: lgdr [[RH:%r[0-9]+]], [[FH]]
; CHECK: lgdr [[RL:%r[0-9]+]], [[FL]]
; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
; CHECK: lgdr [[RSL:%r[0-9]+]], [[FSL]]
; CHECK: cdsg [[RSL]], [[RL]], 0([[SRC]])
; CHECK: stg [[RSH]], 216(%r15)
; CHECK: stg [[RSL]], 208(%r15)
; CHECK: ld [[FSL]], 208(%r15)
; CHECK: ld [[FSH]], 216(%r15)
; CHECK: jl [[L]]
; CHECK: std [[FSL]], 0([[RET]])
; CHECK: std [[FSH]], 8([[RET]])
; CHECK: br %r14
%val = load fp128, ptr %b
%res = atomicrmw fmax ptr %src, fp128 %val seq_cst
store fp128 %res, ptr %ret
ret void
}
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fmin-01.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; Test atomic float minimum.
; Expect a libcall in a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fmin of the float %b into the location %src; the previous
; value is returned. The expected code saves the pointer and %f0 in other
; registers (they are overwritten around the call), loads the initial value
; with le, and then loops: it passes the current value and the saved operand
; in %f0/%f2 to fminf, moves the call result and the current value into
; general-purpose registers (lgdr + srlg by 32), attempts the update with cs,
; converts the cs comparand register back into the float register (sllg +
; ldgr), and branches back with jl (presumably taken when cs reports a
; mismatch).
define float @f1(ptr %src, float %b) {
; CHECK-LABEL: f1:
; CHECK: lgr [[SRC:%r[0-9]+]], %r2
; CHECK: le [[FSRC:%f[0-9]+]], 0(%r2)
; CHECK: ler [[FB:%f[0-9]+]], %f0
; CHECK: [[L:\.L.+]]:
; CHECK: ler %f0, [[FSRC]]
; CHECK: ler %f2, [[FB]]
; CHECK: brasl %r14, fminf@PLT
; CHECK: lgdr [[RO:%r[0-9]+]], %f0
; CHECK: srlg [[RO]], [[RO]], 32
; CHECK: lgdr [[RI:%r[0-9]+]], [[FSRC]]
; CHECK: srlg [[RI]], [[RI]], 32
; CHECK: cs [[RI]], [[RO]], 0([[SRC]])
; CHECK: sllg [[RO]], [[RI]], 32
; CHECK: ldgr [[FSRC]], [[RO]]
; CHECK: jl [[L]]
; CHECK: ler %f0, [[FSRC]]
; CHECK: br %r14
%res = atomicrmw fmin ptr %src, float %b seq_cst
ret float %res
}
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fmin-02.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; Test atomic double minimum.
; Expect a libcall in a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fmin of the double %b into the location %src; the
; previous value is returned. The expected code saves the pointer and %f0 in
; other registers, loads the initial value with ld, and then loops: it
; passes the current value and the saved operand in %f0/%f2 to fmin, moves
; the call result and the current value into general-purpose registers
; (lgdr), attempts the update with csg, moves the csg comparand register
; back into the float register (ldgr), and branches back with jl (presumably
; taken when csg reports a mismatch).
define double @f1(ptr %src, double %b) {
; CHECK-LABEL: f1:
; CHECK: lgr [[SRC:%r[0-9]+]], %r2
; CHECK: ld [[FSRC:%f[0-9]+]], 0(%r2)
; CHECK: ldr [[FB:%f[0-9]+]], %f0
; CHECK: [[L:\.L.+]]:
; CHECK: ldr %f0, [[FSRC]]
; CHECK: ldr %f2, [[FB]]
; CHECK: brasl %r14, fmin@PLT
; CHECK: lgdr [[RO:%r[0-9]+]], %f0
; CHECK: lgdr [[RI:%r[0-9]+]], [[FSRC]]
; CHECK: csg [[RI]], [[RO]], 0([[SRC]])
; CHECK: ldgr [[FSRC]], [[RI]]
; CHECK: jl [[L]]
; CHECK: ldr %f0, [[FSRC]]
; CHECK: br %r14
%res = atomicrmw fmin ptr %src, double %b seq_cst
ret double %res
}
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; Test atomic long double minimum.
; Expect a libcall in a compare-and-swap loop.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Seq_cst atomicrmw fmin of an fp128 (loaded from %b) into the location
; %src; the previous value is stored to %ret. The expected code saves the
; %src and %ret pointers, loads both fp128 values into pairs of
; floating-point registers, and then loops: it spills both values to stack
; slots (160 and 176 off %r15), calls fminl with %r2/%r3/%r4 pointing at
; stack slots 192, 176 and 160 (presumably result, first and second
; operand -- confirm against the fp128 calling convention), reloads the
; result from 192(%r15), moves the result and the current value into
; general-purpose registers (lgdr), attempts the update with cdsg, passes the
; cdsg comparand registers back into the float registers through the stack
; (stg + ld), and branches back with jl (presumably taken when cdsg reports
; a mismatch).
define void @f1(ptr %ret, ptr %src, ptr %b) {
; CHECK-LABEL: f1:
; CHECK: lgr [[SRC:%r[0-9]+]], %r3
; CHECK: ld [[FBL:%f[0-9]+]], 0(%r4)
; CHECK: ld [[FBH:%f[0-9]+]], 8(%r4)
; CHECK: ld [[FSL:%f[0-9]+]], 0(%r3)
; CHECK: ld [[FSH:%f[0-9]+]], 8(%r3)
; CHECK: lgr [[RET:%r[0-9]+]], %r2
; CHECK: [[L:\.L.+]]:
; CHECK: std [[FBL]], 160(%r15)
; CHECK: std [[FBH]], 168(%r15)
; CHECK: la %r2, 192(%r15)
; CHECK: la %r3, 176(%r15)
; CHECK: la %r4, 160(%r15)
; CHECK: std [[FSL]], 176(%r15)
; CHECK: std [[FSH]], 184(%r15)
; CHECK: brasl %r14, fminl@PLT
; CHECK: ld [[FL:%f[0-9]+]], 192(%r15)
; CHECK: ld [[FH:%f[0-9]+]], 200(%r15)
; CHECK: lgdr [[RH:%r[0-9]+]], [[FH]]
; CHECK: lgdr [[RL:%r[0-9]+]], [[FL]]
; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
; CHECK: lgdr [[RSL:%r[0-9]+]], [[FSL]]
; CHECK: cdsg [[RSL]], [[RL]], 0([[SRC]])
; CHECK: stg [[RSH]], 216(%r15)
; CHECK: stg [[RSL]], 208(%r15)
; CHECK: ld [[FSL]], 208(%r15)
; CHECK: ld [[FSH]], 216(%r15)
; CHECK: jl [[L]]
; CHECK: std [[FSL]], 0([[RET]])
; CHECK: std [[FSH]], 8([[RET]])
; CHECK: br %r14
%val = load fp128, ptr %b
%res = atomicrmw fmin ptr %src, fp128 %val seq_cst
store fp128 %res, ptr %ret
ret void
}
Loading

0 comments on commit 89c0748

Please sign in to comment.