[AArch64] Add patterns for relaxed atomic ld/st into fp registers
Adds patterns to match relaxed atomic integer loads/stores bitcast to/from fp values, so they are selected directly into FP loads/stores.

Fixes #52927

Differential Revision: https://reviews.llvm.org/D117573
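For context, a minimal reproducer in the spirit of issue #52927 (a sketch; the function name and registers are illustrative, not taken from the issue):

; Before this patch, a relaxed atomic FP load went through a GPR:
;   ldr  w8, [x0]
;   fmov s0, w8
; With these patterns it selects a single FP load:
;   ldr  s0, [x0]
define float @load_f32(float* %p) {
  %v = load atomic float, float* %p monotonic, align 4
  ret float %v
}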
danilaml committed Jan 25, 2022
1 parent d95cf1f commit 153b1e3
Showing 2 changed files with 154 additions and 0 deletions.
60 changes: 60 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -102,6 +102,34 @@ def : Pat<(relaxed_load<atomic_load_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(LDURXi GPR64sp:$Rn, simm9:$offset)>;

// FP 32-bit loads
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend32:$extend))))),
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend32:$extend))))),
(LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
uimm12s4:$offset))))),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
(LDURSi GPR64sp:$Rn, simm9:$offset)>;

// FP 64-bit loads
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))))),
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
uimm12s8:$offset))))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;

//===----------------------------------
// Atomic stores
//===----------------------------------
@@ -196,6 +224,38 @@ def : Pat<(relaxed_store<atomic_store_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
(STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;

// FP 32-bit stores
def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend32:$extend),
(i32 (bitconvert (f32 FPR32Op:$val)))),
(STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend32:$extend),
(i32 (bitconvert (f32 FPR32Op:$val)))),
(STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
def : Pat<(relaxed_store<atomic_store_32>
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
(STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(relaxed_store<atomic_store_32>
(am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
(STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;

// FP 64-bit stores
def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend),
(i64 (bitconvert (f64 FPR64Op:$val)))),
(STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend),
(i64 (bitconvert (f64 FPR64Op:$val)))),
(STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64>
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
(STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(relaxed_store<atomic_store_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
(STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;

//===----------------------------------
// Low-level exclusive operations
//===----------------------------------
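A note on the pattern shape: as the commit message says, the DAG represents a relaxed atomic float load as an integer atomic load whose result is bitcast to the FP type, so each pattern above matches the (bitconvert (relaxed_load ...)) form for one addressing mode and folds the cast into a single FP load; the store patterns mirror this with a bitconvert on the value operand. A register-offset sketch (illustrative names, not from the patch):

; Expected to hit the ro_Xindexed32 load pattern and emit
; something like: ldr s0, [x0, x1, lsl #2]
define float @load_f32_regoff(float* %p, i64 %idx) {
  %q = getelementptr float, float* %p, i64 %idx
  %v = load atomic float, float* %q monotonic, align 4
  ret float %v
}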
94 changes: 94 additions & 0 deletions llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
@@ -0,0 +1,94 @@
; PR52927: Relaxed atomics can load into/store from fp regs directly
; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s

define float @atomic_load_relaxed_f32(float* %p, i32 %off32, i64 %off64) #0 {
; CHECK-LABEL: atomic_load_relaxed_f32:
%ptr_unsigned = getelementptr float, float* %p, i32 4095
%val_unsigned = load atomic float, float* %ptr_unsigned monotonic, align 4
; CHECK: ldr {{s[0-9]+}}, [x0, #16380]

%ptr_regoff = getelementptr float, float* %p, i32 %off32
%val_regoff = load atomic float, float* %ptr_regoff unordered, align 4
%tot1 = fadd float %val_unsigned, %val_regoff
; CHECK: ldr {{s[0-9]+}}, [x0, w1, sxtw #2]

%ptr_regoff64 = getelementptr float, float* %p, i64 %off64
%val_regoff64 = load atomic float, float* %ptr_regoff64 monotonic, align 4
%tot2 = fadd float %tot1, %val_regoff64
; CHECK: ldr {{s[0-9]+}}, [x0, x2, lsl #2]

%ptr_unscaled = getelementptr float, float* %p, i32 -64
%val_unscaled = load atomic float, float* %ptr_unscaled unordered, align 4
%tot3 = fadd float %tot2, %val_unscaled
; CHECK: ldur {{s[0-9]+}}, [x0, #-256]

ret float %tot3
}

define double @atomic_load_relaxed_f64(double* %p, i32 %off32, i64 %off64) #0 {
; CHECK-LABEL: atomic_load_relaxed_f64:
%ptr_unsigned = getelementptr double, double* %p, i32 4095
%val_unsigned = load atomic double, double* %ptr_unsigned monotonic, align 8
; CHECK: ldr {{d[0-9]+}}, [x0, #32760]

%ptr_regoff = getelementptr double, double* %p, i32 %off32
%val_regoff = load atomic double, double* %ptr_regoff unordered, align 8
%tot1 = fadd double %val_unsigned, %val_regoff
; CHECK: ldr {{d[0-9]+}}, [x0, w1, sxtw #3]

%ptr_regoff64 = getelementptr double, double* %p, i64 %off64
%val_regoff64 = load atomic double, double* %ptr_regoff64 monotonic, align 8
%tot2 = fadd double %tot1, %val_regoff64
; CHECK: ldr {{d[0-9]+}}, [x0, x2, lsl #3]

%ptr_unscaled = getelementptr double, double* %p, i32 -32
%val_unscaled = load atomic double, double* %ptr_unscaled unordered, align 8
%tot3 = fadd double %tot2, %val_unscaled
; CHECK: ldur {{d[0-9]+}}, [x0, #-256]

ret double %tot3
}

define void @atomic_store_relaxed_f32(float* %p, i32 %off32, i64 %off64, float %val) #0 {
; CHECK-LABEL: atomic_store_relaxed_f32:
%ptr_unsigned = getelementptr float, float* %p, i32 4095
store atomic float %val, float* %ptr_unsigned monotonic, align 4
; CHECK: str {{s[0-9]+}}, [x0, #16380]

%ptr_regoff = getelementptr float, float* %p, i32 %off32
store atomic float %val, float* %ptr_regoff unordered, align 4
; CHECK: str {{s[0-9]+}}, [x0, w1, sxtw #2]

%ptr_regoff64 = getelementptr float, float* %p, i64 %off64
store atomic float %val, float* %ptr_regoff64 monotonic, align 4
; CHECK: str {{s[0-9]+}}, [x0, x2, lsl #2]

%ptr_unscaled = getelementptr float, float* %p, i32 -64
store atomic float %val, float* %ptr_unscaled unordered, align 4
; CHECK: stur {{s[0-9]+}}, [x0, #-256]

ret void
}

define void @atomic_store_relaxed_f64(double* %p, i32 %off32, i64 %off64, double %val) #0 {
; CHECK-LABEL: atomic_store_relaxed_f64:
%ptr_unsigned = getelementptr double, double* %p, i32 4095
store atomic double %val, double* %ptr_unsigned monotonic, align 8
; CHECK: str {{d[0-9]+}}, [x0, #32760]

%ptr_regoff = getelementptr double, double* %p, i32 %off32
store atomic double %val, double* %ptr_regoff unordered, align 8
; CHECK: str {{d[0-9]+}}, [x0, w1, sxtw #3]

%ptr_regoff64 = getelementptr double, double* %p, i64 %off64
store atomic double %val, double* %ptr_regoff64 unordered, align 8
; CHECK: str {{d[0-9]+}}, [x0, x2, lsl #3]

%ptr_unscaled = getelementptr double, double* %p, i32 -32
store atomic double %val, double* %ptr_unscaled monotonic, align 8
; CHECK: stur {{d[0-9]+}}, [x0, #-256]

ret void
}

attributes #0 = { nounwind }
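The store side is symmetric; a sketch of the codegen difference (illustrative registers):

; Before: fmov w8, s0  then  str w8, [x0]
; After:  str s0, [x0]
define void @store_f32(float* %p, float %v) {
  store atomic float %v, float* %p monotonic, align 4
  ret void
}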
