Skip to content

Commit

Permalink
[AArch64][GlobalISel] Fix atomic truncating stores from generating invalid copies.
Browse files Browse the repository at this point in the history

If the source register is a 64-bit vreg, we need to emit a subregister copy to a
32-bit GPR before selecting the sub-64-bit variants such as STLRW.
  • Loading branch information
aemerson committed Nov 10, 2021
1 parent d71bb6a commit af4dc63
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 0 deletions.
8 changes: 8 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Expand Up @@ -2770,6 +2770,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
Register ValReg = LdSt.getReg(0);
if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
// Emit a subreg copy of 32 bits.
Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
.addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
I.getOperand(0).setReg(NewVal);
}
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
Expand Down
150 changes: 150 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-truncstore-atomic.mir
@@ -0,0 +1,150 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s

---
# Truncating atomic store, 32-bit case: the input stores an s64 G_CONSTANT
# through a (store release (s32)) memory operand. The CHECK lines expect the
# 64-bit value to be narrowed with a gpr32 COPY of its sub_32 subregister,
# and that copy to be the value operand of STLRW.
name: truncstore_atomic_32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
body: |
; CHECK-LABEL: name: truncstore_atomic_32
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
; CHECK-NEXT: STLRW [[COPY2]], [[COPY]] :: (store release (s32))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
liveins: $w1, $x0
%0:gpr(p0) = COPY $x0
%3:gpr(s32) = COPY $w1
%2:gpr(s8) = G_TRUNC %3(s32)
%4:gpr(s8) = G_ASSERT_ZEXT %2, 1
%1:gpr(s1) = G_TRUNC %4(s8)
G_BRCOND %1(s1), %bb.3
G_BR %bb.2
bb.2:
%8:gpr(s64) = G_CONSTANT i64 4
G_STORE %8(s64), %0(p0) :: (store release (s32))
bb.3:
RET_ReallyLR
...
---
# Truncating atomic store, 16-bit case: the input stores an s64 G_CONSTANT
# through a (store release (s16)) memory operand. The CHECK lines expect the
# 64-bit value to be narrowed with a gpr32 COPY of its sub_32 subregister,
# and that copy to be the value operand of STLRH.
name: truncstore_atomic_16
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
body: |
; CHECK-LABEL: name: truncstore_atomic_16
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
; CHECK-NEXT: STLRH [[COPY2]], [[COPY]] :: (store release (s16))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
liveins: $w1, $x0
%0:gpr(p0) = COPY $x0
%3:gpr(s32) = COPY $w1
%2:gpr(s8) = G_TRUNC %3(s32)
%4:gpr(s8) = G_ASSERT_ZEXT %2, 1
%1:gpr(s1) = G_TRUNC %4(s8)
G_BRCOND %1(s1), %bb.3
G_BR %bb.2
bb.2:
%8:gpr(s64) = G_CONSTANT i64 4
G_STORE %8(s64), %0(p0) :: (store release (s16))
bb.3:
RET_ReallyLR
...
---
# Truncating atomic store, 8-bit case: the input stores an s64 G_CONSTANT
# through a (store release (s8)) memory operand. The CHECK lines expect the
# 64-bit value to be narrowed with a gpr32 COPY of its sub_32 subregister,
# and that copy to be the value operand of STLRB.
name: truncstore_atomic_8
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
body: |
; CHECK-LABEL: name: truncstore_atomic_8
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
; CHECK-NEXT: STLRB [[COPY2]], [[COPY]] :: (store release (s8))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
liveins: $w1, $x0
%0:gpr(p0) = COPY $x0
%3:gpr(s32) = COPY $w1
%2:gpr(s8) = G_TRUNC %3(s32)
%4:gpr(s8) = G_ASSERT_ZEXT %2, 1
%1:gpr(s1) = G_TRUNC %4(s8)
G_BRCOND %1(s1), %bb.3
G_BR %bb.2
bb.2:
%8:gpr(s64) = G_CONSTANT i64 4
G_STORE %8(s64), %0(p0) :: (store release (s8))
bb.3:
RET_ReallyLR
...

0 comments on commit af4dc63

Please sign in to comment.