Skip to content

Commit

Permalink
Revert "Allow rematerialization of virtual reg uses"
Browse files Browse the repository at this point in the history
This reverts commit 877572c which
introduced PR51516.
  • Loading branch information
petrhosek committed Aug 18, 2021
1 parent 1c84167 commit 2d4470a
Show file tree
Hide file tree
Showing 42 changed files with 4,224 additions and 4,239 deletions.
12 changes: 6 additions & 6 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Expand Up @@ -117,11 +117,10 @@ class TargetInstrInfo : public MCInstrInfo {
const MachineFunction &MF) const;

/// Return true if the instruction is trivially rematerializable, meaning it
/// has no side effects. Uses of constants and unallocatable physical
/// registers are always trivial to rematerialize so that the instructions
/// result is independent of the place in the function. Uses of virtual
/// registers are allowed but it is the caller's responsibility to ensure these
/// operands are valid at the point the instruction is being moved.
/// has no side effects and requires no operands that aren't always available.
/// This means the only allowed uses are constants and unallocatable physical
/// registers so that the instructions result is independent of the place
/// in the function.
bool isTriviallyReMaterializable(const MachineInstr &MI,
AAResults *AA = nullptr) const {
return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
Expand All @@ -141,7 +140,8 @@ class TargetInstrInfo : public MCInstrInfo {
/// set, this hook lets the target specify whether the instruction is actually
/// trivially rematerializable, taking into consideration its operands. This
/// predicate must return false if the instruction has any side effects other
/// than producing a value.
/// than producing a value, or if it requires any address registers that are
/// not always available.
/// Requirements must be checked as stated in isTriviallyReMaterializable().
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
AAResults *AA) const {
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/CodeGen/TargetInstrInfo.cpp
Expand Up @@ -921,8 +921,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
const MachineRegisterInfo &MRI = MF.getRegInfo();

// Remat clients assume operand 0 is the defined register.
if (!MI.getNumOperands() || !MI.getOperand(0).isReg() ||
MI.getOperand(0).isTied())
if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
return false;
Register DefReg = MI.getOperand(0).getReg();

Expand Down Expand Up @@ -984,6 +983,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// same virtual register, though.
if (MO.isDef() && Reg != DefReg)
return false;

// Don't allow any virtual-register uses. Rematting an instruction with
    // virtual register uses would lengthen the live ranges of the uses, which
// is not necessarily a good idea, certainly not "trivial".
if (MO.isUse())
return false;
}

// Everything checked out.
Expand Down
60 changes: 0 additions & 60 deletions llvm/test/CodeGen/AMDGPU/remat-sop.mir
Expand Up @@ -51,66 +51,6 @@ body: |
S_NOP 0, implicit %2
S_ENDPGM 0
...
# The liverange of %0 covers a point of rematerialization, so the source value is
# available.
---
name: test_remat_s_mov_b32_vreg_src_long_lr
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr
; GCN: renamable $sgpr0 = IMPLICIT_DEF
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
; GCN: S_NOP 0, implicit killed renamable $sgpr1
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
; GCN: S_NOP 0, implicit killed renamable $sgpr1
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
; GCN: S_NOP 0, implicit killed renamable $sgpr1
; GCN: S_NOP 0, implicit killed renamable $sgpr0
; GCN: S_ENDPGM 0
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_32 = S_MOV_B32 %0:sreg_32
%2:sreg_32 = S_MOV_B32 %0:sreg_32
%3:sreg_32 = S_MOV_B32 %0:sreg_32
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_NOP 0, implicit %3
S_NOP 0, implicit %0
S_ENDPGM 0
...
# The liverange of %0 does not cover a point of rematerialization, so the source value is
# unavailable and we do not want to artificially extend the liverange.
---
name: test_no_remat_s_mov_b32_vreg_src_short_lr
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr
; GCN: renamable $sgpr0 = IMPLICIT_DEF
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
; GCN: S_NOP 0, implicit killed renamable $sgpr1
; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
; GCN: S_NOP 0, implicit killed renamable $sgpr1
; GCN: S_NOP 0, implicit killed renamable $sgpr0
; GCN: S_ENDPGM 0
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_32 = S_MOV_B32 %0:sreg_32
%2:sreg_32 = S_MOV_B32 %0:sreg_32
%3:sreg_32 = S_MOV_B32 %0:sreg_32
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_NOP 0, implicit %3
S_ENDPGM 0
...
---
name: test_remat_s_mov_b64
tracksRegLiveness: true
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
Expand Up @@ -29,20 +29,20 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnon
; ENABLE-NEXT: pophs {r11, pc}
; ENABLE-NEXT: .LBB0_3: @ %while.body.preheader
; ENABLE-NEXT: movw r12, :lower16:skip
; ENABLE-NEXT: sub r3, r1, #1
; ENABLE-NEXT: sub r1, r1, #1
; ENABLE-NEXT: movt r12, :upper16:skip
; ENABLE-NEXT: .LBB0_4: @ %while.body
; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: ldrb r1, [r0]
; ENABLE-NEXT: ldrb r1, [r12, r1]
; ENABLE-NEXT: add r0, r0, r1
; ENABLE-NEXT: sub r1, r3, #1
; ENABLE-NEXT: cmp r1, r3
; ENABLE-NEXT: ldrb r3, [r0]
; ENABLE-NEXT: ldrb r3, [r12, r3]
; ENABLE-NEXT: add r0, r0, r3
; ENABLE-NEXT: sub r3, r1, #1
; ENABLE-NEXT: cmp r3, r1
; ENABLE-NEXT: bhs .LBB0_6
; ENABLE-NEXT: @ %bb.5: @ %while.body
; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLE-NEXT: cmp r0, r2
; ENABLE-NEXT: mov r3, r1
; ENABLE-NEXT: mov r1, r3
; ENABLE-NEXT: blo .LBB0_4
; ENABLE-NEXT: .LBB0_6: @ %if.end29
; ENABLE-NEXT: pop {r11, pc}
Expand Down Expand Up @@ -119,20 +119,20 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnon
; DISABLE-NEXT: pophs {r11, pc}
; DISABLE-NEXT: .LBB0_3: @ %while.body.preheader
; DISABLE-NEXT: movw r12, :lower16:skip
; DISABLE-NEXT: sub r3, r1, #1
; DISABLE-NEXT: sub r1, r1, #1
; DISABLE-NEXT: movt r12, :upper16:skip
; DISABLE-NEXT: .LBB0_4: @ %while.body
; DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: ldrb r1, [r0]
; DISABLE-NEXT: ldrb r1, [r12, r1]
; DISABLE-NEXT: add r0, r0, r1
; DISABLE-NEXT: sub r1, r3, #1
; DISABLE-NEXT: cmp r1, r3
; DISABLE-NEXT: ldrb r3, [r0]
; DISABLE-NEXT: ldrb r3, [r12, r3]
; DISABLE-NEXT: add r0, r0, r3
; DISABLE-NEXT: sub r3, r1, #1
; DISABLE-NEXT: cmp r3, r1
; DISABLE-NEXT: bhs .LBB0_6
; DISABLE-NEXT: @ %bb.5: @ %while.body
; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
; DISABLE-NEXT: cmp r0, r2
; DISABLE-NEXT: mov r3, r1
; DISABLE-NEXT: mov r1, r3
; DISABLE-NEXT: blo .LBB0_4
; DISABLE-NEXT: .LBB0_6: @ %if.end29
; DISABLE-NEXT: pop {r11, pc}
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/ARM/funnel-shift-rot.ll
Expand Up @@ -73,13 +73,13 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
; SCALAR-NEXT: push {r4, r5, r11, lr}
; SCALAR-NEXT: rsb r3, r2, #0
; SCALAR-NEXT: and r4, r2, #63
; SCALAR-NEXT: and r12, r3, #63
; SCALAR-NEXT: rsb r3, r12, #32
; SCALAR-NEXT: and lr, r3, #63
; SCALAR-NEXT: rsb r3, lr, #32
; SCALAR-NEXT: lsl r2, r0, r4
; SCALAR-NEXT: lsr lr, r0, r12
; SCALAR-NEXT: orr r3, lr, r1, lsl r3
; SCALAR-NEXT: subs lr, r12, #32
; SCALAR-NEXT: lsrpl r3, r1, lr
; SCALAR-NEXT: lsr r12, r0, lr
; SCALAR-NEXT: orr r3, r12, r1, lsl r3
; SCALAR-NEXT: subs r12, lr, #32
; SCALAR-NEXT: lsrpl r3, r1, r12
; SCALAR-NEXT: subs r5, r4, #32
; SCALAR-NEXT: movwpl r2, #0
; SCALAR-NEXT: cmp r5, #0
Expand All @@ -88,8 +88,8 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
; SCALAR-NEXT: lsr r3, r0, r3
; SCALAR-NEXT: orr r3, r3, r1, lsl r4
; SCALAR-NEXT: lslpl r3, r0, r5
; SCALAR-NEXT: lsr r0, r1, r12
; SCALAR-NEXT: cmp lr, #0
; SCALAR-NEXT: lsr r0, r1, lr
; SCALAR-NEXT: cmp r12, #0
; SCALAR-NEXT: movwpl r0, #0
; SCALAR-NEXT: orr r1, r3, r0
; SCALAR-NEXT: mov r0, r2
Expand Down Expand Up @@ -245,15 +245,15 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: and r12, r2, #63
; CHECK-NEXT: and lr, r2, #63
; CHECK-NEXT: rsb r2, r2, #0
; CHECK-NEXT: rsb r3, r12, #32
; CHECK-NEXT: rsb r3, lr, #32
; CHECK-NEXT: and r4, r2, #63
; CHECK-NEXT: lsr lr, r0, r12
; CHECK-NEXT: orr r3, lr, r1, lsl r3
; CHECK-NEXT: subs lr, r12, #32
; CHECK-NEXT: lsr r12, r0, lr
; CHECK-NEXT: orr r3, r12, r1, lsl r3
; CHECK-NEXT: subs r12, lr, #32
; CHECK-NEXT: lsl r2, r0, r4
; CHECK-NEXT: lsrpl r3, r1, lr
; CHECK-NEXT: lsrpl r3, r1, r12
; CHECK-NEXT: subs r5, r4, #32
; CHECK-NEXT: movwpl r2, #0
; CHECK-NEXT: cmp r5, #0
Expand All @@ -262,8 +262,8 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-NEXT: lsr r3, r0, r3
; CHECK-NEXT: orr r3, r3, r1, lsl r4
; CHECK-NEXT: lslpl r3, r0, r5
; CHECK-NEXT: lsr r0, r1, r12
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: lsr r0, r1, lr
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: movwpl r0, #0
; CHECK-NEXT: orr r1, r0, r3
; CHECK-NEXT: mov r0, r2
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/ARM/funnel-shift.ll
Expand Up @@ -224,31 +224,31 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: bl __aeabi_uldivmod
; CHECK-NEXT: add r0, r2, #27
; CHECK-NEXT: lsl r2, r7, #27
; CHECK-NEXT: and r12, r0, #63
; CHECK-NEXT: lsl r6, r6, #27
; CHECK-NEXT: and r1, r0, #63
; CHECK-NEXT: lsl r2, r7, #27
; CHECK-NEXT: orr r7, r6, r7, lsr #5
; CHECK-NEXT: rsb r3, r12, #32
; CHECK-NEXT: lsr r2, r2, r12
; CHECK-NEXT: mov r6, #63
; CHECK-NEXT: orr r2, r2, r7, lsl r3
; CHECK-NEXT: subs r3, r12, #32
; CHECK-NEXT: rsb r3, r1, #32
; CHECK-NEXT: lsr r2, r2, r1
; CHECK-NEXT: subs r12, r1, #32
; CHECK-NEXT: bic r6, r6, r0
; CHECK-NEXT: orr r2, r2, r7, lsl r3
; CHECK-NEXT: lsl r5, r9, #1
; CHECK-NEXT: lsrpl r2, r7, r3
; CHECK-NEXT: subs r1, r6, #32
; CHECK-NEXT: lsrpl r2, r7, r12
; CHECK-NEXT: lsl r0, r5, r6
; CHECK-NEXT: lsl r4, r8, #1
; CHECK-NEXT: subs r4, r6, #32
; CHECK-NEXT: lsl r3, r8, #1
; CHECK-NEXT: movwpl r0, #0
; CHECK-NEXT: orr r4, r4, r9, lsr #31
; CHECK-NEXT: orr r3, r3, r9, lsr #31
; CHECK-NEXT: orr r0, r0, r2
; CHECK-NEXT: rsb r2, r6, #32
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: lsr r1, r7, r1
; CHECK-NEXT: lsr r2, r5, r2
; CHECK-NEXT: orr r2, r2, r4, lsl r6
; CHECK-NEXT: lslpl r2, r5, r1
; CHECK-NEXT: lsr r1, r7, r12
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: orr r2, r2, r3, lsl r6
; CHECK-NEXT: lslpl r2, r5, r4
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: movwpl r1, #0
; CHECK-NEXT: orr r1, r2, r1
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
Expand Up @@ -91,17 +91,17 @@ define void @i56_or(i56* %a) {
; BE-LABEL: i56_or:
; BE: @ %bb.0:
; BE-NEXT: mov r1, r0
; BE-NEXT: ldr r12, [r0]
; BE-NEXT: ldrh r2, [r1, #4]!
; BE-NEXT: ldrb r3, [r1, #2]
; BE-NEXT: orr r2, r3, r2, lsl #8
; BE-NEXT: ldr r3, [r0]
; BE-NEXT: orr r2, r2, r3, lsl #24
; BE-NEXT: orr r12, r2, #384
; BE-NEXT: strb r12, [r1, #2]
; BE-NEXT: lsr r2, r12, #8
; BE-NEXT: strh r2, [r1]
; BE-NEXT: bic r1, r3, #255
; BE-NEXT: orr r1, r1, r12, lsr #24
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
; BE-NEXT: strb r2, [r1, #2]
; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strh r3, [r1]
; BE-NEXT: bic r1, r12, #255
; BE-NEXT: orr r1, r1, r2, lsr #24
; BE-NEXT: str r1, [r0]
; BE-NEXT: mov pc, lr
%aa = load i56, i56* %a
Expand All @@ -127,13 +127,13 @@ define void @i56_and_or(i56* %a) {
; BE-NEXT: ldrb r3, [r1, #2]
; BE-NEXT: strb r2, [r1, #2]
; BE-NEXT: orr r2, r3, r12, lsl #8
; BE-NEXT: ldr r3, [r0]
; BE-NEXT: orr r2, r2, r3, lsl #24
; BE-NEXT: orr r12, r2, #384
; BE-NEXT: lsr r2, r12, #8
; BE-NEXT: strh r2, [r1]
; BE-NEXT: bic r1, r3, #255
; BE-NEXT: orr r1, r1, r12, lsr #24
; BE-NEXT: ldr r12, [r0]
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strh r3, [r1]
; BE-NEXT: bic r1, r12, #255
; BE-NEXT: orr r1, r1, r2, lsr #24
; BE-NEXT: str r1, [r0]
; BE-NEXT: mov pc, lr

Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/ARM/neon-copy.ll
Expand Up @@ -1340,16 +1340,16 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: vmov.u16 r1, d0[1]
; CHECK-NEXT: and r12, r0, #3
; CHECK-NEXT: and r0, r0, #3
; CHECK-NEXT: vmov.u16 r2, d0[2]
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vmov.u16 r3, d0[3]
; CHECK-NEXT: orr r0, r0, r12, lsl #1
; CHECK-NEXT: mov r3, sp
; CHECK-NEXT: vmov.u16 r12, d0[3]
; CHECK-NEXT: orr r0, r3, r0, lsl #1
; CHECK-NEXT: vst1.16 {d0[0]}, [r0:16]
; CHECK-NEXT: vldr d0, [sp]
; CHECK-NEXT: vmov.16 d0[1], r1
; CHECK-NEXT: vmov.16 d0[2], r2
; CHECK-NEXT: vmov.16 d0[3], r3
; CHECK-NEXT: vmov.16 d0[3], r12
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: bx lr
%tmp = extractelement <8 x i16> %x, i32 0
Expand Down

0 comments on commit 2d4470a

Please sign in to comment.