21 changes: 15 additions & 6 deletions llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
@@ -275,6 +275,16 @@ bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
return false;
}

// Don't move across instructions whose memory effects are guaranteed to be
// ordered, e.g. volatile or ordered atomic accesses.
if (MI->hasOrderedMemoryRef() && It->hasOrderedMemoryRef()) {
LLVM_DEBUG(
dbgs()
<< "Cannot move across instruction that is guaranteed to be ordered: "
<< *It);
return false;
}
}

return true;
@@ -334,6 +344,10 @@ bool RISCVPreAllocZilsdOpt::rescheduleOps(
Distance > MaxRescheduleDistance)
continue;

// If MI0 comes later, the pair cannot be folded when the memory order matters.
if (!MI1IsLater && MI0->hasOrderedMemoryRef() && MI1->hasOrderedMemoryRef())
continue;

// Move the instruction to the target position
MachineBasicBlock::iterator InsertPos = TargetInstr->getIterator();
++InsertPos;
@@ -400,15 +414,10 @@ bool RISCVPreAllocZilsdOpt::isMemoryOp(const MachineInstr &MI) {
return false;

// When no memory operands are present, conservatively assume unaligned,
// volatile, unfoldable.
// unfoldable.
if (!MI.hasOneMemOperand())
return false;

const MachineMemOperand *MMO = *MI.memoperands_begin();

if (MMO->isVolatile() || MMO->isAtomic())
return false;

// sw <undef> could probably be eliminated entirely, but for now we just want
// to avoid making a mess of it.
if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
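For context on the guards added above (not part of the patch itself): both new checks key off MachineInstr::hasOrderedMemoryRef(), which treats volatile accesses, atomics stronger than unordered, and memory instructions without memory operands as ordered. A minimal, hypothetical standalone sketch of that predicate, assuming LLVM's MachineInstr/MachineMemOperand API, looks roughly like this:

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Sketch only: approximates what MachineInstr::hasOrderedMemoryRef() reports.
// An access is "ordered" when reordering it could be observable: volatile
// accesses, atomics stronger than unordered, or instructions that may touch
// memory but carry no memory operands (conservative default).
static bool isOrderedMemAccess(const MachineInstr &MI) {
  if (!MI.mayLoad() && !MI.mayStore() && !MI.isCall() &&
      !MI.hasUnmodeledSideEffects())
    return false; // Cannot access memory, so there is nothing to order.

  if (MI.memoperands_empty())
    return true; // No memory info: conservatively assume ordering matters.

  // MachineMemOperand::isUnordered() is false for volatile accesses and for
  // atomics with an ordering stronger than "unordered" (e.g. monotonic).
  return any_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
    return !MMO->isUnordered();
  });
}
```

This is why, in the tests below, the `load unordered` pairs in `interleave_atomic_volatile_loads2` may still be paired and moved past the volatile loads, while the `monotonic` loads in `interleave_atomic_volatile_loads` block any rescheduling.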
209 changes: 204 additions & 5 deletions llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
@@ -182,6 +182,26 @@
ret i32 %5
}

define void @invalid_volatile_loads() {
ret void
}

define void @atomic_loads() {
ret void
}

define void @invalid_atomic_loads() {
ret void
}

define void @interleave_atomic_volatile_loads() {
ret void
}

define void @interleave_atomic_volatile_loads2() {
ret void
}

define i32 @store_dependency(ptr %0, i32 %1) {
%3 = load i32, ptr %0, align 4
%4 = getelementptr inbounds i32, ptr %0, i32 1
@@ -895,7 +915,7 @@ body: |

...
---
# Test with volatile loads - should not combine
# Test with valid volatile loads
name: volatile_loads
alignment: 4
tracksRegLiveness: true
@@ -918,17 +938,196 @@ body: |
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
; Adjacent volatile loads may be combined, since pairing them preserves their order
%1:gpr = LW %0, 0 :: (volatile load (s32))
%2:gpr = LW %0, 4 :: (volatile load (s32))
%3:gpr = ADD %1, %2
PseudoRET

...
---
# Test with invalid volatile loads
name: invalid_volatile_loads
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x10', virtual-reg: '%0' }
body: |
bb.0:
liveins: $x10

; CHECK-LABEL: name: invalid_volatile_loads
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
; CHECK-NEXT: PseudoRET
;
; CHECK-4BYTE-LABEL: name: invalid_volatile_loads
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
%1:gpr = LW %0, 0 :: (volatile load (s32))
%2:gpr = LW %0, 8 :: (volatile load (s32))
%3:gpr = LW %0, 4 :: (volatile load (s32))
%4:gpr = LW %0, 12 :: (volatile load (s32))
PseudoRET

...
---
# Test with valid atomic loads
name: atomic_loads
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x10', virtual-reg: '%0' }
body: |
bb.0:
liveins: $x10

; CHECK-LABEL: name: atomic_loads
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
; CHECK-NEXT: PseudoRET
;
; CHECK-4BYTE-LABEL: name: atomic_loads
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
%1:gpr = LW %0, 0 :: (load monotonic (s32))
%2:gpr = LW %0, 4 :: (load monotonic (s32))
PseudoRET

...
---
# Test with invalid atomic loads
name: invalid_atomic_loads
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x10', virtual-reg: '%0' }
body: |
bb.0:
liveins: $x10

; CHECK-LABEL: name: invalid_atomic_loads
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load monotonic (s32))
; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (load monotonic (s32))
; CHECK-NEXT: PseudoRET
;
; CHECK-4BYTE-LABEL: name: invalid_atomic_loads
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
%1:gpr = LW %0, 0 :: (load monotonic (s32))
%2:gpr = LW %0, 8 :: (load monotonic (s32))
%3:gpr = LW %0, 4 :: (load monotonic (s32))
%4:gpr = LW %0, 12 :: (load monotonic (s32))
PseudoRET

...
---
# Test with interleaving monotonic atomic loads and volatile loads
name: interleave_atomic_volatile_loads
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x10', virtual-reg: '%0' }
body: |
bb.0:
liveins: $x10

; CHECK-LABEL: name: interleave_atomic_volatile_loads
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
; CHECK-NEXT: PseudoRET
;
; CHECK-4BYTE-LABEL: name: interleave_atomic_volatile_loads
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
; CHECK-4BYTE-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
%1:gpr = LW %0, 0 :: (load monotonic (s32))
%2:gpr = LW %0, 8 :: (volatile load (s32))
%3:gpr = LW %0, 4 :: (load monotonic (s32))
%4:gpr = LW %0, 12 :: (volatile load (s32))
PseudoRET

...
---
# Test with interleaving unordered atomic loads and volatile loads
name: interleave_atomic_volatile_loads2
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x10', virtual-reg: '%0' }
body: |
bb.0:
liveins: $x10

; CHECK-LABEL: name: interleave_atomic_volatile_loads2
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load unordered (s32))
; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load unordered (s32))
; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
; CHECK-NEXT: PseudoRET
;
; CHECK-4BYTE-LABEL: name: interleave_atomic_volatile_loads2
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load unordered (s32))
; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 8 :: (volatile load (s32))
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
%1:gpr = LW %0, 0 :: (load unordered (s32))
%2:gpr = LW %0, 8 :: (volatile load (s32))
%3:gpr = LW %0, 4 :: (load unordered (s32))
%4:gpr = LW %0, 12 :: (volatile load (s32))
PseudoRET

...
---
# Test store dependency - store modifies same location as load