-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MachinePipeliner] Fix store-store dependences (#72575)
The pipeliner needs to mark store-store order dependences as loop-carried dependences. Otherwise, the stores may be scheduled further apart than the MII. The order dependences imply that the first instance of the dependent store is scheduled before the second instance of the source store instruction.
- Loading branch information
Showing
2 changed files
with
89 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\ | ||
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s | ||
|
||
; Test that the pipeliner schedules the store instructions correctly. Since | ||
; there is a dependence between the stores, they cannot be scheduled further than | ||
; MII cycles/instructions apart. That is, the first store cannot occur multiple | ||
; times before the second store in the schedule. | ||
define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr { | ||
; CHECK-LABEL: comp_method: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: extsw 7, 8 | ||
; CHECK-NEXT: extsw 8, 9 | ||
; CHECK-NEXT: clrldi 9, 6, 32 | ||
; CHECK-NEXT: addi 6, 3, -1 | ||
; CHECK-NEXT: mtctr 9 | ||
; CHECK-NEXT: li 11, 0 | ||
; CHECK-NEXT: sradi 12, 11, 2 | ||
; CHECK-NEXT: add 5, 5, 8 | ||
; CHECK-NEXT: li 8, 2 | ||
; CHECK-NEXT: li 3, 8 | ||
; CHECK-NEXT: addi 11, 7, 0 | ||
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill | ||
; CHECK-NEXT: lbzu 9, 1(6) | ||
; CHECK-NEXT: add 12, 12, 10 | ||
; CHECK-NEXT: extsb 9, 9 | ||
; CHECK-NEXT: stbx 8, 4, 9 | ||
; CHECK-NEXT: add 9, 9, 12 | ||
; CHECK-NEXT: bdz .LBB0_2 | ||
; CHECK-NEXT: .p2align 4 | ||
; CHECK-NEXT: .LBB0_1: | ||
; CHECK-NEXT: lbzu 0, 1(6) | ||
; CHECK-NEXT: sradi 12, 11, 2 | ||
; CHECK-NEXT: add 11, 11, 7 | ||
; CHECK-NEXT: add 12, 12, 10 | ||
; CHECK-NEXT: sldi 30, 9, 2 | ||
; CHECK-NEXT: add 9, 9, 30 | ||
; CHECK-NEXT: extsb 0, 0 | ||
; CHECK-NEXT: stbx 3, 5, 9 | ||
; CHECK-NEXT: add 9, 0, 12 | ||
; CHECK-NEXT: stbx 8, 4, 0 | ||
; CHECK-NEXT: bdnz .LBB0_1 | ||
; CHECK-NEXT: .LBB0_2: | ||
; CHECK-NEXT: sldi 4, 9, 2 | ||
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload | ||
; CHECK-NEXT: add 4, 9, 4 | ||
; CHECK-NEXT: stbx 3, 5, 4 | ||
; CHECK-NEXT: blr | ||
; In the expected output above, the loop at .LBB0_1 holds one stbx of each kind, | ||
; with one further stbx before the loop and one after it. | ||
; | ||
; Entry block (implicitly numbered %7, see the phi below): the two llvm.assume | ||
; calls state that %3 is greater than 64 and that its low bit is clear (even). | ||
; Argument %4 is not referenced anywhere in the body. | ||
%8 = icmp sgt i32 %3, 64 | ||
tail call void @llvm.assume(i1 %8) | ||
%9 = and i32 %3, 1 | ||
%10 = icmp eq i32 %9, 0 | ||
tail call void @llvm.assume(i1 %10) | ||
; Widen the i32 arguments to i64. %13 (zero-extended %3) is the loop bound and | ||
; %14 (%2 advanced by sext(%6) bytes) is the base address of the second store. | ||
%11 = sext i32 %5 to i64 | ||
%12 = sext i32 %6 to i64 | ||
%13 = zext nneg i32 %3 to i64 | ||
%14 = getelementptr i8, ptr %2, i64 %12 | ||
br label %16 | ||
|
||
; Exit block, reached once the induction variable equals %13. | ||
15: | ||
ret void | ||
|
||
; Loop body. %17 is the induction variable: it starts at 0 and is incremented | ||
; by 1 per iteration until it equals %13. | ||
16: | ||
%17 = phi i64 [ 0, %7 ], [ %24, %16 ] | ||
; First store: write the constant 2 to %1, indexed by the sign-extended byte | ||
; loaded from %0[%17]. | ||
%18 = getelementptr inbounds i8, ptr %0, i64 %17 | ||
%19 = load i8, ptr %18, align 1 | ||
%20 = sext i8 %19 to i64 | ||
%21 = getelementptr inbounds i8, ptr %1, i64 %20 | ||
store i8 2, ptr %21, align 1 | ||
; Second store: write the constant 8 to %14 + 5 * (%20 + ((%17 * %11) >> 2) + %v1). | ||
; Neither %1 nor %2 is marked noalias, so this store may alias the first one; | ||
; presumably this is the store-store dependence the test is meant to exercise. | ||
%22 = mul nsw i64 %17, %11 | ||
%a1 = ashr i64 %22, 2 | ||
%a2 = add i64 %a1, %v1 | ||
%a3 = add i64 %20, %a2 | ||
%a4 = mul nsw i64 %a3, 5 | ||
%23 = getelementptr i8, ptr %14, i64 %a4 | ||
store i8 8, ptr %23, align 1 | ||
; Advance the induction variable and leave the loop when it reaches %13. | ||
%24 = add nuw nsw i64 %17, 1 | ||
%25 = icmp eq i64 %24, %13 | ||
br i1 %25, label %15, label %16 | ||
} | ||
|
||
; Declaration of the llvm.assume intrinsic called from @comp_method. | ||
declare void @llvm.assume(i1 noundef) #1 | ||
|
||
; Attribute group #1, referenced by the llvm.assume declaration. | ||
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } |