Skip to content

Commit

Permalink
[AArch64][NFC] Prepare test cases (for D128302) to show more accurate…
Browse files Browse the repository at this point in the history
… cost estimation of extract-element could generate better assembly code.

Pre-commit the test cases (for D128302) to show that more accurate cost
estimation of extract-element could generate better code.

Differential Revision: https://reviews.llvm.org/D128945
  • Loading branch information
minglotus-6 committed Jul 7, 2022
1 parent 42e1035 commit b242e85
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 0 deletions.
24 changes: 24 additions & 0 deletions llvm/test/Analysis/CostModel/AArch64/kryo.ll
Expand Up @@ -24,3 +24,27 @@ define void @vectorInstrCost() {

ret void
}

; Cost-model test for extractelement on a <4 x i64> argument: one extract per
; lane index (0, 1, 2, 3). The FileCheck directives below pin the cost that is
; reported for each index.
; CHECK-LABEL: vectorInstrExtractCost
define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) {

; Vector extracts - extracting the element at index 0 is considered
; free in the current implementation. When extracting the element at
; index 2, the index is rounded to 0, so that extract has cost 0 as
; well.
; NOTE(review): presumably the rounding comes from <4 x i64> being split into
; two <2 x i64> halves, making lane 2 the first lane of the second half --
; confirm against the AArch64 cost model implementation.
;
; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1
; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2
%t1 = extractelement <4 x i64> %vecreg, i32 1
%t2 = extractelement <4 x i64> %vecreg, i32 2
; Give both extracted values real users so they feed the returned result.
%ele = add i64 %t2, 1
%cond = icmp eq i64 %t1, %ele

; Remaining lanes: index 0 is expected to be free, index 3 is expected to
; cost the same as index 1.
; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0
; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3
%t0 = extractelement <4 x i64> %vecreg, i32 0
%t3 = extractelement <4 x i64> %vecreg, i32 3
; Select between lanes 0 and 3 so that all four extracts contribute to %val.
%val = select i1 %cond, i64 %t0 , i64 %t3

ret i64 %val
}
67 changes: 67 additions & 0 deletions llvm/test/Transforms/LICM/AArch64/extract-element.ll
@@ -0,0 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -licm -mtriple aarch64-linux-gnu -S < %s | FileCheck %s

; LICM test input: a loop that walks <1 x i64> elements starting at %0 while
; the index is below %1 and the loaded element equals -1. The xor/add/icmp-uge
; chain computed in the loop body is only consumed by a phi in the exit block.
; The assertions below record the current output, in which that chain appears
; in .split.loop.exit together with a second extractelement that reads the
; loaded vector through an LCSSA phi, while the original extractelement stays
; in the loop.
define i1 @func(ptr %0, i64 %1) {
; CHECK-LABEL: @func(
; CHECK-NEXT: br label [[TMP3:%.*]]
; CHECK: 3:
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP12:%.*]], [[TMP11:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[TMP1:%.*]]
; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[DOTSPLIT_LOOP_EXIT2:%.*]]
; CHECK: 6:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <1 x i64>, ptr [[TMP0:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <1 x i64> [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], -1
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11]], label [[DOTSPLIT_LOOP_EXIT:%.*]]
; CHECK: 11:
; CHECK-NEXT: [[TMP12]] = add i64 [[TMP4]], 1
; CHECK-NEXT: br label [[TMP3]]
; CHECK: .split.loop.exit:
; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ]
; CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ]
; CHECK-NEXT: [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0
; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], -1
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]]
; CHECK-NEXT: br label [[TMP17:%.*]]
; CHECK: .split.loop.exit2:
; CHECK-NEXT: [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ]
; CHECK-NEXT: [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ]
; CHECK-NEXT: br label [[TMP17]]
; CHECK: 17:
; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ]
; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[TMP18]], true
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]]
; CHECK-NEXT: ret i1 [[TMP21]]
;
br label %3

; Loop header: %4 is the induction variable (0 on entry, %15 from the latch);
; the loop is left once %4 is no longer below the bound %1.
3: ; preds = %14, %2
%4 = phi i64 [ 0, %2 ], [ %15, %14 ]
%5 = icmp ult i64 %4, %1
br i1 %5, label %6, label %16

; Loop body: load the %4-th <1 x i64> element, extract its only lane, and
; continue iterating only while that lane equals -1.
6: ; preds = %3
%7 = getelementptr inbounds <1 x i64>, ptr %0, i64 %4
%8 = load <1 x i64>, ptr %7, align 8
%9 = extractelement <1 x i64> %8, i64 0
%10 = icmp eq i64 %9, -1
; %11..%13 have no users inside the loop; %13 only feeds the phi %18 in the
; exit block, which makes this chain a candidate for sinking out of the loop.
%11 = xor i64 %9, -1
%12 = add i64 %11, %4
%13 = icmp uge i64 %12, %1
br i1 %10, label %14, label %16

; Latch: advance the induction variable and branch back to the header.
14: ; preds = %6
%15 = add i64 %4, 1
br label %3

; Exit block, reached from both the header and the body. %18 is undef on the
; edge from the header because %13 is only computed in the body.
16: ; preds = %3, %6
%17 = phi i1 [ %5, %3 ], [ %5, %6 ]
%18 = phi i1 [ %13, %6 ], [ undef, %3 ]
%19 = xor i1 %17, true
%20 = select i1 %19, i1 true, i1 %18
ret i1 %20
}

0 comments on commit b242e85

Please sign in to comment.