Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This change allows defs in the loop preheader that have PHI uses to be sunk into the loop body. Loop sink can now see through a PHI use and select the incoming blocks of the value being used as candidate sink destinations. This makes loop sink more effective, so more LICM can be undone when profile info proves it unprofitable. It addresses the motivating case in D87551 without resorting to profile-guided LICM, which breaks canonicalization. Differential Revision: https://reviews.llvm.org/D152772
- Loading branch information
Showing
2 changed files
with
133 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 | ||
; RUN: opt -S -verify-memoryssa -passes=loop-sink < %s | FileCheck %s | ||
; Make sure that unprofitable LICM (loop-invariant code motion) can be undone by loop sink, and that loop sink can handle | ||
; sinking through PHI uses. | ||
|
||
|
||
; Loop-sink test input. The preheader .l.check.preheader defines %flag, %tmp2 and %tmp1, | ||
; none of which is used in the preheader itself: | ||
;   - %flag is used only as the branch condition in .l.cold; | ||
;   - %tmp1 and %tmp2 are used only as incoming values of the PHI %i7 in .l.cold3, | ||
;     arriving from .l.cold1 and .l.cold2 respectively. | ||
; The autogenerated FileCheck assertions below expect the output to have the icmp in | ||
; .l.cold, the mul in .l.cold1, the add in .l.cold2, and a preheader that contains only | ||
; the branch to .l.check. | ||
define dso_local i32 @_Z3fooii(i32 %arg, i32 %arg1, i32 %arg2) local_unnamed_addr #0 !prof !29 { | ||
; CHECK-LABEL: define dso_local i32 @_Z3fooii | ||
; CHECK-SAME: (i32 [[ARG:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !prof [[PROF29:![0-9]+]] { | ||
; CHECK-NEXT: .l.check.preheader: | ||
; CHECK-NEXT: br label [[DOTL_CHECK:%.*]] | ||
; CHECK: .l.ret.loopexit: | ||
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[I10:%.*]], [[DOTL_ITERATE:%.*]] ] | ||
; CHECK-NEXT: ret i32 [[DOTLCSSA]] | ||
; CHECK: .l.check: | ||
; CHECK-NEXT: [[I4:%.*]] = phi i32 [ 0, [[DOTL_CHECK_PREHEADER:%.*]] ], [ [[I11:%.*]], [[DOTL_ITERATE]] ] | ||
; CHECK-NEXT: [[I5:%.*]] = phi i32 [ [[ARG]], [[DOTL_CHECK_PREHEADER]] ], [ [[I10]], [[DOTL_ITERATE]] ] | ||
; CHECK-NEXT: [[I6:%.*]] = icmp eq i32 [[I4]], [[ARG1]] | ||
; CHECK-NEXT: br i1 [[I6]], label [[DOTL_COLD:%.*]], label [[DOTL_ITERATE]], !prof [[PROF30:![0-9]+]] | ||
; CHECK: .l.cold: | ||
; CHECK-NEXT: [[FLAG:%.*]] = icmp eq i32 [[ARG1]], 5 | ||
; CHECK-NEXT: br i1 [[FLAG]], label [[DOTL_COLD1:%.*]], label [[DOTL_COLD2:%.*]] | ||
; CHECK: .l.cold1: | ||
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[ARG2]], [[ARG2]] | ||
; CHECK-NEXT: br label [[DOTL_COLD3:%.*]] | ||
; CHECK: .l.cold2: | ||
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[ARG2]], [[ARG2]] | ||
; CHECK-NEXT: br label [[DOTL_COLD3]] | ||
; CHECK: .l.cold3: | ||
; CHECK-NEXT: [[I7:%.*]] = phi i32 [ [[TMP1]], [[DOTL_COLD1]] ], [ [[TMP2]], [[DOTL_COLD2]] ] | ||
; CHECK-NEXT: [[I8:%.*]] = tail call i32 @_Z3bari(i32 [[I5]]) | ||
; CHECK-NEXT: [[I9:%.*]] = add nsw i32 [[I8]], [[I7]] | ||
; CHECK-NEXT: br label [[DOTL_ITERATE]] | ||
; CHECK: .l.iterate: | ||
; CHECK-NEXT: [[I10]] = phi i32 [ [[I9]], [[DOTL_COLD3]] ], [ [[I5]], [[DOTL_CHECK]] ] | ||
; CHECK-NEXT: [[I11]] = add nuw nsw i32 [[I4]], 1 | ||
; CHECK-NEXT: [[I12:%.*]] = icmp eq i32 [[I11]], [[I10]] | ||
; CHECK-NEXT: br i1 [[I12]], label [[DOTL_RET_LOOPEXIT:%.*]], label [[DOTL_CHECK]] | ||
; | ||
|
||
.l.check.preheader: | ||
; Sink candidates: defined here, outside the loop, but used only on the path through .l.cold. | ||
%flag = icmp eq i32 %arg1, 5 | ||
%tmp2 = add nsw i32 %arg2, %arg2 | ||
%tmp1 = mul nsw i32 %arg2, %arg2 | ||
br label %.l.check | ||
|
||
.l.ret.loopexit: ; preds = %.l.iterate | ||
%.lcssa = phi i32 [ %i10, %.l.iterate ] | ||
ret i32 %.lcssa | ||
|
||
.l.check: ; preds = %.l.iterate, %.l.check.preheader | ||
%i4 = phi i32 [ 0, %.l.check.preheader ], [ %i11, %.l.iterate ] | ||
%i5 = phi i32 [ %arg, %.l.check.preheader ], [ %i10, %.l.iterate ] | ||
%i6 = icmp eq i32 %i4, %arg1 | ||
; !prof !30 is branch_weights 1 / 201: weight 1 for the first successor (.l.cold) and | ||
; 201 for the second (.l.iterate), i.e. the .l.cold path is marked as rarely taken. | ||
br i1 %i6, label %.l.cold, label %.l.iterate, !prof !30 | ||
|
||
.l.cold: ; preds = %.l.check | ||
; Only use of %flag in this function. | ||
br i1 %flag, label %.l.cold1, label %.l.cold2 | ||
|
||
.l.cold1: ; preds = %.l.cold | ||
br label %.l.cold3 | ||
|
||
.l.cold2: ; preds = %.l.cold | ||
br label %.l.cold3 | ||
|
||
.l.cold3: ; preds = %.l.cold2, %.l.cold1 | ||
; Only uses of %tmp1 and %tmp2: PHI incoming values from .l.cold1 and .l.cold2. | ||
%i7 = phi i32 [ %tmp1, %.l.cold1 ], [ %tmp2, %.l.cold2 ] | ||
%i8 = tail call i32 @_Z3bari(i32 %i5) | ||
%i9 = add nsw i32 %i8, %i7 | ||
br label %.l.iterate | ||
|
||
.l.iterate: ; preds = %.l.cold3, %.l.check | ||
%i10 = phi i32 [ %i9, %.l.cold3 ], [ %i5, %.l.check ] | ||
%i11 = add nuw nsw i32 %i4, 1 | ||
%i12 = icmp eq i32 %i11, %i10 | ||
br i1 %i12, label %.l.ret.loopexit, label %.l.check | ||
} | ||
|
||
; External function called from .l.cold3 in @_Z3fooii; only declared in this module. | ||
declare dso_local i32 @_Z3bari(i32) local_unnamed_addr | ||
|
||
; Attribute group #0 is the one referenced by the definition of @_Z3fooii. | ||
attributes #0 = { "use-sample-profile" } | ||
|
||
; The only module flag is !0, which carries the ProfileSummary node !1. | ||
!llvm.module.flags = !{!0} | ||
|
||
; Profile summary; !2 records the profile format as SampleProfile. | ||
; NOTE(review): each DetailedSummary entry (!13-!28) looks like a | ||
; (cutoff, min count, number of counts) triple - confirm against LLVM's profile summary docs. | ||
!0 = !{i32 1, !"ProfileSummary", !1} | ||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11} | ||
!2 = !{!"ProfileFormat", !"SampleProfile"} | ||
!3 = !{!"TotalCount", i64 403} | ||
!4 = !{!"MaxCount", i64 200} | ||
!5 = !{!"MaxInternalCount", i64 0} | ||
!6 = !{!"MaxFunctionCount", i64 1} | ||
!7 = !{!"NumCounts", i64 6} | ||
!8 = !{!"NumFunctions", i64 1} | ||
!9 = !{!"IsPartialProfile", i64 0} | ||
!10 = !{!"PartialProfileRatio", double 0.000000e+00} | ||
!11 = !{!"DetailedSummary", !12} | ||
!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28} | ||
!13 = !{i32 10000, i64 200, i32 2} | ||
!14 = !{i32 100000, i64 200, i32 2} | ||
!15 = !{i32 200000, i64 200, i32 2} | ||
!16 = !{i32 300000, i64 200, i32 2} | ||
!17 = !{i32 400000, i64 200, i32 2} | ||
!18 = !{i32 500000, i64 200, i32 2} | ||
!19 = !{i32 600000, i64 200, i32 2} | ||
!20 = !{i32 700000, i64 200, i32 2} | ||
!21 = !{i32 800000, i64 200, i32 2} | ||
!22 = !{i32 900000, i64 200, i32 2} | ||
!23 = !{i32 950000, i64 200, i32 2} | ||
!24 = !{i32 990000, i64 200, i32 2} | ||
!25 = !{i32 999000, i64 1, i32 5} | ||
!26 = !{i32 999900, i64 1, i32 5} | ||
!27 = !{i32 999990, i64 1, i32 5} | ||
!28 = !{i32 999999, i64 1, i32 5} | ||
; !29 is attached to @_Z3fooii via !prof: function entry count of 2. | ||
!29 = !{!"function_entry_count", i64 2} | ||
; !30 is attached to the conditional branch in .l.check: weight 1 for its first | ||
; successor (.l.cold) and 201 for its second (.l.iterate). | ||
!30 = !{!"branch_weights", i32 1, i32 201} |