Skip to content

Commit

Permalink
[LSR] Don't force bases of foldable formulae to the final type.
Browse files Browse the repository at this point in the history
Summary:
Before emitting code for scaled registers, we prevent
SCEVExpander from hoisting any scaled addressing mode
by emitting all the bases first. However, these bases
are being forced to the final type, resulting in some
odd code.

For example, if the type of the base is an integer and
the final type is a pointer, we will emit an inttoptr
for the base, a ptrtoint for the scale, and then a
'reverse' GEP where the GEP pointer is actually the base
integer and the index is the pointer. It's more intuitive
to use the pointer as a pointer and the integer as the index.

Patch by: Bevin Hansson

Reviewers: atrick, qcolombet, sanjoy

Reviewed By: qcolombet

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42103

llvm-svn: 323946
  • Loading branch information
mikaelholmen committed Feb 1, 2018
1 parent 1a6493b commit 6d06976
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 43 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
Expand Down
56 changes: 26 additions & 30 deletions llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,25 @@ target triple = "x86_64-unknown-unknown"
define void @maxArray(double* noalias nocapture %x, double* noalias nocapture readonly %y) {
; JAG-LABEL: @maxArray(
; JAG-NEXT: entry:
; JAG-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to <2 x double>*
; JAG-NEXT: [[X4:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
; JAG-NEXT: [[X45:%.*]] = bitcast <2 x double>* [[X4]] to i8*
; JAG-NEXT: [[Y12:%.*]] = bitcast <2 x double>* [[Y1]] to i8*
; JAG-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
; JAG-NEXT: [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
; JAG-NEXT: br label [[VECTOR_BODY:%.*]]
; JAG: vector.body:
; JAG-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
; JAG-NEXT: [[UGLYGEP9:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
; JAG-NEXT: [[UGLYGEP910:%.*]] = bitcast i8* [[UGLYGEP9]] to <2 x double>*
; JAG-NEXT: [[SCEVGEP11:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP910]], i64 32768
; JAG-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y12]], i64 [[LSR_IV]]
; JAG-NEXT: [[UGLYGEP3:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP3]], i64 32768
; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP11]], align 8
; JAG-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
; JAG-NEXT: [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
; JAG-NEXT: [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
; JAG-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
; JAG-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
; JAG-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
; JAG-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; JAG-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
; JAG-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
; JAG-NEXT: [[UGLYGEP67:%.*]] = bitcast i8* [[UGLYGEP6]] to <2 x double>*
; JAG-NEXT: [[SCEVGEP8:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP67]], i64 32768
; JAG-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP8]], align 8
; JAG-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
; JAG-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
; JAG-NEXT: [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
; JAG-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
; JAG-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
; JAG-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; JAG-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
Expand All @@ -45,27 +43,25 @@ define void @maxArray(double* noalias nocapture %x, double* noalias nocapture re
;
; HSW-LABEL: @maxArray(
; HSW-NEXT: entry:
; HSW-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to <2 x double>*
; HSW-NEXT: [[X4:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
; HSW-NEXT: [[X45:%.*]] = bitcast <2 x double>* [[X4]] to i8*
; HSW-NEXT: [[Y12:%.*]] = bitcast <2 x double>* [[Y1]] to i8*
; HSW-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
; HSW-NEXT: [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
; HSW-NEXT: br label [[VECTOR_BODY:%.*]]
; HSW: vector.body:
; HSW-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
; HSW-NEXT: [[UGLYGEP9:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
; HSW-NEXT: [[UGLYGEP910:%.*]] = bitcast i8* [[UGLYGEP9]] to <2 x double>*
; HSW-NEXT: [[SCEVGEP11:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP910]], i64 32768
; HSW-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y12]], i64 [[LSR_IV]]
; HSW-NEXT: [[UGLYGEP3:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP3]], i64 32768
; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP11]], align 8
; HSW-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
; HSW-NEXT: [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
; HSW-NEXT: [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
; HSW-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
; HSW-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
; HSW-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
; HSW-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; HSW-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
; HSW-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
; HSW-NEXT: [[UGLYGEP67:%.*]] = bitcast i8* [[UGLYGEP6]] to <2 x double>*
; HSW-NEXT: [[SCEVGEP8:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP67]], i64 32768
; HSW-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP8]], align 8
; HSW-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
; HSW-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
; HSW-NEXT: [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
; HSW-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
; HSW-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
; HSW-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; HSW-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,25 @@ define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray)
; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i8*
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV13:%.*]] = inttoptr i64 [[LSR_IV1]] to i8*
; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
; CHECK: for.body2.preheader:
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
; CHECK: for.body2:
; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
; CHECK-NEXT: [[LSR_IV45:%.*]] = ptrtoint i8* [[LSR_IV4]] to i64
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP8]], align 1
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[LSR_IV45]]
; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP7]], align 1
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP6]], align 1
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[TMP0]]
; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP5]], align 1
; CHECK-NEXT: [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV13]], i64 [[LSR_IV45]]
; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP6]], align 1
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[LSR_IV1]]
; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP4]], align 1
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
; CHECK: for.inc.loopexit:
Expand Down Expand Up @@ -94,4 +91,3 @@ for.inc: ; preds = %for.inc.loopexit, %
for.end.loopexit: ; preds = %for.inc
ret void
}

0 comments on commit 6d06976

Please sign in to comment.