diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 92a9388e5cb7b..02629daeeb2f7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -153,6 +153,7 @@ class LoongArchPassConfig : public TargetPassConfig { LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { setEnableSinkAndFold(EnableSinkFold); + EnableLoopTermFold = true; } LoongArchTargetMachine &getLoongArchTargetMachine() const { diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 661f67d4989c4..1391c44e35443 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -44,6 +44,7 @@ ; LAXX-NEXT: Canonicalize Freeze Instructions in Loops ; LAXX-NEXT: Induction Variable Users ; LAXX-NEXT: Loop Strength Reduction +; LAXX-NEXT: Loop Terminator Folding ; LAXX-NEXT: Basic Alias Analysis (stateless AA impl) ; LAXX-NEXT: Function Alias Analysis Results ; LAXX-NEXT: Merge contiguous icmps into a memcmp diff --git a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll index 0f81f860025df..7a0e0d77f0690 100644 --- a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll +++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll @@ -9,13 +9,13 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef s ; LA464-NEXT: blez $a1, .LBB0_3 ; LA464-NEXT: # %bb.1: ; LA464-NEXT: bstrpick.d $a1, $a1, 31, 0 +; LA464-NEXT: alsl.d $a1, $a1, $a0, 2 ; LA464-NEXT: .p2align 4, , 16 ; LA464-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 ; LA464-NEXT: ld.w $a3, $a0, 0 -; LA464-NEXT: add.w $a2, $a3, $a2 -; LA464-NEXT: addi.d $a1, $a1, -1 ; LA464-NEXT: addi.d $a0, $a0, 4 -; LA464-NEXT: bnez $a1, .LBB0_2 +; LA464-NEXT: add.w $a2, $a3, $a2 +; LA464-NEXT: bne $a0, $a1, .LBB0_2 ; LA464-NEXT: .LBB0_3: ; LA464-NEXT: move $a0, $a2 ; LA464-NEXT: ret diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll index a09a9a37034e1..4a342dc921f12 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll @@ -1,12 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s --passes=loop-reduce --mtriple=loongarch64 -S | FileCheck %s -check-prefix=CHECK-OPT +; RUN: opt < %s --passes=loop-reduce,loop-term-fold --mtriple=loongarch64 -S | FileCheck %s -check-prefix=CHECK-TF ; RUN: llc < %s --mtriple=loongarch64 | FileCheck %s -check-prefix=CHECK-LLC define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q) nounwind { ; CHECK-LLC-LABEL: foo: ; CHECK-LLC: # %bb.0: # %entry ; CHECK-LLC-NEXT: move $a3, $zero -; CHECK-LLC-NEXT: ori $a4, $zero, 1024 +; CHECK-LLC-NEXT: lu12i.w $a4, 1 ; CHECK-LLC-NEXT: .p2align 4, , 16 ; CHECK-LLC-NEXT: .LBB0_1: # %for.body ; CHECK-LLC-NEXT: # =>This Inner Loop Header: Depth=1 @@ -14,9 +15,8 @@ define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap ; CHECK-LLC-NEXT: ldx.w $a6, $a1, $a3 ; CHECK-LLC-NEXT: add.d $a5, $a6, $a5 ; CHECK-LLC-NEXT: stx.w $a5, $a2, $a3 -; CHECK-LLC-NEXT: addi.d $a4, $a4, -1 ; CHECK-LLC-NEXT: addi.d $a3, $a3, 4 -; CHECK-LLC-NEXT: bnez $a4, .LBB0_1 +; CHECK-LLC-NEXT: bne $a3, $a4, .LBB0_1 ; CHECK-LLC-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-LLC-NEXT: ret ; CHECK-OPT-LABEL: define void @foo( @@ -40,6 +40,25 @@ define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap ; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; +; CHECK-TF-LABEL: define void @foo( +; CHECK-TF-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr captures(none) [[Q:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-TF: [[FOR_COND_CLEANUP:.*]]: +; CHECK-TF-NEXT: ret void +; CHECK-TF: [[FOR_BODY]]: +; CHECK-TF-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ] +; CHECK-TF-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV1]] +; CHECK-TF-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP4]], align 4 +; CHECK-TF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV1]] +; CHECK-TF-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP3]], align 4 +; CHECK-TF-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]] +; CHECK-TF-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV1]] +; CHECK-TF-NEXT: store i32 [[ADD]], ptr [[SCEVGEP]], align 4 +; CHECK-TF-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 4 +; CHECK-TF-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq i64 [[LSR_IV_NEXT2]], 4096 +; CHECK-TF-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; entry: br label %for.body @@ -106,6 +125,32 @@ define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap ; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], [[LSR_IV_NEXT]] ; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] ; +; CHECK-TF-LABEL: define void @bar( +; CHECK-TF-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr captures(none) [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-TF-NEXT: [[ENTRY:.*:]] +; CHECK-TF-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-TF-NEXT: br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK-TF: [[FOR_BODY_PREHEADER]]: +; CHECK-TF-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-TF-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2 +; CHECK-TF-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-TF: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-TF-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK-TF: [[FOR_COND_CLEANUP]]: +; CHECK-TF-NEXT: ret void +; CHECK-TF: [[FOR_BODY]]: +; CHECK-TF-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-TF-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]] +; CHECK-TF-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP2]], align 4 +; CHECK-TF-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]] +; CHECK-TF-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP1]], align 4 +; CHECK-TF-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]] +; CHECK-TF-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]] +; CHECK-TF-NEXT: store i32 [[ADD]], ptr [[SCEVGEP]], align 4 +; CHECK-TF-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4 +; CHECK-TF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], [[LSR_IV_NEXT]] +; CHECK-TF-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] +; entry: %cmp10 = icmp sgt i32 %n, 0 br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup