diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg
new file mode 100644
index 0000000000000..cc24278acbb41
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "LoongArch" in config.root.targets:  # Mark this directory unsupported unless "LoongArch" is among config.root.targets.
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
new file mode 100644
index 0000000000000..0bb1d43617011
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s --passes=loop-reduce --mtriple=loongarch64 -S | FileCheck %s -check-prefix=CHECK-OPT
+; RUN: llc < %s --mtriple=loongarch64 | FileCheck %s -check-prefix=CHECK-LLC
+
+define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q) nounwind {
+; CHECK-LLC-LABEL: foo:
+; CHECK-LLC: # %bb.0: # %entry
+; CHECK-LLC-NEXT: lu12i.w $a3, -1
+; CHECK-LLC-NEXT: lu12i.w $a4, 1
+; CHECK-LLC-NEXT: .p2align 4, , 16
+; CHECK-LLC-NEXT: .LBB0_1: # %for.body
+; CHECK-LLC-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-LLC-NEXT: add.d $a5, $a0, $a3
+; CHECK-LLC-NEXT: ldx.w $a5, $a5, $a4
+; CHECK-LLC-NEXT: add.d $a6, $a1, $a3
+; CHECK-LLC-NEXT: ldx.w $a6, $a6, $a4
+; CHECK-LLC-NEXT: add.d $a5, $a6, $a5
+; CHECK-LLC-NEXT: add.d $a6, $a2, $a3
+; CHECK-LLC-NEXT: addi.d $a3, $a3, 4
+; CHECK-LLC-NEXT: stptr.w $a5, $a6, 4096
+; CHECK-LLC-NEXT: bnez $a3, .LBB0_1
+; CHECK-LLC-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-LLC-NEXT: ret
+; CHECK-OPT-LABEL: define void @foo(
+; CHECK-OPT-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr captures(none) [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-OPT-NEXT: [[ENTRY:.*]]:
+; CHECK-OPT-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-OPT: [[FOR_COND_CLEANUP:.*]]:
+; CHECK-OPT-NEXT: ret void
+; CHECK-OPT: [[FOR_BODY]]:
+; CHECK-OPT-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ -4096, %[[ENTRY]] ]
+; CHECK-OPT-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]]
+; CHECK-OPT-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 4096
+; CHECK-OPT-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP5]], align 4
+; CHECK-OPT-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]]
+; CHECK-OPT-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4096
+; CHECK-OPT-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
+; CHECK-OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
+; CHECK-OPT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]]
+; CHECK-OPT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 4096
+; CHECK-OPT-NEXT: store i32 [[ADD]], ptr [[SCEVGEP1]], align 4
+; CHECK-OPT-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
+; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+;
+entry: ; no guard branch here: the loop below has a constant trip count
+  br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+  ret void
+
+for.body: ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; element index shared by %x, %y and %q; starts at 0
+  %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv
+  %ldtmp = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
+  %ldtmp1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %ldtmp1, %ldtmp
+  %arrayidx4 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4 ; q[i] = y[i] + x[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; advance by one i32 element
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; exit after 1024 iterations; the opt checks above expect a byte-offset IV that starts at -4096, steps by 4 and is compared with 0
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q, i32 %n) nounwind {
+; CHECK-LLC-LABEL: bar:
+; CHECK-LLC: # %bb.0: # %entry
+; CHECK-LLC-NEXT: addi.w $a4, $a3, 0
+; CHECK-LLC-NEXT: blez $a4, .LBB1_3
+; CHECK-LLC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-LLC-NEXT: bstrpick.d $a3, $a3, 31, 0
+; CHECK-LLC-NEXT: .p2align 4, , 16
+; CHECK-LLC-NEXT: .LBB1_2: # %for.body
+; CHECK-LLC-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-LLC-NEXT: ld.w $a4, $a0, 0
+; CHECK-LLC-NEXT: ld.w $a5, $a1, 0
+; CHECK-LLC-NEXT: add.d $a4, $a5, $a4
+; CHECK-LLC-NEXT: st.w $a4, $a2, 0
+; CHECK-LLC-NEXT: addi.d $a3, $a3, -1
+; CHECK-LLC-NEXT: addi.d $a2, $a2, 4
+; CHECK-LLC-NEXT: addi.d $a1, $a1, 4
+; CHECK-LLC-NEXT: addi.d $a0, $a0, 4
+; CHECK-LLC-NEXT: bnez $a3, .LBB1_2
+; CHECK-LLC-NEXT: .LBB1_3: # %for.cond.cleanup
+; CHECK-LLC-NEXT: ret
+; CHECK-OPT-LABEL: define void @bar(
+; CHECK-OPT-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr captures(none) [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-OPT-NEXT: [[ENTRY:.*:]]
+; CHECK-OPT-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-OPT-NEXT: br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK-OPT: [[FOR_BODY_PREHEADER]]:
+; CHECK-OPT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-OPT-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-OPT: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-OPT-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK-OPT: [[FOR_COND_CLEANUP]]:
+; CHECK-OPT-NEXT: ret void
+; CHECK-OPT: [[FOR_BODY]]:
+; CHECK-OPT-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[FOR_BODY]] ], [ [[Y]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_BODY]] ], [ [[Q]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[WIDE_TRIP_COUNT]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT: [[LDTMP:%.*]] = load i32, ptr [[LSR_IV4]], align 4
+; CHECK-OPT-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[LSR_IV2]], align 4
+; CHECK-OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
+; CHECK-OPT-NEXT: store i32 [[ADD]], ptr [[LSR_IV1]], align 4
+; CHECK-OPT-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
+; CHECK-OPT-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-OPT-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
+; CHECK-OPT-NEXT: [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4
+; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]]
+;
+entry:
+  %cmp10 = icmp sgt i32 %n, 0 ; guard: the loop is skipped entirely when %n <= 0
+  br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64 ; run-time trip count: %n zero-extended to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body: ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; element index shared by %x, %y and %q; starts at 0
+  %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv
+  %ldtmp = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
+  %ldtmp1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %ldtmp1, %ldtmp
+  %arrayidx4 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4 ; q[i] = y[i] + x[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; advance by one i32 element
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; exit when the index reaches the run-time trip count; the opt checks above expect a counter that starts at the trip count, steps by -1 and is compared with 0, plus one pointer IV per array
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}