diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ce26e61880fd0..5f84175da703d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -599,12 +599,8 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
     bool UseMaskForCond, bool UseMaskForGaps) {
-  if (isa<ScalableVectorType>(VecTy))
+  if (isa<ScalableVectorType>(VecTy) && Factor != 2)
     return InstructionCost::getInvalid();
-  auto *FVTy = cast<FixedVectorType>(VecTy);
-  InstructionCost MemCost =
-      getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
-  unsigned VF = FVTy->getNumElements() / Factor;
 
   // The interleaved memory access pass will lower interleaved memory ops (i.e.
   // a load and store followed by a specific shuffle) to vlseg/vsseg
@@ -612,24 +608,35 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
   // memory op
   if (!UseMaskForCond && !UseMaskForGaps &&
       Factor <= TLI->getMaxSupportedInterleaveFactor()) {
-    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(FVTy);
+    auto *VTy = cast<VectorType>(VecTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
     // Need to make sure the type hasn't been scalarized
-    if (LT.second.isFixedLengthVector()) {
-      auto *LegalFVTy = FixedVectorType::get(FVTy->getElementType(),
-                                             LT.second.getVectorNumElements());
+    if (LT.second.isVector()) {
+      auto *LegalVTy = VectorType::get(VTy->getElementType(),
+                                       LT.second.getVectorElementCount());
       // FIXME: We use the memory op cost of the *legalized* type here because
       // its getMemoryOpCost returns a really expensive cost for types like
       // <6 x i8>, which show up when doing interleaves of Factor=3 etc.
       // Should the memory op cost of these be cheaper?
-      if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, Alignment,
+      if (TLI->isLegalInterleavedAccessType(LegalVTy, Factor, Alignment,
                                             AddressSpace, DL)) {
         InstructionCost LegalMemCost = getMemoryOpCost(
-            Opcode, LegalFVTy, Alignment, AddressSpace, CostKind);
+            Opcode, LegalVTy, Alignment, AddressSpace, CostKind);
         return LT.first + LegalMemCost;
       }
     }
   }
 
+  // TODO: Return the cost of interleaved accesses for scalable vectors when
+  // unable to convert them to segment access instructions.
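+  // A scalable vector that reaches this point was not costed as a segment
+  // access above (e.g. its legalized type is not a legal vlseg/vsseg type),
+  // and the shuffle-based fallback below only applies to fixed vectors.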
+  if (isa<ScalableVectorType>(VecTy))
+    return InstructionCost::getInvalid();
+
+  auto *FVTy = cast<FixedVectorType>(VecTy);
+  InstructionCost MemCost =
+      getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
+  unsigned VF = FVTy->getNumElements() / Factor;
+
   // An interleaved load will look like this for Factor=3:
   // %wide.vec = load <12 x i32>, ptr %3, align 4
   // %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index 6fa197591ab33..576dc0833fa36 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -1,36 +1,48 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=off -mtriple=riscv64 -mattr=+v -S | FileCheck %s --check-prefix=FIXED
+; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -mtriple=riscv64 -mattr=+v -S | FileCheck %s --check-prefix=SCALABLE
 
 define void @load_store_factor2_i32(ptr %p) {
 ; CHECK-LABEL: @load_store_factor2_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -1
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 -1
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]])
+; CHECK-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
@@ -50,6 +62,112 @@ define void @load_store_factor2_i32(ptr %p) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+; FIXED-LABEL: @load_store_factor2_i32(
+; FIXED-NEXT:  entry:
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED:       vector.ph:
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; FIXED:       vector.body:
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4
+; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; FIXED-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
+; FIXED-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP5]]
+; FIXED-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; FIXED-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -1
+; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; FIXED-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; FIXED:       middle.block:
+; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED:       scalar.ph:
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED:       loop:
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; FIXED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
+; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
+; FIXED-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
+; FIXED-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; FIXED:       exit:
+; FIXED-NEXT:    ret void
+;
+; SCALABLE-LABEL: @load_store_factor2_i32(
+; SCALABLE-NEXT:  entry:
+; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE:       vector.ph:
+; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
+; SCALABLE:       vector.body:
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
+; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
+; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP13:%.*]] = add i64 [[TMP7]], 1
+; SCALABLE-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP13]]
+; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 -1
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]])
+; SCALABLE-NEXT:    store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP16]], align 4
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; SCALABLE:       middle.block:
+; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE:       scalar.ph:
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE:       loop:
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
+; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
+; SCALABLE-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
+; SCALABLE-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; SCALABLE:       exit:
+; SCALABLE-NEXT:    ret void
+;
 entry:
   br label %loop
 loop:
@@ -77,33 +195,43 @@ exit:
 define void @load_store_factor2_i64(ptr %p) {
 ; CHECK-LABEL: @load_store_factor2_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], <i64 1, i64 1, i64 1, i64 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP6]], i32 -1
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[TMP14]], i32 -1
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]])
+; CHECK-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP16]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
@@ -123,6 +251,112 @@ define void @load_store_factor2_i64(ptr %p) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+; FIXED-LABEL: @load_store_factor2_i64(
+; FIXED-NEXT:  entry:
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED:       vector.ph:
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; FIXED:       vector.body:
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 8
+; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
+; FIXED-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]]
+; FIXED-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], <i64 2, i64 2, i64 2, i64 2>
+; FIXED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP6]], i32 -1
+; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; FIXED-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; FIXED:       middle.block:
+; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED:       scalar.ph:
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED:       loop:
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; FIXED:       exit:
+; FIXED-NEXT:    ret void
+;
+; SCALABLE-LABEL: @load_store_factor2_i64(
+; SCALABLE-NEXT:  entry:
+; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE:       vector.ph:
+; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
+; SCALABLE:       vector.body:
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP9]], align 8
+; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
+; SCALABLE-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP13:%.*]] = add i64 [[TMP7]], 1
+; SCALABLE-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP13]]
+; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[TMP14]], i32 -1
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]])
+; SCALABLE-NEXT:    store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP16]], align 8
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; SCALABLE:       middle.block:
+; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE:       scalar.ph:
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE:       loop:
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; SCALABLE:       exit:
+; SCALABLE-NEXT:    ret void
+;
 entry:
   br label %loop
 loop:
@@ -206,6 +440,122 @@ define void @load_store_factor3_i32(ptr %p) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+; FIXED-LABEL: @load_store_factor3_i32(
+; FIXED-NEXT:  entry:
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED:       vector.ph:
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; FIXED:       vector.body:
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
+; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <6 x i32>, ptr [[TMP3]], align 4
+; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
+; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 1, i32 4>
+; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 2, i32 5>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[STRIDED_VEC]], <i32 1, i32 1>
+; FIXED-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
+; FIXED-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[STRIDED_VEC1]], <i32 2, i32 2>
+; FIXED-NEXT:    [[TMP7:%.*]] = add i64 [[TMP5]], 1
+; FIXED-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP7]]
+; FIXED-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[STRIDED_VEC2]], <i32 3, i32 3>
+; FIXED-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 -2
+; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <6 x i32> [[TMP13]], <6 x i32> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; FIXED-NEXT:    store <6 x i32> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; FIXED:       middle.block:
+; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED:       scalar.ph:
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED:       loop:
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; FIXED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
+; FIXED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
+; FIXED-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
+; FIXED-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
+; FIXED-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
+; FIXED-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
+; FIXED-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; FIXED:       exit:
+; FIXED-NEXT:    ret void
+;
+; SCALABLE-LABEL: @load_store_factor3_i32(
+; SCALABLE-NEXT:  entry:
+; SCALABLE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE:       vector.ph:
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
+; SCALABLE:       vector.body:
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; SCALABLE-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; SCALABLE-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
+; SCALABLE-NEXT:    [[WIDE_VEC:%.*]] = load <6 x i32>, ptr [[TMP3]], align 4
+; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
+; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 1, i32 4>
+; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 2, i32 5>
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[STRIDED_VEC]], <i32 1, i32 1>
+; SCALABLE-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[STRIDED_VEC1]], <i32 2, i32 2>
+; SCALABLE-NEXT:    [[TMP7:%.*]] = add i64 [[TMP5]], 1
+; SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP7]]
+; SCALABLE-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[STRIDED_VEC2]], <i32 3, i32 3>
+; SCALABLE-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 -2
+; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; SCALABLE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <6 x i32> [[TMP13]], <6 x i32> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; SCALABLE-NEXT:    store <6 x i32> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; SCALABLE-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; SCALABLE-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; SCALABLE:       middle.block:
+; SCALABLE-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE:       scalar.ph:
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE:       loop:
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
+; SCALABLE-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT:    [[X1:%.*]] = load i32, ptr [[Q1]], align 4
+; SCALABLE-NEXT:    [[Y1:%.*]] = add i32 [[X1]], 2
+; SCALABLE-NEXT:    store i32 [[Y1]], ptr [[Q1]], align 4
+; SCALABLE-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT:    [[X2:%.*]] = load i32, ptr [[Q2]], align 4
+; SCALABLE-NEXT:    [[Y2:%.*]] = add i32 [[X2]], 3
+; SCALABLE-NEXT:    store i32 [[Y2]], ptr [[Q2]], align 4
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; SCALABLE:       exit:
+; SCALABLE-NEXT:    ret void
+;
 entry:
   br label %loop
 loop:
@@ -248,37 +598,37 @@ define void @load_store_factor3_i64(ptr %p) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP19]], 2
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = add <vscale x 2 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 1, [[TMP10]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP10]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP11]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x ptr> [[TMP11]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP13]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP14]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP15]], <vscale x 2 x ptr> [[TMP14]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP16]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP18]], <vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP12]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP14:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP15]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP17:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP17]], <vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i64> [[TMP15]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP18]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP20]], <vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -311,6 +661,135 @@ define void @load_store_factor3_i64(ptr %p) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+; FIXED-LABEL: @load_store_factor3_i64(
+; FIXED-NEXT:  entry:
+; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED:       vector.ph:
+; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; FIXED:       vector.body:
+; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT:    [[TMP0:%.*]] = mul <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
+; FIXED-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], <4 x i64> [[TMP0]]
+; FIXED-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP1]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT:    [[TMP2:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP2]], <4 x ptr> [[TMP1]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP0]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP3]]
+; FIXED-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT:    [[TMP5:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER1]], <i64 2, i64 2, i64 2, i64 2>
+; FIXED-NEXT:    call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP5]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP3]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP6]]
+; FIXED-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP7]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER2]], <i64 3, i64 3, i64 3, i64 3>
+; FIXED-NEXT:    call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP8]], <4 x ptr> [[TMP7]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; FIXED-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; FIXED:       middle.block:
+; FIXED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED:       scalar.ph:
+; FIXED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT:    br label [[LOOP:%.*]]
+; FIXED:       loop:
+; FIXED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; FIXED-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
+; FIXED-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
+; FIXED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; FIXED:       exit:
+; FIXED-NEXT:    ret void
+;
+; SCALABLE-LABEL: @load_store_factor3_i64(
+; SCALABLE-NEXT:  entry:
+; SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE:       vector.ph:
+; SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; SCALABLE-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer
+; SCALABLE-NEXT:    [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
+; SCALABLE-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
+; SCALABLE-NEXT:    [[TMP11:%.*]] = mul i64 1, [[TMP10]]
+; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
+; SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
+; SCALABLE:       vector.body:
+; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[TMP12:%.*]] = mul <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP12]]
+; SCALABLE-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT:    [[TMP14:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; SCALABLE-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP15]]
+; SCALABLE-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT:    [[TMP17:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP17]], <vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; SCALABLE-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i64> [[TMP15]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP18]]
+; SCALABLE-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; SCALABLE-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP20]], <vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; SCALABLE-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; SCALABLE:       middle.block:
+; SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE:       scalar.ph:
+; SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT:    br label [[LOOP:%.*]]
+; SCALABLE:       loop:
+; SCALABLE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT:    [[OFFSET0:%.*]] = mul i64 [[I]], 3
+; SCALABLE-NEXT:    [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT:    [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT:    store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT:    [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT:    [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT:    [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT:    [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT:    store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT:    [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT:    [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT:    [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT:    [[Y2:%.*]] = add i64 [[X2]], 3
+; SCALABLE-NEXT:    store i64 [[Y2]], ptr [[Q2]], align 8
+; SCALABLE-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; SCALABLE:       exit:
+; SCALABLE-NEXT:    ret void
+;
 entry:
   br label %loop
 loop:
@@ -353,62 +832,62 @@ define void @load_store_factor8(ptr %p) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP35:%.*]] = mul i64 [[TMP34]], 2
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = add <vscale x 2 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 1, [[TMP10]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = shl <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP10]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP11]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x ptr> [[TMP11]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP13]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP14]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP15]], <vscale x 2 x ptr> [[TMP14]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP16]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP18]], <vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP19]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP20]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP21:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER3]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP21]], <vscale x 2 x ptr> [[TMP20]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP22:%.*]] = add <vscale x 2 x i64> [[TMP19]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP22]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP23]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP24:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 5, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP24]], <vscale x 2 x ptr> [[TMP23]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP25:%.*]] = add <vscale x 2 x i64> [[TMP22]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP25]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP26]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP27:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP27]], <vscale x 2 x ptr> [[TMP26]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP28:%.*]] = add <vscale x 2 x i64> [[TMP25]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP28]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP29]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP30:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER6]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP30]], <vscale x 2 x ptr> [[TMP29]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[TMP31:%.*]] = add <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP31]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP32]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP33:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP33]], <vscale x 2 x ptr> [[TMP32]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP35]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shl <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP12]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP14:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP15]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP17:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP17]], <vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i64> [[TMP15]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP18]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP20]], <vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP21:%.*]] = add <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP21]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP22]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP23:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER3]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP23]], <vscale x 2 x ptr> [[TMP22]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP24:%.*]] = add <vscale x 2 x i64> [[TMP21]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP24]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP25]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP26:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 5, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP26]], <vscale x 2 x ptr> [[TMP25]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP27:%.*]] = add <vscale x 2 x i64> [[TMP24]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP27]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP28]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP29:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP29]], <vscale x 2 x ptr> [[TMP28]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP30:%.*]] = add <vscale x 2 x i64> [[TMP27]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP30]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP31]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP32:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER6]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP32]], <vscale x 2 x ptr> [[TMP31]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[TMP33:%.*]] = add <vscale x 2 x i64> [[TMP30]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP33]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP34]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer), <vscale x 2 x i64> poison)
+; CHECK-NEXT:    [[TMP35:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP35]], <vscale x 2 x ptr> [[TMP34]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i1> zeroinitializer))
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -466,6 +945,235 @@ define void @load_store_factor8(ptr %p) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+; FIXED-LABEL: @load_store_factor8(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
+; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], <4 x i64> [[TMP0]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP1]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP2:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP2]], <4 x ptr> [[TMP1]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP0]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP3]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP5:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER1]], <i64 2, i64 2, i64 2, i64 2>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP5]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP3]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP6]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP7]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP8:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER2]], <i64 3, i64 3, i64 3, i64 3>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP8]], <4 x ptr> [[TMP7]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP9:%.*]] = add <4 x i64> [[TMP6]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP9]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP11:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER3]], <i64 4, i64 4, i64 4, i64 4>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP11]], <4 x ptr> [[TMP10]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP9]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP12]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP13]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP14:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER4]], <i64 5, i64 5, i64 5, i64 5>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP14]], <4 x ptr> [[TMP13]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP15:%.*]] = add <4 x i64> [[TMP12]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP15]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP16]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP17:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER5]], <i64 6, i64 6, i64 6, i64 6>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP17]], <4 x ptr> [[TMP16]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP18:%.*]] = add <4 x i64> [[TMP15]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP18]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP19]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP20:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER6]], <i64 7, i64 7, i64 7, i64 7>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP20]], <4 x ptr> [[TMP19]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[TMP21:%.*]] = add <4 x i64> [[TMP18]], <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[P]], <4 x i64> [[TMP21]]
+; FIXED-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP22]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
+; FIXED-NEXT: [[TMP23:%.*]] = add <4 x i64> [[WIDE_MASKED_GATHER7]], <i64 8, i64 8, i64 8, i64 8>
+; FIXED-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP23]], <4 x ptr> [[TMP22]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; FIXED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; FIXED-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; FIXED-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; FIXED-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; FIXED-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; FIXED-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; FIXED-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
+; FIXED-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
+; FIXED-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8
+; FIXED-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7
+; FIXED-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8
+; FIXED-NEXT: [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1
+; FIXED-NEXT: [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]]
+; FIXED-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8
+; FIXED-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8
+; FIXED-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @load_store_factor8(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; SCALABLE-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer
+; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
+; SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
+; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]]
+; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
+; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP12:%.*]] = shl <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP12]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP14:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x ptr> [[TMP13]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP15]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP17]], <vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[TMP15]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP18]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP20]], <vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP21:%.*]] = add <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP21]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP22]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP23:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER3]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP23]], <vscale x 2 x ptr> [[TMP22]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP24:%.*]] = add <vscale x 2 x i64> [[TMP21]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP24]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP25]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP26:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 5, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP26]], <vscale x 2 x ptr> [[TMP25]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP27:%.*]] = add <vscale x 2 x i64> [[TMP24]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP27]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP28]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP29:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP29]], <vscale x 2 x ptr> [[TMP28]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP30:%.*]] = add <vscale x 2 x i64> [[TMP27]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP30]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP31]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP32:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER6]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP32]], <vscale x 2 x ptr> [[TMP31]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[TMP33:%.*]] = add <vscale x 2 x i64> [[TMP30]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[P]], <vscale x 2 x i64> [[TMP33]]
+; SCALABLE-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP34]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
+; SCALABLE-NEXT: [[TMP35:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP35]], <vscale x 2 x ptr> [[TMP34]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; SCALABLE-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1
+; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2
+; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1
+; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]]
+; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3
+; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8
+; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1
+; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]]
+; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4
+; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8
+; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1
+; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]]
+; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5
+; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8
+; SCALABLE-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1
+; SCALABLE-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]]
+; SCALABLE-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8
+; SCALABLE-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6
+; SCALABLE-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8
+; SCALABLE-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1
+; SCALABLE-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]]
+; SCALABLE-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8
+; SCALABLE-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7
+; SCALABLE-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8
+; SCALABLE-NEXT: [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1
+; SCALABLE-NEXT: [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]]
+; SCALABLE-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8
+; SCALABLE-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8
+; SCALABLE-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
 entry:
   br label %loop
 loop:
@@ -529,40 +1237,40 @@ exit:
 define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; CHECK-LABEL: @combine_load_factor2_i32(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]]
-; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 8
-; CHECK-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
@@ -581,6 +1289,114 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
+; FIXED-LABEL: @combine_load_factor2_i32(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
+; FIXED-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
+; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]]
+; FIXED-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]]
+; FIXED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4
+; FIXED-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
+; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; FIXED-NEXT: [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]]
+; FIXED-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
+; FIXED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q]], i64 [[TMP1]]
+; FIXED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
+; FIXED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 8
+; FIXED-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP12]], align 4
+; FIXED-NEXT: store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
+; FIXED-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]]
+; FIXED-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
+; FIXED-NEXT: store i32 [[RES]], ptr [[DST]], align 4
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @combine_load_factor2_i32(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
+; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
+; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], [[TMP11]]
+; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
+; SCALABLE-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4
+; SCALABLE-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]]
+; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]]
+; SCALABLE-NEXT: store i32 [[RES]], ptr [[DST]], align 4
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
 entry:
   br label %loop
 loop:
@@ -609,40 +1425,40 @@ exit:
 define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; CHECK-LABEL: @combine_load_factor2_i64(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC3]]
-; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP10]], i32 4
-; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP12]], align 8
-; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr [[TMP13]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0
+; CHECK-NEXT: store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
@@ -661,6 +1477,114 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
+; FIXED-LABEL: @combine_load_factor2_i64(
+; FIXED-NEXT: entry:
+; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; FIXED: vector.ph:
+; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
+; FIXED: vector.body:
+; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
+; FIXED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
+; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
+; FIXED-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
+; FIXED-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
+; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
+; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP6]], align 8
+; FIXED-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP7]], align 8
+; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; FIXED-NEXT: [[TMP8:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC3]]
+; FIXED-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
+; FIXED-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
+; FIXED-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q]], i64 [[TMP1]]
+; FIXED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
+; FIXED-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP10]], i32 4
+; FIXED-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP12]], align 8
+; FIXED-NEXT: store <4 x i64> [[TMP9]], ptr [[TMP13]], align 8
+; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; FIXED: middle.block:
+; FIXED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; FIXED: scalar.ph:
+; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; FIXED-NEXT: br label [[LOOP:%.*]]
+; FIXED: loop:
+; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; FIXED-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]]
+; FIXED-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
+; FIXED-NEXT: store i64 [[RES]], ptr [[DST]], align 8
+; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; FIXED: exit:
+; FIXED-NEXT: ret void
+;
+; SCALABLE-LABEL: @combine_load_factor2_i64(
+; SCALABLE-NEXT: entry:
+; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SCALABLE: vector.ph:
+; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SCALABLE: vector.body:
+; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; SCALABLE-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
+; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 4 x i64>, ptr [[TMP9]], align 8
+; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]])
+; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0
+; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1
+; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], [[TMP11]]
+; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP6]]
+; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 0
+; SCALABLE-NEXT: store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; SCALABLE: middle.block:
+; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SCALABLE: scalar.ph:
+; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SCALABLE-NEXT: br label [[LOOP:%.*]]
+; SCALABLE: loop:
+; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1
+; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]]
+; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8
+; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1
+; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]]
+; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8
+; SCALABLE-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]]
+; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]]
+; SCALABLE-NEXT: store i64 [[RES]], ptr [[DST]], align 8
+; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; SCALABLE: exit:
+; SCALABLE-NEXT: ret void
+;
 entry:
   br label %loop
 loop:
@@ -685,4 +1609,3 @@ loop:
 exit:
   ret void
 }
-
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 12fdf2149daf4..6936887cd166c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -17,27 +17,27 @@ define void @single_constant_stride_int_scaled(ptr %p) {
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP12]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP14]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -90,26 +90,26 @@ define void @single_constant_stride_int_iv(ptr %p) {
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 64, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 64, [[TMP8]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 64, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -157,35 +157,51 @@ exit:
 define void @single_constant_stride_ptr_iv(ptr %p) {
 ; CHECK-LABEL: @single_constant_stride_ptr_iv(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8064
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 8
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56>
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <8 x i64> <i64 64, i64 72, i64 80, i64 88, i64 96, i64 104, i64 112, i64 120>
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x ptr> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4
-; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC3]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP6]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP7]], <8 x ptr> [[TMP1]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 128
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP14]]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP15]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[TMP16]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP18]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP16]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
@@ -236,18 +252,18 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
 ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
+; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED: vector.body:
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]]
-; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
-; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
-; NOSTRIDED-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], align 4
-; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
+; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
+; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; NOSTRIDED: middle.block:
@@ -320,18 +336,18 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
-; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
+; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED: vector.body:
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]]
-; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
-; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
-; NOSTRIDED-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], align 4
-; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
+; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
+; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
+; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; NOSTRIDED: middle.block:
@@ -452,20 +468,20 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP8]]
 ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; NOSTRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NOSTRIDED: vector.body:
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; NOSTRIDED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
-; NOSTRIDED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP9]]
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
-; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4
-; NOSTRIDED-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP9]]
-; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
-; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
-; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; NOSTRIDED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP11]]
+; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
+; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP13]], align 4
+; NOSTRIDED-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP11]]
+; NOSTRIDED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0
+; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4
+; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
 ; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NOSTRIDED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; NOSTRIDED: middle.block:
@@ -518,16 +534,16 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
 ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
 ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4
-; STRIDED-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; STRIDED-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i64> [[TMP10]], zeroinitializer
-; STRIDED-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i64> [[TMP11]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP12]]
-; STRIDED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
-; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
-; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP15]], i64 0
+; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
+; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; STRIDED-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[TMP12]], zeroinitializer
+; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
+; STRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP16]]
+; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -535,13 +551,13 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED: vector.body:
 ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP16:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; STRIDED-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP16]]
-; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope !8
-; STRIDED-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP16]]
-; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope !11, !noalias !8
-; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
+; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]]
+; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
+; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
+; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META11:![0-9]+]], !noalias
[[META8]] +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -601,18 +617,18 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]] -; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NOSTRIDED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 +; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 4 +; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; NOSTRIDED-NEXT: store [[TMP10]], ptr [[TMP9]], align 4 +; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NOSTRIDED: middle.block: @@ -729,43 +745,43 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]] ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; STRIDED-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]] -; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4 +; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1 -; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[STRIDE]], [[TMP14]] -; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 -; STRIDED-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 +; STRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1 +; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]] +; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP17:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP18:%.*]] = add [[DOTSPLAT]], [[TMP17]] +; STRIDED-NEXT: [[TMP19:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP20:%.*]] = add [[DOTSPLAT]], [[TMP19]] ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP18]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; STRIDED-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 1 -; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE]], [[TMP22]] -; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP21]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP24]], i64 0 +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP20]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] +; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1 +; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]] +; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0 +; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector [[DOTSPLATINSERT13]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP25:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP26:%.*]] = add [[DOTSPLAT14]], [[TMP25]] -; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP26]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !15 -; STRIDED-NEXT: [[TMP28:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP28]], [[TMP27]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !18, !noalias !15 -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]] -; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] -; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP23]] +; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; STRIDED-NEXT: [[TMP28:%.*]] = add [[DOTSPLAT14]], [[TMP27]] +; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul [[TMP28]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[VECTOR_GEP17]] +; 
STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP21]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META15:![0-9]+]] +; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP30]], [[TMP29]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] +; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] ; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block:
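
For reference, the only scalable interleave factor the updated cost model accepts is 2, which is the case the vectorizer can express with the llvm.vector.deinterleave2/llvm.vector.interleave2 intrinsics rather than shufflevectors (a constant shuffle mask cannot describe a scalable interleave); the interleaved-access lowering can then turn the pair into vlseg2/vsseg2. Below is a minimal hand-written sketch of that Factor=2 pattern, not IR taken from this patch: the function name @factor2_sketch is hypothetical, and <vscale x 4 x i32> members (an nxv8i32 wide access) are an assumption.

; Round-trip sketch: one wide load, split into the two interleaved members,
; then re-interleaved and stored back as one wide access.
define void @factor2_sketch(ptr %p) {
  %wide = load <vscale x 8 x i32>, ptr %p, align 4
  ; Deinterleave the wide value into its even/odd members.
  %dei = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide)
  %even = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %dei, 0
  %odd = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %dei, 1
  ; Re-interleave the members and store them with a single wide access.
  %int = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %even, <vscale x 4 x i32> %odd)
  store <vscale x 8 x i32> %int, ptr %p, align 4
  ret void
}

declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)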