diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index deb64bfc75063..8892f9b098349 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3051,7 +3051,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   if (State.VF.isVector() && shouldPack()) {
     Value *WideValue =
         State.Lane->isFirstLane()
-            ? PoisonValue::get(VectorType::get(UI->getType(), State.VF))
+            ? PoisonValue::get(toVectorizedTy(UI->getType(), State.VF))
             : State.get(this);
     State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
                                                         *State.Lane));
@@ -3267,11 +3267,27 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
   // also do that packing, thereby "hoisting" the insert-element sequence.
   // Otherwise, a phi node for the scalar value is needed.
   if (State.hasVectorValue(getOperand(0))) {
-    Value *VectorValue = State.get(getOperand(0));
-    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
-    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
-    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
-    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
+    auto *VecI = cast<Instruction>(State.get(getOperand(0)));
+    assert((isa<InsertElementInst, InsertValueInst>(VecI)) &&
+           "Packed operands must generate an insertelement or insertvalue");
+
+    // If VecI is a struct, it is the last link of a sequence like:
+    //    %1    = insertvalue %unmodified, %x, 0
+    //    %2    = insertvalue %1,          %y, 1
+    //    %VecI = insertvalue %2,          %z, 2
+    // To get the unmodified vector we need to look through the chain to the
+    // first insert. VecI itself must be kept: it is the only value that
+    // carries every packed field, so it is what the phi has to merge.
+    Instruction *FirstInsert = VecI;
+    if (auto *StructTy = dyn_cast<StructType>(VecI->getType()))
+      for (unsigned I = 1, E = StructTy->getNumContainedTypes(); I < E; ++I)
+        FirstInsert = cast<Instruction>(FirstInsert->getOperand(0));
+
+    PHINode *VPhi = State.Builder.CreatePHI(VecI->getType(), 2);
+    // Unmodified vector: base operand of the first insert in the chain.
+    VPhi->addIncoming(FirstInsert->getOperand(0), PredicatingBB);
+    // New vector with the inserted element(s): last insert in the chain.
+    VPhi->addIncoming(VecI, PredicatedBB);
     if (State.hasVectorValue(this))
       State.reset(this, VPhi);
     else
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
index 5c622f825beaf..99916a503750a 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
@@ -453,6 +453,221 @@ exit:
   ret void
 }
 
+define void @struct_return_2xf32_replicate_predicated(ptr %a) {
+; VF4-LABEL: define void @struct_return_2xf32_replicate_predicated(
+; VF4-SAME: ptr [[A:%.*]]) {
+; VF4-NEXT:  [[ENTRY:.*:]]
+; VF4-NEXT:    br label %[[VECTOR_PH:.*]]
+; VF4:       [[VECTOR_PH]]:
+; VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF4:       [[VECTOR_BODY]]:
+; VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ]
+; VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 8
+; VF4-NEXT:    [[TMP1:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; VF4-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; VF4-NEXT:    br i1 [[TMP2]], label %[[PRED_CALL_IF:.*]], label %[[PRED_CALL_CONTINUE:.*]]
+; VF4:       [[PRED_CALL_IF]]:
+; VF4-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
+; VF4-NEXT:    [[TMP4:%.*]] = tail call { float, float } @fn2(float [[TMP3]]) #[[ATTR3:[0-9]+]]
+; VF4-NEXT:    [[TMP5:%.*]] = extractvalue { float, float } [[TMP4]], 0
+; VF4-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0
+; VF4-NEXT:    [[TMP7:%.*]] = insertvalue { <4 x float>, <4 x float> } poison, <4 x float> [[TMP6]], 0
+; VF4-NEXT:    [[TMP8:%.*]] = extractvalue { float, float } [[TMP4]], 1
+; VF4-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP7]], 1
+; VF4-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP8]], i32 0
+; VF4-NEXT:    [[TMP11:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP7]], <4 x float> [[TMP10]], 1
+; VF4-NEXT:    br label %[[PRED_CALL_CONTINUE]]
+; VF4:       [[PRED_CALL_CONTINUE]]:
+; VF4-NEXT:    [[TMP12:%.*]] = phi { <4 x float>, <4 x float> } [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_CALL_IF]] ]
+; VF4-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; VF4-NEXT:    br i1 [[TMP13]], label %[[PRED_CALL_IF1:.*]], label %[[PRED_CALL_CONTINUE2:.*]]
+; VF4:       [[PRED_CALL_IF1]]:
+; VF4-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
+; VF4-NEXT:    [[TMP15:%.*]] = tail call { float, float } @fn2(float [[TMP14]]) #[[ATTR3]]
+; VF4-NEXT:    [[TMP16:%.*]] = extractvalue { float, float } [[TMP15]], 0
+; VF4-NEXT:    [[TMP17:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP12]], 0
+; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP16]], i32 1
+; VF4-NEXT:    [[TMP19:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0
+; VF4-NEXT:    [[TMP20:%.*]] = extractvalue { float, float } [[TMP15]], 1
+; VF4-NEXT:    [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP19]], 1
+; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP20]], i32 1
+; VF4-NEXT:    [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP19]], <4 x float> [[TMP22]], 1
+; VF4-NEXT:    br label %[[PRED_CALL_CONTINUE2]]
+; VF4:       [[PRED_CALL_CONTINUE2]]:
+; VF4-NEXT:    [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP23]], %[[PRED_CALL_IF1]] ]
+; VF4-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; VF4-NEXT:    br i1 [[TMP25]], label %[[PRED_CALL_IF3:.*]], label %[[PRED_CALL_CONTINUE4:.*]]
+; VF4:       [[PRED_CALL_IF3]]:
+; VF4-NEXT:    [[TMP26:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
+; VF4-NEXT:    [[TMP27:%.*]] = tail call { float, float } @fn2(float [[TMP26]]) #[[ATTR3]]
+; VF4-NEXT:    [[TMP28:%.*]] = extractvalue { float, float } [[TMP27]], 0
+; VF4-NEXT:    [[TMP29:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP24]], 0
+; VF4-NEXT:    [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP28]], i32 2
+; VF4-NEXT:    [[TMP31:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP24]], <4 x float> [[TMP30]], 0
+; VF4-NEXT:    [[TMP32:%.*]] = extractvalue { float, float } [[TMP27]], 1
+; VF4-NEXT:    [[TMP33:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP31]], 1
+; VF4-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP32]], i32 2
+; VF4-NEXT:    [[TMP35:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP31]], <4 x float> [[TMP34]], 1
+; VF4-NEXT:    br label %[[PRED_CALL_CONTINUE4]]
+; VF4:       [[PRED_CALL_CONTINUE4]]:
+; VF4-NEXT:    [[TMP36:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP24]], %[[PRED_CALL_CONTINUE2]] ], [ [[TMP35]], %[[PRED_CALL_IF3]] ]
+; VF4-NEXT:    [[TMP37:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; VF4-NEXT:    br i1 [[TMP37]], label %[[PRED_CALL_IF5:.*]], label %[[PRED_CALL_CONTINUE6:.*]]
+; VF4:       [[PRED_CALL_IF5]]:
+; VF4-NEXT:    [[TMP38:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
+; VF4-NEXT:    [[TMP39:%.*]] = tail call { float, float } @fn2(float [[TMP38]]) #[[ATTR3]]
+; VF4-NEXT:    [[TMP40:%.*]] = extractvalue { float, float } [[TMP39]], 0
+; VF4-NEXT:    [[TMP41:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP36]], 0
+; VF4-NEXT:    [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP40]], i32 3
+; VF4-NEXT:    [[TMP43:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP36]], <4 x float> [[TMP42]], 0
+; VF4-NEXT:    [[TMP44:%.*]] = extractvalue { float, float } [[TMP39]], 1
+; VF4-NEXT:    [[TMP45:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP43]], 1
+; VF4-NEXT:    [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP44]], i32 3
+; VF4-NEXT:    [[TMP47:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP43]], <4 x float> [[TMP46]], 1
+; VF4-NEXT:    br label %[[PRED_CALL_CONTINUE6]]
+; VF4:       [[PRED_CALL_CONTINUE6]]:
+; VF4-NEXT:    [[TMP48:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP36]], %[[PRED_CALL_CONTINUE4]] ], [ [[TMP47]], %[[PRED_CALL_IF5]] ]
+; VF4-NEXT:    [[TMP49:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP48]], 0
+; VF4-NEXT:    [[TMP50:%.*]] = fdiv <4 x float> [[TMP49]], [[WIDE_LOAD]]
+; VF4-NEXT:    [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; VF4-NEXT:    br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF4:       [[PRED_STORE_IF]]:
+; VF4-NEXT:    [[TMP52:%.*]] = add i64 [[INDEX]], 0
+; VF4-NEXT:    [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]]
+; VF4-NEXT:    [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0
+; VF4-NEXT:    store float [[TMP54]], ptr [[TMP53]], align 8
+; VF4-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; VF4:       [[PRED_STORE_CONTINUE]]:
+; VF4-NEXT:    [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; VF4-NEXT:    br i1 [[TMP55]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF4:       [[PRED_STORE_IF7]]:
+; VF4-NEXT:    [[TMP56:%.*]] = add i64 [[INDEX]], 1
+; VF4-NEXT:    [[TMP57:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]]
+; VF4-NEXT:    [[TMP58:%.*]] = extractelement <4 x float> [[TMP50]], i32 1
+; VF4-NEXT:    store float [[TMP58]], ptr [[TMP57]], align 8
+; VF4-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; VF4:       [[PRED_STORE_CONTINUE8]]:
+; VF4-NEXT:    [[TMP59:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; VF4-NEXT:    br i1 [[TMP59]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF4:       [[PRED_STORE_IF9]]:
+; VF4-NEXT:    [[TMP60:%.*]] = add i64 [[INDEX]], 2
+; VF4-NEXT:    [[TMP61:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP60]]
+; VF4-NEXT:    [[TMP62:%.*]] = extractelement <4 x float> [[TMP50]], i32 2
+; VF4-NEXT:    store float [[TMP62]], ptr [[TMP61]], align 8
+; VF4-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; VF4:       [[PRED_STORE_CONTINUE10]]:
+; VF4-NEXT:    [[TMP63:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; VF4-NEXT:    br i1 [[TMP63]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]]
+; VF4:       [[PRED_STORE_IF11]]:
+; VF4-NEXT:    [[TMP64:%.*]] = add i64 [[INDEX]], 3
+; VF4-NEXT:    [[TMP65:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP64]]
+; VF4-NEXT:    [[TMP66:%.*]] = extractelement <4 x float> [[TMP50]], i32 3
+; VF4-NEXT:    store float [[TMP66]], ptr [[TMP65]], align 8
+; VF4-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; VF4:       [[PRED_STORE_CONTINUE12]]:
+; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF4-NEXT:    [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; VF4-NEXT:    br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF4:       [[MIDDLE_BLOCK]]:
+;
+; VF2IC2-LABEL: define void @struct_return_2xf32_replicate_predicated(
+; VF2IC2-SAME: ptr [[A:%.*]]) {
+; VF2IC2-NEXT:  [[ENTRY:.*:]]
+; VF2IC2-NEXT:    br label %[[VECTOR_PH:.*]]
+; VF2IC2:       [[VECTOR_PH]]:
+; VF2IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF2IC2:       [[VECTOR_BODY]]:
+; VF2IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
+; VF2IC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; VF2IC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2
+; VF2IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 8
+; VF2IC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP1]], align 8
+; VF2IC2-NEXT:    [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], zeroinitializer
+; VF2IC2-NEXT:    [[TMP3:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], zeroinitializer
+; VF2IC2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; VF2IC2-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF2IC2:       [[PRED_STORE_IF]]:
+; VF2IC2-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
+; VF2IC2-NEXT:    [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]]
+; VF2IC2-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; VF2IC2-NEXT:    [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
+; VF2IC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
+; VF2IC2-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
+; VF2IC2-NEXT:    [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]]
+; VF2IC2-NEXT:    store float [[TMP11]], ptr [[TMP9]], align 8
+; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; VF2IC2:       [[PRED_STORE_CONTINUE]]:
+; VF2IC2-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; VF2IC2-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
+; VF2IC2:       [[PRED_STORE_IF2]]:
+; VF2IC2-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
+; VF2IC2-NEXT:    [[TMP14:%.*]] = tail call { float, float } @fn2(float [[TMP13]]) #[[ATTR3]]
+; VF2IC2-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 1
+; VF2IC2-NEXT:    [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
+; VF2IC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
+; VF2IC2-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
+; VF2IC2-NEXT:    [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]]
+; VF2IC2-NEXT:    store float [[TMP19]], ptr [[TMP17]], align 8
+; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE3]]
+; VF2IC2:       [[PRED_STORE_CONTINUE3]]:
+; VF2IC2-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
+; VF2IC2-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
+; VF2IC2:       [[PRED_STORE_IF4]]:
+; VF2IC2-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
+; VF2IC2-NEXT:    [[TMP22:%.*]] = tail call { float, float } @fn2(float [[TMP21]]) #[[ATTR3]]
+; VF2IC2-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 2
+; VF2IC2-NEXT:    [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
+; VF2IC2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]]
+; VF2IC2-NEXT:    [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
+; VF2IC2-NEXT:    [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]]
+; VF2IC2-NEXT:    store float [[TMP27]], ptr [[TMP25]], align 8
+; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE5]]
+; VF2IC2:       [[PRED_STORE_CONTINUE5]]:
+; VF2IC2-NEXT:    [[TMP28:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
+; VF2IC2-NEXT:    br i1 [[TMP28]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
+; VF2IC2:       [[PRED_STORE_IF6]]:
+; VF2IC2-NEXT:    [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
+; VF2IC2-NEXT:    [[TMP30:%.*]] = tail call { float, float } @fn2(float [[TMP29]]) #[[ATTR3]]
+; VF2IC2-NEXT:    [[TMP31:%.*]] = add i64 [[INDEX]], 3
+; VF2IC2-NEXT:    [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
+; VF2IC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
+; VF2IC2-NEXT:    [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
+; VF2IC2-NEXT:    [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]]
+; VF2IC2-NEXT:    store float [[TMP35]], ptr [[TMP33]], align 8
+; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE7]]
+; VF2IC2:       [[PRED_STORE_CONTINUE7]]:
+; VF2IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF2IC2-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; VF2IC2-NEXT:    br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF2IC2:       [[MIDDLE_BLOCK]]:
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %in_val = load float, ptr %arrayidx, align 8
+  %sgt_zero = fcmp ogt float %in_val, 0.0
+  br i1 %sgt_zero, label %if.then, label %for.inc
+
+if.then:
+  %call = tail call { float, float } @fn2(float %in_val) #3
+  %extract_a = extractvalue { float, float } %call, 0
+  %div = fdiv float %extract_a, %in_val
+  store float %div, ptr %arrayidx, align 8
+  br label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare { i64 } @fn1(float)
 declare { float, float } @fn2(float)
 declare { i32, i32, i32 } @fn3(i32)
@@ -464,3 +679,4 @@ declare { <8 x i32>, <8 x i32>, <8 x i32> } @fixed_vec_fn3(<8 x i32>)
 attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn1(fixed_vec_fn1)" }
 attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn2(fixed_vec_fn2)" }
 attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn3(fixed_vec_fn3)" }
+attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVnM8v_fn2(fixed_vec_fn2)" }