diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100755
new mode 100644
index a13df6c5bf552..4bece85d3cfbf
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1666,6 +1666,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::vector_interleave7:
   case Intrinsic::vector_interleave8:
   case Intrinsic::vector_deinterleave2:
+  case Intrinsic::vector_deinterleave3:
+  case Intrinsic::vector_deinterleave4:
+  case Intrinsic::vector_deinterleave5:
+  case Intrinsic::vector_deinterleave6:
+  case Intrinsic::vector_deinterleave7:
+  case Intrinsic::vector_deinterleave8:
   // Target intrinsics
   case Intrinsic::amdgcn_perm:
   case Intrinsic::amdgcn_wave_reduce_umin:
@@ -4425,31 +4431,42 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
       return nullptr;
     return ConstantStruct::get(StTy, SinResult, CosResult);
   }
-  case Intrinsic::vector_deinterleave2: {
+  case Intrinsic::vector_deinterleave2:
+  case Intrinsic::vector_deinterleave3:
+  case Intrinsic::vector_deinterleave4:
+  case Intrinsic::vector_deinterleave5:
+  case Intrinsic::vector_deinterleave6:
+  case Intrinsic::vector_deinterleave7:
+  case Intrinsic::vector_deinterleave8: {
+    unsigned NumResults = StTy->getNumElements();
     auto *Vec = Operands[0];
     auto *VecTy = cast<VectorType>(Vec->getType());
+    ElementCount ResultEC =
+        VecTy->getElementCount().divideCoefficientBy(NumResults);
+
     if (auto *EltC = Vec->getSplatValue()) {
-      ElementCount HalfEC = VecTy->getElementCount().divideCoefficientBy(2);
-      auto *HalfVec = ConstantVector::getSplat(HalfEC, EltC);
-      return ConstantStruct::get(StTy, HalfVec, HalfVec);
+      auto *ResultVec = ConstantVector::getSplat(ResultEC, EltC);
+      SmallVector<Constant *> Results(NumResults, ResultVec);
+      return ConstantStruct::get(StTy, Results);
     }
 
-    if (!isa<FixedVectorType>(Vec->getType()))
+    if (!ResultEC.isFixed())
       return nullptr;
 
-    unsigned NumElements = VecTy->getElementCount().getFixedValue() / 2;
-    SmallVector<Constant *> Res0(NumElements), Res1(NumElements);
-    for (unsigned I = 0; I < NumElements; ++I) {
-      Constant *Elt0 = Vec->getAggregateElement(2 * I);
-      Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
-      if (!Elt0 || !Elt1)
-        return nullptr;
-      Res0[I] = Elt0;
-      Res1[I] = Elt1;
+    unsigned NumElements = ResultEC.getFixedValue();
+    SmallVector<Constant *> Results(NumResults);
+    SmallVector<Constant *> Elements(NumElements);
+    for (unsigned I = 0; I != NumResults; ++I) {
+      for (unsigned J = 0; J != NumElements; ++J) {
+        Constant *Elt = Vec->getAggregateElement(J * NumResults + I);
+        if (!Elt)
+          return nullptr;
+        Elements[J] = Elt;
+      }
+      Results[I] = ConstantVector::get(Elements);
     }
-    return ConstantStruct::get(StTy, ConstantVector::get(Res0),
-                               ConstantVector::get(Res1));
+    return ConstantStruct::get(StTy, Results);
   }
   default:
     // TODO: Constant folding of vector intrinsics that fall through here does
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
index fcae336f48b77..5d9ed867c5e68 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
@@ -130,3 +130,195 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_dein
   %1 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.v4f32.v8f32(<vscale x 8 x float> splat (float 1.0))
   ret { <vscale x 4 x float>, <vscale x 4 x float> } %1
 }
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave3() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave3() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> , <4 x i32> }
+;
+  %1 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.vector.deinterleave3.v4i32.v12i32(<12 x i32> )
+  ret { <4 x i32>, <4 x i32>, <4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave3() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave3() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.v4i32.v12i32(<vscale x 12 x i32> zeroinitializer)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave3_splat() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave3_splat() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } { splat (i32 1), splat (i32 1), splat (i32 1) }
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.v4i32.v12i32(<vscale x 12 x i32> splat (i32 1))
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave3_splatfp() {
+; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave3_splatfp() {
+; CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } { splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00) }
+;
+  %1 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave3.v4f32.v12f32(<vscale x 12 x float> splat (float 1.0))
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave4() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave4() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> }
+;
+  %1 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.vector.deinterleave4.v4i32.v16i32(<16 x i32> )
+  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave4() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave4() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.v4i32.v16i32(<vscale x 16 x i32> zeroinitializer)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave4_splat() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave4_splat() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } { splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1) }
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.v4i32.v16i32(<vscale x 16 x i32> splat (i32 1))
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave4_splatfp() {
+; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave4_splatfp() {
+; CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } { splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00) }
+;
+  %1 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave4.v4f32.v16f32(<vscale x 16 x float> splat (float 1.0))
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave5() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave5() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> }
+;
+  %1 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.vector.deinterleave5.v4i32.v20i32(<20 x i32> )
+  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave5() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave5() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave5.v4i32.v20i32(<vscale x 20 x i32> zeroinitializer)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave5_splat() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave5_splat() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } { splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1) }
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave5.v4i32.v20i32(<vscale x 20 x i32> splat (i32 1))
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave5_splatfp() {
+; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave5_splatfp() {
+; CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } { splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00) }
+;
+  %1 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave5.v4f32.v20f32(<vscale x 20 x float> splat (float 1.0))
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave6() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave6() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> }
+;
+  %1 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.vector.deinterleave6.v4i32.v24i32(<24 x i32> )
+  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave6() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave6() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave6.v4i32.v24i32(<vscale x 24 x i32> zeroinitializer)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave6_splat() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave6_splat() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } { splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1) }
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave6.v4i32.v24i32(<vscale x 24 x i32> splat (i32 1))
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave6_splatfp() {
+; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave6_splatfp() {
+; CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } { splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00) }
+;
+  %1 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave6.v4f32.v24f32(<vscale x 24 x float> splat (float 1.0))
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave7() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave7() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> }
+;
+  %1 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.vector.deinterleave7.v4i32.v28i32(<28 x i32> )
+  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave7() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave7() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave7.v4i32.v28i32(<vscale x 28 x i32> zeroinitializer)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave7_splat() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave7_splat() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } { splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1) }
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave7.v4i32.v28i32(<vscale x 28 x i32> splat (i32 1))
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave7_splatfp() {
+; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave7_splatfp() {
+; CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } { splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00) }
+;
+  %1 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave7.v4f32.v28f32(<vscale x 28 x float> splat (float 1.0))
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave8() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @fold_vector_deinterleave8() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> }
+;
+  %1 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.vector.deinterleave8.v4i32.v32i32(<32 x i32> )
+  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave8() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave8() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave8.v4i32.v32i32(<vscale x 32 x i32> zeroinitializer)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave8_splat() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave8_splat() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } { splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1), splat (i32 1) }
+;
+  %1 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave8.v4i32.v32i32(<vscale x 32 x i32> splat (i32 1))
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %1
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave8_splatfp() {
+; CHECK-LABEL: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @fold_scalable_vector_deinterleave8_splatfp() {
+; CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } { splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00), splat (float 1.000000e+00) }
+;
+  %1 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave8.v4f32.v32f32(<vscale x 32 x float> splat (float 1.0))
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1
+}
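
For reference, here is a minimal standalone sketch (not part of the patch) of the kind of call this change lets instsimplify fold away; the function name, element values, and mangled type suffix below are illustrative only and follow the style of the tests above.

; Run through: opt -passes=instsimplify -S
; deinterleave3 sends input element i to result i%3 at position i/3, so this call
; should now fold to { <2 x i32> <i32 0, i32 3>, <2 x i32> <i32 1, i32 4>, <2 x i32> <i32 2, i32 5> }.
define { <2 x i32>, <2 x i32>, <2 x i32> } @example_deinterleave3() {
  %r = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.vector.deinterleave3.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>)
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %r
}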