From efe342af343e6840b255114732672886b7edea00 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Mon, 29 Jul 2024 23:33:41 -0700 Subject: [PATCH 1/2] [SLP][REVEC] Pre-commit test. --- llvm/test/Transforms/SLPVectorizer/revec.ll | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index 31ee107c81cd4..a8274e18d274e 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -296,3 +296,28 @@ for.body13: ; preds = %for.body13, %entry store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4 br label %for.body13 } + +define void @test10() { +entry: + %0 = load <16 x i8>, ptr null, align 1 + %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32> + %shuffle.i107 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32> + %vmovl.i106 = sext <8 x i8> %shuffle.i to <8 x i16> + %vmovl.i = sext <8 x i8> %shuffle.i107 to <8 x i16> + %shuffle.i113 = shufflevector <8 x i16> %vmovl.i106, <8 x i16> zeroinitializer, <4 x i32> + %shuffle.i115 = shufflevector <8 x i16> %vmovl.i106, <8 x i16> zeroinitializer, <4 x i32> + %shuffle.i112 = shufflevector <8 x i16> %vmovl.i, <8 x i16> zeroinitializer, <4 x i32> + %shuffle.i114 = shufflevector <8 x i16> %vmovl.i, <8 x i16> zeroinitializer, <4 x i32> + %vmovl.i111 = sext <4 x i16> %shuffle.i113 to <4 x i32> + %vmovl.i110 = sext <4 x i16> %shuffle.i115 to <4 x i32> + %vmovl.i109 = sext <4 x i16> %shuffle.i112 to <4 x i32> + %vmovl.i108 = sext <4 x i16> %shuffle.i114 to <4 x i32> + %add.ptr29 = getelementptr i8, ptr null, i64 16 + %add.ptr32 = getelementptr i8, ptr null, i64 32 + %add.ptr35 = getelementptr i8, ptr null, i64 48 + store <4 x i32> %vmovl.i111, ptr null, align 4 + store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4 + store <4 x i32> %vmovl.i109, ptr %add.ptr32, align 4 + store <4 x i32> %vmovl.i108, ptr %add.ptr35, align 4 + ret void +} From 427b835e8edcad19887b9fd9775b348f0073fd24 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Thu, 27 Jun 2024 02:43:41 -0700 Subject: [PATCH 2/2] [SLP][REVEC] Make CastInst support vector instructions. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 +++++++++------- llvm/test/Transforms/SLPVectorizer/revec.ll | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ebfb11f841086..feffd9ae3c99b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9877,16 +9877,18 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto *SrcVecTy = getWidenedType(SrcScalarTy, VL.size()); unsigned Opcode = ShuffleOrOp; unsigned VecOpcode = Opcode; - if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() && + if (!ScalarTy->isFPOrFPVectorTy() && !SrcScalarTy->isFPOrFPVectorTy() && (SrcIt != MinBWs.end() || It != MinBWs.end())) { // Check if the values are candidates to demote. - unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy); + unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy->getScalarType()); if (SrcIt != MinBWs.end()) { SrcBWSz = SrcIt->second.first; + unsigned SrcScalarTyNumElements = getNumElements(SrcScalarTy); SrcScalarTy = IntegerType::get(F->getContext(), SrcBWSz); - SrcVecTy = getWidenedType(SrcScalarTy, VL.size()); + SrcVecTy = + getWidenedType(SrcScalarTy, VL.size() * SrcScalarTyNumElements); } - unsigned BWSz = DL->getTypeSizeInBits(ScalarTy); + unsigned BWSz = DL->getTypeSizeInBits(ScalarTy->getScalarType()); if (BWSz == SrcBWSz) { VecOpcode = Instruction::BitCast; } else if (BWSz < SrcBWSz) { @@ -13452,14 +13454,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Instruction::CastOps VecOpcode = CI->getOpcode(); Type *SrcScalarTy = cast(InVec->getType())->getElementType(); auto SrcIt = MinBWs.find(getOperandEntry(E, 0)); - if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() && + if (!ScalarTy->isFPOrFPVectorTy() && !SrcScalarTy->isFPOrFPVectorTy() && (SrcIt != MinBWs.end() || It != MinBWs.end() || - SrcScalarTy != CI->getOperand(0)->getType())) { + SrcScalarTy != CI->getOperand(0)->getType()->getScalarType())) { // Check if the values are candidates to demote. unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy); if (SrcIt != MinBWs.end()) SrcBWSz = SrcIt->second.first; - unsigned BWSz = DL->getTypeSizeInBits(ScalarTy); + unsigned BWSz = DL->getTypeSizeInBits(ScalarTy->getScalarType()); if (BWSz == SrcBWSz) { VecOpcode = Instruction::BitCast; } else if (BWSz < SrcBWSz) { diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index a8274e18d274e..59201da1d9ac1 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -298,6 +298,21 @@ for.body13: ; preds = %for.body13, %entry } define void @test10() { +; CHECK-LABEL: @test10( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> poison, i64 16) +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> [[TMP1]], <16 x i8> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP3]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i16> [[TMP6]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[TMP8]] to <16 x i32> +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr null, align 4 +; CHECK-NEXT: ret void +; entry: %0 = load <16 x i8>, ptr null, align 1 %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32>