From af13804025b41a7422c7b035f8e8de8a9db43870 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 3 Oct 2025 11:33:30 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 9 ++++-- .../SLPVectorizer/X86/no_alternate_divrem.ll | 30 +++++-------------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fedca65d241e8..21d9f20752b66 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10620,7 +10620,8 @@ class InstructionsCompatibilityAnalysis { /// Checks if the opcode is supported as the main opcode for copyable /// elements. static bool isSupportedOpcode(const unsigned Opcode) { - return Opcode == Instruction::Add || Opcode == Instruction::LShr; + return Opcode == Instruction::Add || Opcode == Instruction::LShr || + Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; } /// Identifies the best candidate value, which represents main opcode @@ -10937,6 +10938,8 @@ class InstructionsCompatibilityAnalysis { switch (MainOpcode) { case Instruction::Add: case Instruction::LShr: + case Instruction::SDiv: + case Instruction::UDiv: VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind); break; default: @@ -22062,8 +22065,10 @@ bool BoUpSLP::collectValuesToDemote( auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) { assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); return all_of(E.Scalars, [&](Value *V) { - auto *I = cast(V); APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + if (E.hasCopyableElements() && E.isCopyableElement(V)) + return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)); + auto *I = cast(V); return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); }); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index cf62fd5cf66f7..a888027479817 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -4,21 +4,14 @@ define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_sdiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2 -; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4 ; CHECK-NEXT: ret void @@ -58,21 +51,14 @@ entry: define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_udiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2 -; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 ; CHECK-NEXT: ret void