From c79de90773dae1ef37f25b9873ea71eb681868ef Mon Sep 17 00:00:00 2001 From: XChy Date: Sat, 6 Sep 2025 16:55:27 +0800 Subject: [PATCH 1/4] Precommit tests --- .../VectorCombine/X86/bitop-of-castops.ll | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index c6253a7b858ad..1e527538dd594 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -433,6 +433,19 @@ define <2 x i16> @and_bitcast_f32_to_v2i16_constant(float %a) { ret <2 x i16> %and } +define <2 x i16> @and_bitcast_f32_to_v2i16(float %a, float %b) { +; CHECK-LABEL: @and_bitcast_f32_to_v2i16( +; CHECK-NEXT: [[BC1:%.*]] = bitcast float [[A:%.*]] to <2 x i16> +; CHECK-NEXT: [[BC2:%.*]] = bitcast float [[B:%.*]] to <2 x i16> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i16> [[BC1]], [[BC2]] +; CHECK-NEXT: ret <2 x i16> [[AND]] +; + %bc1 = bitcast float %a to <2 x i16> + %bc2 = bitcast float %b to <2 x i16> + %and = and <2 x i16> %bc1, %bc2 + ret <2 x i16> %and +} + ; Negative test: bitcast from vector float to scalar int (optimization should not apply) define i64 @and_bitcast_v2f32_to_i64_constant(<2 x float> %a) { ; CHECK-LABEL: @and_bitcast_v2f32_to_i64_constant( @@ -445,6 +458,19 @@ define i64 @and_bitcast_v2f32_to_i64_constant(<2 x float> %a) { ret i64 %and } +define i64 @and_bitcast_v2f32_to_i64(<2 x float> %a, <2 x float> %b) { +; CHECK-LABEL: @and_bitcast_v2f32_to_i64( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x float> [[A:%.*]] to i64 +; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x float> [[B:%.*]] to i64 +; CHECK-NEXT: [[AND:%.*]] = and i64 [[BC1]], [[BC2]] +; CHECK-NEXT: ret i64 [[AND]] +; + %bc1 = bitcast <2 x float> %a to i64 + %bc2 = bitcast <2 x float> %b to i64 + %and = and i64 %bc1, %bc2 + ret i64 %and +} + ; Test no-op bitcast define i16 @xor_bitcast_i16_to_i16_constant(i16 %a) { ; 
CHECK-LABEL: @xor_bitcast_i16_to_i16_constant( @@ -457,6 +483,19 @@ define i16 @xor_bitcast_i16_to_i16_constant(i16 %a) { ret i16 %or } +define i16 @xor_bitcast_i16_to_i16(i16 %a, i16 %b) { +; CHECK-LABEL: @xor_bitcast_i16_to_i16( +; CHECK-NEXT: [[BC1:%.*]] = bitcast i16 [[A:%.*]] to i16 +; CHECK-NEXT: [[BC2:%.*]] = bitcast i16 [[B:%.*]] to i16 +; CHECK-NEXT: [[OR:%.*]] = xor i16 [[BC1]], [[BC2]] +; CHECK-NEXT: ret i16 [[OR]] +; + %bc1 = bitcast i16 %a to i16 + %bc2 = bitcast i16 %b to i16 + %or = xor i16 %bc1, %bc2 + ret i16 %or +} + ; Test bitwise operations with integer vector to integer bitcast define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) { ; CHECK-LABEL: @xor_bitcast_i16_to_v16i1_constant( @@ -469,6 +508,19 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) { ret <16 x i1> %or } +define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) { +; CHECK-LABEL: @xor_bitcast_i16_to_v16i1( +; CHECK-NEXT: [[BC1:%.*]] = bitcast i16 [[A:%.*]] to <16 x i1> +; CHECK-NEXT: [[BC2:%.*]] = bitcast i16 [[B:%.*]] to <16 x i1> +; CHECK-NEXT: [[OR:%.*]] = xor <16 x i1> [[BC1]], [[BC2]] +; CHECK-NEXT: ret <16 x i1> [[OR]] +; + %bc1 = bitcast i16 %a to <16 x i1> + %bc2 = bitcast i16 %b to <16 x i1> + %or = xor <16 x i1> %bc1, %bc2 + ret <16 x i1> %or +} + ; Test bitwise operations with integer vector to integer bitcast define i16 @or_bitcast_v16i1_to_i16_constant(<16 x i1> %a) { ; CHECK-LABEL: @or_bitcast_v16i1_to_i16_constant( @@ -480,3 +532,16 @@ define i16 @or_bitcast_v16i1_to_i16_constant(<16 x i1> %a) { %or = or i16 %bc, 3 ret i16 %or } + +define i16 @or_bitcast_v16i1_to_i16(<16 x i1> %a, <16 x i1> %b) { +; CHECK-LABEL: @or_bitcast_v16i1_to_i16( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <16 x i1> [[A:%.*]] to i16 +; CHECK-NEXT: [[BC2:%.*]] = bitcast <16 x i1> [[B:%.*]] to i16 +; CHECK-NEXT: [[OR:%.*]] = or i16 [[BC1]], [[BC2]] +; CHECK-NEXT: ret i16 [[OR]] +; + %bc1 = bitcast <16 x i1> %a to i16 + %bc2 = bitcast <16 x i1> %b to i16 + %or = or i16 %bc1, 
%bc2 + ret i16 %or +} From 84ae9236ee2c261ab2e9ca3b8b428fef774d4fd7 Mon Sep 17 00:00:00 2001 From: XChy Date: Sat, 6 Sep 2025 16:58:44 +0800 Subject: [PATCH 2/4] [VectorCombine] Relax vector type constraint on bitop(bitcast, bitcast) --- .../Transforms/Vectorize/VectorCombine.cpp | 29 +++++++++---------- .../VectorCombine/X86/bitop-of-castops.ll | 7 ++--- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index b1c7a2682785b..75c613eca97cf 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -870,14 +870,15 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) { if (LHSSrc->getType() != RHSSrc->getType()) return false; - // Only handle vector types with integer elements - auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType()); - auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType()); - if (!SrcVecTy || !DstVecTy) + auto *SrcTy = LHSSrc->getType(); + auto *DstTy = I.getType(); + // Only handle vector types with integer elements if the cast is not bitcast + if (CastOpcode != Instruction::BitCast && + (!isa<FixedVectorType>(SrcTy) || !isa<FixedVectorType>(DstTy))) return false; - if (!SrcVecTy->getScalarType()->isIntegerTy() || - !DstVecTy->getScalarType()->isIntegerTy()) + if (!SrcTy->getScalarType()->isIntegerTy() || + !DstTy->getScalarType()->isIntegerTy()) return false; // Cost Check : @@ -885,23 +886,21 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) { // NewCost = bitlogic + cast // Calculate specific costs for each cast with instruction context - InstructionCost LHSCastCost = - TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy, - TTI::CastContextHint::None, CostKind, LHSCast); - InstructionCost RHSCastCost = - TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy, - TTI::CastContextHint::None, CostKind, RHSCast); + InstructionCost LHSCastCost = TTI.getCastInstrCost( + CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind, 
LHSCast); + InstructionCost RHSCastCost = TTI.getCastInstrCost( + CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind, RHSCast); InstructionCost OldCost = - TTI.getArithmeticInstrCost(BinOp->getOpcode(), DstVecTy, CostKind) + + TTI.getArithmeticInstrCost(BinOp->getOpcode(), DstTy, CostKind) + LHSCastCost + RHSCastCost; // For new cost, we can't provide an instruction (it doesn't exist yet) InstructionCost GenericCastCost = TTI.getCastInstrCost( - CastOpcode, DstVecTy, SrcVecTy, TTI::CastContextHint::None, CostKind); + CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind); InstructionCost NewCost = - TTI.getArithmeticInstrCost(BinOp->getOpcode(), SrcVecTy, CostKind) + + TTI.getArithmeticInstrCost(BinOp->getOpcode(), SrcTy, CostKind) + GenericCastCost; // Account for multi-use casts using specific costs diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index 1e527538dd594..d17e25a845b5b 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -510,10 +510,9 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) { define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) { ; CHECK-LABEL: @xor_bitcast_i16_to_v16i1( -; CHECK-NEXT: [[BC1:%.*]] = bitcast i16 [[A:%.*]] to <16 x i1> -; CHECK-NEXT: [[BC2:%.*]] = bitcast i16 [[B:%.*]] to <16 x i1> -; CHECK-NEXT: [[OR:%.*]] = xor <16 x i1> [[BC1]], [[BC2]] -; CHECK-NEXT: ret <16 x i1> [[OR]] +; CHECK-NEXT: [[B:%.*]] = xor i16 [[A:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[BC2:%.*]] = bitcast i16 [[B]] to <16 x i1> +; CHECK-NEXT: ret <16 x i1> [[BC2]] ; %bc1 = bitcast i16 %a to <16 x i1> %bc2 = bitcast i16 %b to <16 x i1> From d29fbb40645007de54755f457ab189ad3a2f224f Mon Sep 17 00:00:00 2001 From: XChy Date: Sun, 7 Sep 2025 21:21:16 +0800 Subject: [PATCH 3/4] add comment --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 75c613eca97cf..7a0b7ad57a493 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -872,11 +872,13 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) { auto *SrcTy = LHSSrc->getType(); auto *DstTy = I.getType(); - // Only handle vector types with integer elements if the cast is not bitcast + // Bitcasts can handle scalar/vector mixes, such as i16 -> <16 x i1>. + // Other casts only handle vector types with integer elements. if (CastOpcode != Instruction::BitCast && (!isa<FixedVectorType>(SrcTy) || !isa<FixedVectorType>(DstTy))) return false; + // Only integer scalar/vector values are legal for bitwise logic operations. if (!SrcTy->getScalarType()->isIntegerTy() || !DstTy->getScalarType()->isIntegerTy()) return false; From 5338fa41264173a233f51ed9f4b6b69d4c2c3b5f Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 8 Sep 2025 14:21:52 +0800 Subject: [PATCH 4/4] resolve conflict --- llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index d17e25a845b5b..f6c9dce542ef4 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -510,9 +510,9 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) { define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) { ; CHECK-LABEL: @xor_bitcast_i16_to_v16i1( -; CHECK-NEXT: [[B:%.*]] = xor i16 [[A:%.*]], [[B1:%.*]] -; CHECK-NEXT: [[BC2:%.*]] = bitcast i16 [[B]] to <16 x i1> -; CHECK-NEXT: ret <16 x i1> [[BC2]] +; CHECK-NEXT: [[B1:%.*]] = xor i16 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[BC3:%.*]] = bitcast i16 [[B1]] to <16 x i1> +; CHECK-NEXT: ret <16 x i1> [[BC3]] 
; %bc1 = bitcast i16 %a to <16 x i1> %bc2 = bitcast i16 %b to <16 x i1>