Skip to content

Commit da6c3d1

Browse files
committed
[VPlan] Use getOpcodeOrIntrinsicID to fix miscompile
1 parent e3d44e4 commit da6c3d1

File tree

2 files changed: +33 -4 lines changed

2 files changed

+33
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4104,6 +4104,15 @@ static bool interleaveStoredValuesMatch(ArrayRef<VPValue *> StoredValues) {
41044104
if (!IR->getInterleaveGroup()->isFull() ||
41054105
!equal(DefI->definedValues(), Def0->definedValues()))
41064106
return false;
4107+
} else if (Def0 != DefI) {
4108+
auto *SingleDef0 = dyn_cast<VPSingleDefRecipe>(Def0);
4109+
auto *SingleDefI = dyn_cast<VPSingleDefRecipe>(DefI);
4110+
if (!SingleDef0 || !SingleDefI)
4111+
return false;
4112+
auto Opc0 = getOpcodeOrIntrinsicID(SingleDef0);
4113+
auto OpcI = getOpcodeOrIntrinsicID(SingleDefI);
4114+
if (!Opc0 || Opc0 != OpcI)
4115+
return false;
41074116
}
41084117
}
41094118
}
@@ -4260,11 +4269,12 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
42604269
NarrowedOps.insert(RepR);
42614270
return RepR;
42624271
}
4263-
auto *WideLoad = dyn_cast<VPWidenLoadRecipe>(R);
4264-
if (!WideLoad) {
4272+
if (isa<VPSingleDefRecipe>(R)) {
4273+
// Narrow any intervening single-def recipes.
42654274
NarrowedOps.insert(V);
42664275
return V;
42674276
}
4277+
auto *WideLoad = cast<VPWidenLoadRecipe>(R);
42684278

42694279
VPValue *PtrOp = WideLoad->getAddr();
42704280
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(PtrOp))

llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,27 @@ define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) {
107107
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255)
108108
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8>
109109
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]]
110-
; CHECK-NEXT: store <4 x i8> [[TMP4]], ptr [[NEXT_GEP]], align 1
111-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
110+
; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16>
111+
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP23]], [[BROADCAST_SPLAT]]
112+
; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255)
113+
; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8>
114+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]]
115+
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16>
116+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]]
117+
; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255)
118+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8>
119+
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]]
120+
; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16>
121+
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]]
122+
; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255)
123+
; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8>
124+
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]]
125+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
126+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
127+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
128+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
129+
; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1
130+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
112131
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
113132
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
114133
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)