-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[msan][NFCI] Generalize handleVectorPmaddIntrinsic() #166282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This generalizes handleVectorPmaddIntrinsic() to: - potentially handle floating-point type intrinsics (e.g., llvm.x86.avx512bf16.dpbf16ps.512). This usage is not enabled yet. - optionally not have addition/multiplication with an initialized zero guarantee that the corresponding output becomes initialized.
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-compiler-rt-sanitizer Author: Thurston Dang (thurstond) ChangesThis generalizes handleVectorPmaddIntrinsic():
Full diff: https://github.com/llvm/llvm-project/pull/166282.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 471c6ec633a57..930ffaaf42c45 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3903,7 +3903,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// adding/"accumulating" %s. "Accumulation" stores the result in one
// of the source registers, but this accumulate vs. add distinction
// is lost when dealing with LLVM intrinsics.)
+ //
+ // ZeroPurifies means that multiplying a known-zero with an uninitialized
+ // value results in an initialized value. This is applicable for integer
+ // multiplication, but not floating-point (counter-example: NaN).
void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
+ bool ZeroPurifies,
unsigned EltSizeInBits = 0) {
IRBuilder<> IRB(&I);
@@ -3945,7 +3950,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
assert(AccumulatorType == ReturnType);
}
- FixedVectorType *ImplicitReturnType = ReturnType;
+ FixedVectorType *ImplicitReturnType =
+ cast<FixedVectorType>(getShadowTy(ReturnType));
// Step 1: instrument multiplication of corresponding vector elements
if (EltSizeInBits) {
ImplicitReturnType = cast<FixedVectorType>(
@@ -3964,30 +3970,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ReturnType->getNumElements() * ReductionFactor);
}
- // Multiplying an *initialized* zero by an uninitialized element results in
- // an initialized zero element.
- //
- // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
- // results in an unpoisoned value. We can therefore adapt the visitAnd()
- // instrumentation:
- // OutShadow = (SaNonZero & SbNonZero)
- // | (VaNonZero & SbNonZero)
- // | (SaNonZero & VbNonZero)
- // where non-zero is checked on a per-element basis (not per bit).
- Value *SZero = Constant::getNullValue(Va->getType());
- Value *VZero = Constant::getNullValue(Sa->getType());
- Value *SaNonZero = IRB.CreateICmpNE(Sa, SZero);
- Value *SbNonZero = IRB.CreateICmpNE(Sb, SZero);
- Value *VaNonZero = IRB.CreateICmpNE(Va, VZero);
- Value *VbNonZero = IRB.CreateICmpNE(Vb, VZero);
-
- Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero);
- Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero);
- Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero);
-
// Each element of the vector is represented by a single bit (poisoned or
// not) e.g., <8 x i1>.
- Value *And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero});
+ Value *SaNonZero = IRB.CreateIsNotNull(Sa);
+ Value *SbNonZero = IRB.CreateIsNotNull(Sb);
+ Value *And;
+ if (ZeroPurifies) {
+ // Multiplying an *initialized* zero by an uninitialized element results
+ // in an initialized zero element.
+ //
+ // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
+ // results in an unpoisoned value. We can therefore adapt the visitAnd()
+ // instrumentation:
+ // OutShadow = (SaNonZero & SbNonZero)
+ // | (VaNonZero & SbNonZero)
+ // | (SaNonZero & VbNonZero)
+ // where non-zero is checked on a per-element basis (not per bit).
+ Value *VaInt = Va;
+ Value *VbInt = Vb;
+ if (!Va->getType()->isIntegerTy()) {
+ VaInt = CreateAppToShadowCast(IRB, Va);
+ VbInt = CreateAppToShadowCast(IRB, Vb);
+ }
+
+ Value *VaNonZero = IRB.CreateIsNotNull(VaInt);
+ Value *VbNonZero = IRB.CreateIsNotNull(VbInt);
+
+ Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero);
+ Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero);
+ Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero);
+
+ And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero});
+ } else {
+ And = IRB.CreateOr({SaNonZero, SbNonZero});
+ }
// Extend <8 x i1> to <8 x i16>.
// (The real pmadd intrinsic would have computed intermediate values of
@@ -5752,17 +5768,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
case Intrinsic::x86_avx2_pmadd_ub_sw:
case Intrinsic::x86_avx512_pmaddubs_w_512:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
+ handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2,
+ /*ZeroPurifies=*/true);
break;
// <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
case Intrinsic::x86_ssse3_pmadd_ub_sw:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
+ handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2,
+ /*ZeroPurifies=*/true, /*EltSize=*/8);
break;
// <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
case Intrinsic::x86_mmx_pmadd_wd:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
+ handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2,
+ /*ZeroPurifies=*/true, /*EltSize=*/16);
break;
// AVX Vector Neural Network Instructions: bytes
@@ -5848,7 +5867,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx2_vpdpbuuds_128:
case Intrinsic::x86_avx2_vpdpbuuds_256:
case Intrinsic::x86_avx10_vpdpbuuds_512:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/4, /*EltSize=*/8);
+ handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/4,
+ /*ZeroPurifies=*/true, /*EltSize=*/8);
break;
// AVX Vector Neural Network Instructions: words
@@ -5901,7 +5921,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx512_vpdpwssds_128:
case Intrinsic::x86_avx512_vpdpwssds_256:
case Intrinsic::x86_avx512_vpdpwssds_512:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
+ handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2,
+ /*ZeroPurifies=*/true, /*EltSize=*/16);
break;
// TODO: Dot Product of BF16 Pairs Accumulated Into Packed Single
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull Request Overview
This pull request refactors the MemorySanitizer's vector pmaddIntrinsic handling to support both integer and floating-point operations with different purification semantics. The key change introduces a ZeroPurifies parameter to distinguish between operations where multiplying by zero purifies uninitialized values (integer multiplication) and operations where this does not hold (floating-point multiplication, e.g., NaN behavior).
Key changes:
- Added
ZeroPurifiesboolean parameter tohandleVectorPmaddIntrinsicfunction - Refactored shadow propagation logic to conditionally apply zero-purification optimization based on the parameter
- Updated all existing intrinsic call sites to explicitly pass
ZeroPurifies=truefor integer operations
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| FixedVectorType *ImplicitReturnType = | ||
| cast<FixedVectorType>(getShadowTy(ReturnType)); |
Copilot
AI
Nov 5, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The assignment of ImplicitReturnType is overwritten at line 3957 when EltSizeInBits is non-zero. This initialization should be moved inside the else block (line 3968) to avoid unnecessary computation when it will be replaced.
| // where non-zero is checked on a per-element basis (not per bit). | ||
| Value *VaInt = Va; | ||
| Value *VbInt = Vb; | ||
| if (!Va->getType()->isIntegerTy()) { |
Copilot
AI
Nov 5, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The condition !Va->getType()->isIntegerTy() checks if the type is not integer, but Va is a vector type according to the context. This should check if the vector's element type is not integer using !Va->getType()->getScalarType()->isIntegerTy() instead.
| if (!Va->getType()->isIntegerTy()) { | |
| if (!Va->getType()->getScalarType()->isIntegerTy()) { |
This generalizes
handleVectorPmaddIntrinsic():llvm.x86.avx512bf16.dpbf16ps.512). This usage is not enabled yet.