Skip to content

Commit

Permalink
[IR] Require that ptrmask mask matches pointer index size (#69343)
Browse files Browse the repository at this point in the history
Currently, we specify that the ptrmask intrinsic allows the mask to have
any size, which will be zero-extended or truncated to the pointer size.

However, what semantics of the specified GEP expansion actually imply is
that the mask is only meaningful up to the pointer type *index* size --
any higher bits of the pointer will always be preserved. In other words,
the mask gets 1-extended from the index size to the pointer size. This
is also the behavior we want for CHERI architectures.

This PR makes two changes:
* It spells out the interaction with the pointer type index size more
explicitly.
* It requires that the mask matches the pointer type index size. The
intention here is to make handling of this intrinsic more robust, to
avoid accidental mix-ups of pointer size and index size in code
generating this intrinsic. If a zero-extend or truncate of the mask is
desired, it should just be done explicitly in IR. This also cuts down on
the amount of testing we have to do, and things transforms needs to
check for.

As far as I can tell, we don't actually support pointers with different
index type size at the SDAG level, so I'm just asserting the sizes match
there for now. Out-of-tree targets using different index sizes may need
to adjust that code.
  • Loading branch information
nikic committed Oct 24, 2023
1 parent c45466c commit eb86de6
Show file tree
Hide file tree
Showing 14 changed files with 100 additions and 400 deletions.
28 changes: 20 additions & 8 deletions llvm/docs/LangRef.rst
Expand Up @@ -26964,7 +26964,8 @@ Arguments:
""""""""""

The first argument is a pointer or vector of pointers. The second argument is
an integer or vector of integers.
an integer or vector of integers with the same bit width as the index type
size of the first argument.

Overview:
""""""""""
Expand All @@ -26977,13 +26978,24 @@ to facilitate alias analysis and underlying-object detection.
Semantics:
""""""""""

The result of ``ptrmask(ptr, mask)`` is equivalent to
``getelementptr ptr, (ptrtoint(ptr) & mask) - ptrtoint(ptr)``. Both the returned
pointer(s) and the first argument are based on the same underlying object (for more
information on the *based on* terminology see
:ref:`the pointer aliasing rules <pointeraliasing>`). If the bitwidth of the
mask argument does not match the pointer size of the target, the mask is
zero-extended or truncated accordingly.
The result of ``ptrmask(%ptr, %mask)`` is equivalent to the following expansion,
where ``iPtrIdx`` is the index type size of the pointer::

%intptr = ptrtoint ptr %ptr to iPtrIdx ; this may truncate
%masked = and iPtrIdx %intptr, %mask
%diff = sub iPtrIdx %masked, %intptr
%result = getelementptr i8, ptr %ptr, iPtrIdx %diff

If the pointer index type size is smaller than the pointer type size, this
implies that pointer bits beyond the index size are not affected by this
intrinsic. For integral pointers, it behaves as if the mask were extended with
1 bits to the pointer type size.

Both the returned pointer(s) and the first argument are based on the same
underlying object (for more information on the *based on* terminology see
:ref:`the pointer aliasing rules <pointeraliasing>`).

The intrinsic only captures the pointer argument through the return value.

.. _int_threadlocal_address:

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/ValueTracking.cpp
Expand Up @@ -1637,8 +1637,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
const Value *Mask = I->getOperand(1);
Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
computeKnownBits(Mask, Known2, Depth + 1, Q);
// This is basically a pointer typed and.
Known &= Known2.zextOrTrunc(Known.getBitWidth());
// TODO: 1-extend would be more precise.
Known &= Known2.anyextOrTrunc(BitWidth);
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -7426,11 +7426,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
SDValue Mask = getValue(I.getOperand(1));

EVT PtrVT = Ptr.getValueType();
setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
assert(PtrVT == Mask.getValueType() &&
"Pointers with different index type are not supported by SDAG");
setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
return;
}
case Intrinsic::threadlocal_address: {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Expand Up @@ -5993,6 +5993,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"llvm.ptrmask intrinsic arguments must have the same number of "
"elements",
&Call);
Check(DL.getIndexTypeSizeInBits(Ty0) == Ty1->getScalarSizeInBits(),
"llvm.ptrmask intrinsic second argument bitwidth must match "
"pointer index type size of first argument",
&Call);
break;
}
};
Expand Down
13 changes: 6 additions & 7 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Expand Up @@ -1966,13 +1966,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (match(II->getArgOperand(0),
m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
m_Value(InnerMask))))) {
if (II->getArgOperand(1)->getType() == InnerMask->getType()) {
Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
return replaceInstUsesWith(
*II,
Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
{InnerPtr, NewMask}));
}
assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
"Mask types must match");
Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
return replaceInstUsesWith(
*II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
{InnerPtr, NewMask}));
}
break;
}
Expand Down
15 changes: 0 additions & 15 deletions llvm/test/CodeGen/AArch64/lower-ptrmask.ll
Expand Up @@ -12,18 +12,3 @@ define ptr @test1(ptr %src) {
%ptr = call ptr @llvm.ptrmask.p0.i64(ptr %src, i64 72057594037927928)
ret ptr %ptr
}

declare ptr @llvm.ptrmask.p0.i32(ptr, i32)

; CHECK-LABEL: name: test2
; CHECK: %0:gpr64 = COPY $x0
; CHECK-NEXT: %1:gpr32 = MOVi32imm 10000
; CHECK-NEXT: %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
; CHECK-NEXT: %3:gpr64 = ANDXrr %0, killed %2
; CHECK-NEXT: $x0 = COPY %3
; CHECK-NEXT: RET_ReallyLR implicit $x0

define ptr @test2(ptr %src) {
%ptr = call ptr @llvm.ptrmask.p0.i32(ptr %src, i32 10000)
ret ptr %ptr
}
108 changes: 0 additions & 108 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
Expand Up @@ -21,78 +21,6 @@ define ptr @ptrmask_flat_i64(ptr %ptr, i64 %mask) {
ret ptr %masked
}

define ptr @ptrmask_flat_i32(ptr %ptr, i32 %mask) {
; CHECK-LABEL: name: ptrmask_flat_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%masked = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 %mask)
ret ptr %masked
}

define ptr @ptrmask_flat_i16(ptr %ptr, i16 %mask) {
; CHECK-LABEL: name: ptrmask_flat_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%masked = call ptr @llvm.ptrmask.p0.i16(ptr %ptr, i16 %mask)
ret ptr %masked
}

define ptr @ptrmask_flat_i1(ptr %ptr, i1 %mask) {
; CHECK-LABEL: name: ptrmask_flat_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%masked = call ptr @llvm.ptrmask.p0.i1(ptr %ptr, i1 %mask)
ret ptr %masked
}

define ptr addrspace(3) @ptrmask_local_i64(ptr addrspace(3) %ptr, i64 %mask) {
; CHECK-LABEL: name: ptrmask_local_i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask)
ret ptr addrspace(3) %masked
}

define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) {
; CHECK-LABEL: name: ptrmask_local_i32
; CHECK: bb.1 (%ir-block.0):
Expand All @@ -107,47 +35,11 @@ define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) {
ret ptr addrspace(3) %masked
}

define ptr addrspace(3) @ptrmask_local_i16(ptr addrspace(3) %ptr, i16 %mask) {
; CHECK-LABEL: name: ptrmask_local_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask)
ret ptr addrspace(3) %masked
}

define ptr addrspace(3) @ptrmask_local_i1(ptr addrspace(3) %ptr, i1 %mask) {
; CHECK-LABEL: name: ptrmask_local_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3) %ptr, i1 %mask)
ret ptr addrspace(3) %masked
}

; Seems to not work
; define <2 x ptr> @ptrmask_flat_i64_v2(<2 x ptr> %ptr, <2 x i64> %mask) {
; %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> %mask)
; ret <2 x ptr> %masked
; }

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
declare ptr @llvm.ptrmask.p0.i16(ptr, i16)
declare ptr @llvm.ptrmask.p0.i1(ptr, i1)
declare ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3), i64)
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32)
declare ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3), i16)
declare ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3), i1)

0 comments on commit eb86de6

Please sign in to comment.