Skip to content

Commit

Permalink
LowerMemIntrinsics: Skip memmove with different address spaces
Browse files Browse the repository at this point in the history
This is a quick fix for an assert when the source and dest have
different address spaces. The pointer compare needs to have matching
types, but we can't generically introduce addrspacecast and we don't
know if the address spaces alias.
  • Loading branch information
arsenm committed Jun 10, 2023
1 parent f3837e7 commit 6d2e5c3
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 19 deletions.
5 changes: 3 additions & 2 deletions llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ void createMemCpyLoopKnownSize(
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
ScalarEvolution *SE = nullptr);

/// Expand \p MemMove as a loop. \p MemMove is not deleted.
void expandMemMoveAsLoop(MemMoveInst *MemMove);
/// Expand \p MemMove as a loop. \p MemMove is not deleted. Returns true if the
/// memmove was lowered.
bool expandMemMoveAsLoop(MemMoveInst *MemMove);

/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,10 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
LookupLibInfo(*ParentFunc).has(LibFunc_memmove))
break;

expandMemMoveAsLoop(Memmove);
Changed = true;
Memmove->eraseFromParent();
if (expandMemMoveAsLoop(Memmove)) {
Changed = true;
Memmove->eraseFromParent();
}
}

break;
Expand Down
39 changes: 25 additions & 14 deletions llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(
Expand Down Expand Up @@ -373,7 +376,7 @@ void llvm::createMemCpyLoopUnknownSize(
// }
// return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
static bool createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, Value *CopyLen, Align SrcAlign,
Align DstAlign, bool SrcIsVolatile,
bool DstIsVolatile) {
Expand All @@ -385,10 +388,16 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
// TODO: Use different element type if possible?
IRBuilder<> CastBuilder(InsertBefore);
Type *EltTy = CastBuilder.getInt8Ty();
Type *PtrTy =
CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);

// FIXME: We don't know generically if it's legal to introduce an
// addrspacecast. We need to know either if it's legal to insert an
// addrspacecast, or if the address spaces cannot alias.
if (SrcAddr->getType()->getPointerAddressSpace() !=
DstAddr->getType()->getPointerAddressSpace()) {
LLVM_DEBUG(dbgs() << "Do not know how to expand memmove between different "
"address spaces\n");
return false;
}

// Create the a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
Expand Down Expand Up @@ -464,6 +473,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,

BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
ElseTerm->eraseFromParent();
return true;
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
Expand Down Expand Up @@ -552,15 +562,16 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
}
}

void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
createMemMoveLoop(/* InsertBefore */ Memmove,
/* SrcAddr */ Memmove->getRawSource(),
/* DstAddr */ Memmove->getRawDest(),
/* CopyLen */ Memmove->getLength(),
/* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
/* DestAlign */ Memmove->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memmove->isVolatile(),
/* DstIsVolatile */ Memmove->isVolatile());
bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
return createMemMoveLoop(
/* InsertBefore */ Memmove,
/* SrcAddr */ Memmove->getRawSource(),
/* DstAddr */ Memmove->getRawDest(),
/* CopyLen */ Memmove->getLength(),
/* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
/* DestAlign */ Memmove->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memmove->isVolatile(),
/* DstIsVolatile */ Memmove->isVolatile());
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
Expand Down
60 changes: 60 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3)
declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
declare void @llvm.memmove.p0.p1.i64(ptr nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
declare void @llvm.memmove.p1.p0.i64(ptr addrspace(1) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
declare void @llvm.memmove.p5.p1.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
declare void @llvm.memmove.p0.p5.i64(ptr nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
declare void @llvm.memmove.p5.p0.i64(ptr addrspace(5) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1

declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1

Expand Down Expand Up @@ -1307,5 +1313,59 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1)
ret void
}

define amdgpu_kernel void @memmove_flat_align1_global_align1(ptr %dst, ptr addrspace(1) %src) {
; OPT-LABEL: @memmove_flat_align1_global_align1(
; OPT-NEXT: call void @llvm.memmove.p0.p1.i64(ptr [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 256, i1 false)
; OPT-NEXT: ret void
;
call void @llvm.memmove.p0.p1.i64(ptr %dst, ptr addrspace(1) %src, i64 256, i1 false)
ret void
}

define amdgpu_kernel void @memmove_global_align1_flat_align1(ptr addrspace(1) %dst, ptr %src) {
; OPT-LABEL: @memmove_global_align1_flat_align1(
; OPT-NEXT: call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) [[DST:%.*]], ptr [[SRC:%.*]], i64 256, i1 false)
; OPT-NEXT: ret void
;
call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) %dst, ptr %src, i64 256, i1 false)
ret void
}

define amdgpu_kernel void @memmove_flat_align1_private_align1(ptr %dst, ptr addrspace(5) %src) {
; OPT-LABEL: @memmove_flat_align1_private_align1(
; OPT-NEXT: call void @llvm.memmove.p0.p5.i64(ptr [[DST:%.*]], ptr addrspace(5) [[SRC:%.*]], i64 256, i1 false)
; OPT-NEXT: ret void
;
call void @llvm.memmove.p0.p5.i64(ptr %dst, ptr addrspace(5) %src, i64 256, i1 false)
ret void
}

define amdgpu_kernel void @memmove_private_align1_flat_align1(ptr addrspace(5) %dst, ptr %src) {
; OPT-LABEL: @memmove_private_align1_flat_align1(
; OPT-NEXT: call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) [[DST:%.*]], ptr [[SRC:%.*]], i64 256, i1 false)
; OPT-NEXT: ret void
;
call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) %dst, ptr %src, i64 256, i1 false)
ret void
}

define amdgpu_kernel void @memmove_private_align1_global_align1(ptr addrspace(5) %dst, ptr addrspace(1) %src) {
; OPT-LABEL: @memmove_private_align1_global_align1(
; OPT-NEXT: call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 256, i1 false)
; OPT-NEXT: ret void
;
call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) %dst, ptr addrspace(1) %src, i64 256, i1 false)
ret void
}

define amdgpu_kernel void @memmove_global_align1_private_align1(ptr addrspace(1) %dst, ptr addrspace(5) %src) {
; OPT-LABEL: @memmove_global_align1_private_align1(
; OPT-NEXT: call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(5) [[SRC:%.*]], i64 256, i1 false)
; OPT-NEXT: ret void
;
call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) %dst, ptr addrspace(5) %src, i64 256, i1 false)
ret void
}

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }

0 comments on commit 6d2e5c3

Please sign in to comment.