Skip to content

Commit

Permalink
Merge clang's isRepeatedBytePattern with LLVM's isBytewiseValue
Browse files Browse the repository at this point in the history
Summary:
This code was in CGDecl.cpp and really belongs in LLVM's isBytewiseValue. Teach isBytewiseValue the tricks clang's isRepeatedBytePattern had, including merging undef properly, and recursing on more types.

clang part of this patch: D51752

Subscribers: dexonsmith, llvm-commits

Differential Revision: https://reviews.llvm.org/D51751

llvm-svn: 342709
  • Loading branch information
jfbastien committed Sep 21, 2018
1 parent 9da65a3 commit 73d8e4e
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 55 deletions.
3 changes: 2 additions & 1 deletion llvm/include/llvm/Analysis/ValueTracking.h
Expand Up @@ -221,7 +221,8 @@ class Value;
/// return the i8 value that it is represented with. This is true for all i8
/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
/// i16 0x1234), return null.
/// i16 0x1234), return null. If the value is entirely undef and padding,
/// return undef.
Value *isBytewiseValue(Value *V);

/// Given an aggregate and a sequence of indices, see if the scalar value
Expand Down
94 changes: 62 additions & 32 deletions llvm/lib/Analysis/ValueTracking.cpp
Expand Up @@ -3042,62 +3042,92 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
return true;
}

/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is true for all i8
/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
/// i16 0x1234), return null. If the value is entirely undef and padding,
/// return undef.
Value *llvm::isBytewiseValue(Value *V) {

  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType()->isIntegerTy(8))
    return V;

  LLVMContext &Ctx = V->getContext();

  // Undef don't care.
  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
  if (isa<UndefValue>(V))
    return UndefInt8;

  Constant *C = dyn_cast<Constant>(V);
  if (!C) {
    // Conceptually, we could handle things like:
    //   %a = zext i8 %X to i16
    //   %b = shl i16 %a, 8
    //   %c = or i16 %a, %b
    // but until there is an example that actually needs this, it doesn't seem
    // worth worrying about.
    return nullptr;
  }

  // Handle 'null' ConstantArrayZero etc.
  if (C->isNullValue())
    return Constant::getNullValue(Type::getInt8Ty(Ctx));

  // Constant floating-point values can be handled as integer values if the
  // corresponding integer value is "byteable". An important case is 0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
    Type *Ty = nullptr;
    if (CFP->getType()->isHalfTy())
      Ty = Type::getInt16Ty(Ctx);
    else if (CFP->getType()->isFloatTy())
      Ty = Type::getInt32Ty(Ctx);
    else if (CFP->getType()->isDoubleTy())
      Ty = Type::getInt64Ty(Ctx);
    // Don't handle long double formats, which have strange constraints.
    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
  }

  // We can handle constant integers that are multiple of 8 bits.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
    if (CI->getBitWidth() % 8 == 0) {
      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
      if (!CI->getValue().isSplat(8))
        return nullptr;
      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
    }
  }

  // Combine the byte patterns of two sub-values: equal patterns merge to
  // themselves, undef merges with anything, and any other pair conflicts
  // (nullptr). nullptr absorbs everything.
  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
    if (LHS == RHS)
      return LHS;
    if (!LHS || !RHS)
      return nullptr;
    if (LHS == UndefInt8)
      return RHS;
    if (RHS == UndefInt8)
      return LHS;
    return nullptr;
  };

  // A ConstantDataSequential is splatable if all of its elements merge to a
  // single byte pattern.
  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
        return nullptr;
    return Val;
  }

  // A ConstantVector is splatable if it has a splat element that is itself
  // splatable.
  if (isa<ConstantVector>(C)) {
    Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
    return Splat ? isBytewiseValue(Splat) : nullptr;
  }

  // Arrays and structs recurse over their operands, merging byte patterns;
  // undef members (and, for structs, undef padding) merge into the rest.
  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
        return nullptr;
    return Val;
  }

  // Don't try to handle the handful of other constants.
  return nullptr;
}

Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Expand Up @@ -348,6 +348,9 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
// FIXME: This could check for UndefValue because it can be merged into any
// other valid pattern.

// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Expand Down Expand Up @@ -645,9 +648,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,

if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
if (For == ForMemset::Yes) {
if (isa<UndefValue>(FirstSplatValue))
FirstSplatValue = SecondSplatValue;
if (FirstSplatValue != SecondSplatValue)
continue;
} else {
if (isa<UndefValue>(FirstPatternValue))
FirstPatternValue = SecondPatternValue;
if (FirstPatternValue != SecondPatternValue)
continue;
}
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Expand Up @@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!NextStore->isSimple()) break;

// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
if (isa<UndefValue>(ByteVal) && StoredByte)
ByteVal = StoredByte;
if (ByteVal != StoredByte)
break;

// Check to see if this store is to a constant offset from the start ptr.
Expand Down
33 changes: 26 additions & 7 deletions llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
Expand Up @@ -73,13 +73,16 @@ define void @copyalias(%S* %src, %S* %dst) {
ret void
}

; If the store address is computed ina complex manner, make
; If the store address is computed in a complex manner, make
; sure we lift the computation as well if needed and possible.
define void @addrproducer(%S* %src, %S* %dst) {
; CHECK-LABEL: addrproducer
; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1
; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
; CHECK-NEXT: store %S undef, %S* %dst
; CHECK-LABEL: addrproducer(
; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1
; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S undef, %S* %dst
Expand All @@ -89,7 +92,14 @@ define void @addrproducer(%S* %src, %S* %dst) {
}

define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
; CHECK-LABEL: aliasaddrproducer
; CHECK-LABEL: aliasaddrproducer(
; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src
; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr
; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
; CHECK-NEXT: store %S %[[SRC]], %S* %dst2
; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S undef, %S* %dst
%dstindex = load i32, i32* %dstidptr
Expand All @@ -99,7 +109,16 @@ define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
}

define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
; CHECK-LABEL: noaliasaddrproducer
; CHECK-LABEL: noaliasaddrproducer(
; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr
; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1
; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S undef, %S* %src
%2 = load i32, i32* %dstidptr
Expand Down
98 changes: 84 additions & 14 deletions llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
@@ -1,19 +1,89 @@
; RUN: opt -memcpyopt -S < %s | FileCheck %s

@cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @foo(i32*) nounwind

define void @test1() nounwind {
%arr = alloca [3 x i32], align 4
%arr_i8 = bitcast [3 x i32]* %arr to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %arr_i8, i8* align 4 bitcast ([3 x i32]* @cst to i8*), i64 12, i1 false)
%arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0
call void @foo(i32* %arraydecay) nounwind

; Copying from a global whose initializer is entirely undef: isBytewiseValue
; returns undef for it, so the memcpy can be rewritten as a memset.
@undef = internal constant i32 undef, align 4
define void @test_undef() nounwind {
%a = alloca i32, align 4
%i8 = bitcast i32* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false)
ret void
; CHECK-LABEL: @test_undef(
; CHECK: call void @llvm.memset
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
}

; [3 x i32] of all -1 is the repeated byte 0xFF, so memcpy becomes memset.
@i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
define void @test_i32x3() nounwind {
%a = alloca [3 x i32], align 4
%i8 = bitcast [3 x i32]* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false)
ret void
; CHECK-LABEL: @test_i32x3(
; CHECK: call void @llvm.memset
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
}

; An undef element merges with the -1 elements, so the array is still the
; repeated byte 0xFF and the memcpy becomes a memset.
@i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4
define void @test_i32x3_undef() nounwind {
%a = alloca [3 x i32], align 4
%i8 = bitcast [3 x i32]* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false)
ret void
; CHECK-LABEL: @test_i32x3_undef(
; CHECK: call void @llvm.memset
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
}

; A struct constant whose i8 field and [3 x i8] array all hold -86 (0xAA)
; splats to that single byte, so the memcpy becomes a memset.
%struct.bitfield = type { i8, [3 x i8] }
@bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4
define void @test_bitfield() nounwind {
%a = alloca %struct.bitfield, align 4
%i8 = bitcast %struct.bitfield* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false)
ret void
; CHECK-LABEL: @test_bitfield(
; CHECK: call void @llvm.memset
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
}

; An all-zero <16 x i1> is a null value, which splats to the zero byte, so
; the memcpy becomes a memset.
@i1x16_zero = internal constant <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, align 4
define void @test_i1x16_zero() nounwind {
%a = alloca <16 x i1>, align 4
%i8 = bitcast <16 x i1>* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false)
ret void
; CHECK-LABEL: @test_i1x16_zero(
; CHECK: call void @llvm.memset
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
}

; Non-zero i1 isn't currently handled. Should it be?
; The checks pin the current behavior: the memcpy is NOT turned into a memset.
@i1x16_one = internal constant <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, align 4
define void @test_i1x16_one() nounwind {
%a = alloca <16 x i1>, align 4
%i8 = bitcast <16 x i1>* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false)
ret void
; CHECK-LABEL: @test_i1x16_one(
; CHECK-NOT: call void @llvm.memset
; CHECK: call void @llvm.memcpy
; CHECK: ret void
}

; Half-precision 0.0 bitcasts to i16 0, which is byte-splattable, so the
; memcpy from the constant becomes a memset.
; (Stale CHECK lines for the removed @test1 were interleaved here; the checks
; below are the ones that belong to @test_half.)
@half = internal constant half 0xH0000, align 4
define void @test_half() nounwind {
%a = alloca half, align 4
%i8 = bitcast half* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false)
ret void
; CHECK-LABEL: @test_half(
; CHECK: call void @llvm.memset
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
}

0 comments on commit 73d8e4e

Please sign in to comment.