Skip to content

Commit

Permalink
Instcombine: destructure loads of structs that do not contain padding
Browse files Browse the repository at this point in the history
For non padded structs, we can just proceed and deaggregate them.
We don't want to do this when there is padding in the struct as to not
lose information about this padding (the subsequent passes would then
try hard to preserve the padding, which is undesirable).

Also update extractvalue.ll and cast.ll so that they use structs with padding.

Remove the FIXME in the extractvalue-of-load case as the non-padded case is
handled when processing the load, and we don't want to do it on the padded
case.

Patch by: Amaury SECHET <deadalnix@gmail.com>

Differential Revision: http://reviews.llvm.org/D14483

From: Mehdi Amini <mehdi.amini@apple.com>
llvm-svn: 255600
  • Loading branch information
joker-eph committed Dec 15, 2015
1 parent fd10d98 commit 1c131b3
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 92 deletions.
7 changes: 6 additions & 1 deletion llvm/include/llvm/IR/DataLayout.h
Expand Up @@ -475,7 +475,8 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) {
class StructLayout {
uint64_t StructSize;
unsigned StructAlignment;
unsigned NumElements;
bool IsPadded : 1;
unsigned NumElements : 31;
uint64_t MemberOffsets[1]; // variable sized array!
public:
uint64_t getSizeInBytes() const { return StructSize; }
Expand All @@ -484,6 +485,10 @@ class StructLayout {

unsigned getAlignment() const { return StructAlignment; }

/// Returns whether the struct has padding or not between its fields.
/// NB: Padding in nested elements is not taken into account.
bool hasPadding() const { return IsPadded; }

/// \brief Given a valid byte offset into the structure, returns the structure
/// index that contains it.
unsigned getElementContainingOffset(uint64_t Offset) const;
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/IR/DataLayout.cpp
Expand Up @@ -41,6 +41,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
StructAlignment = 0;
StructSize = 0;
IsPadded = false;
NumElements = ST->getNumElements();

// Loop over each of the elements, placing them in memory.
Expand All @@ -49,8 +50,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);

// Add padding if necessary to align the data element properly.
if ((StructSize & (TyAlign-1)) != 0)
if ((StructSize & (TyAlign-1)) != 0) {
IsPadded = true;
StructSize = RoundUpToAlignment(StructSize, TyAlign);
}

// Keep track of maximum alignment constraint.
StructAlignment = std::max(TyAlign, StructAlignment);
Expand All @@ -64,8 +67,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {

// Add padding to the end of the struct so that it could be put in an array
// and all array elements would be aligned correctly.
if ((StructSize & (StructAlignment-1)) != 0)
if ((StructSize & (StructAlignment-1)) != 0) {
IsPadded = true;
StructSize = RoundUpToAlignment(StructSize, StructAlignment);
}
}


Expand Down
57 changes: 55 additions & 2 deletions llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Expand Up @@ -524,12 +524,40 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {

if (auto *ST = dyn_cast<StructType>(T)) {
// If the struct only have one element, we unpack.
if (ST->getNumElements() == 1) {
unsigned Count = ST->getNumElements();
if (Count == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
".unpack");
return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, LI.getName()));
}

// We don't want to break loads with padding here as we'd lose
// the knowledge that padding exists for the rest of the pipeline.
const DataLayout &DL = IC.getDataLayout();
auto *SL = DL.getStructLayout(ST);
if (SL->hasPadding())
return nullptr;

auto Name = LI.getName();
auto LoadName = LI.getName() + ".unpack";
auto EltName = Name + ".elt";
auto *Addr = LI.getPointerOperand();
Value *V = UndefValue::get(T);
auto *IdxType = Type::getInt32Ty(ST->getContext());
auto *Zero = ConstantInt::get(IdxType, 0);
for (unsigned i = 0; i < Count; i++) {
Value *Indices[2] = {
Zero,
ConstantInt::get(IdxType, i),
};
auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
V = IC.Builder->CreateInsertValue(V, L, i);
}

V->setName(Name);
return IC.ReplaceInstUsesWith(LI, V);
}

if (auto *AT = dyn_cast<ArrayType>(T)) {
Expand Down Expand Up @@ -902,11 +930,36 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {

if (auto *ST = dyn_cast<StructType>(T)) {
// If the struct only have one element, we unpack.
if (ST->getNumElements() == 1) {
unsigned Count = ST->getNumElements();
if (Count == 1) {
V = IC.Builder->CreateExtractValue(V, 0);
combineStoreToNewValue(IC, SI, V);
return true;
}

// We don't want to break stores with padding here as we'd lose
// the knowledge that padding exists for the rest of the pipeline.
const DataLayout &DL = IC.getDataLayout();
auto *SL = DL.getStructLayout(ST);
if (SL->hasPadding())
return false;

auto EltName = V->getName() + ".elt";
auto *Addr = SI.getPointerOperand();
auto AddrName = Addr->getName() + ".repack";
auto *IdxType = Type::getInt32Ty(ST->getContext());
auto *Zero = ConstantInt::get(IdxType, 0);
for (unsigned i = 0; i < Count; i++) {
Value *Indices[2] = {
Zero,
ConstantInt::get(IdxType, i),
};
auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
IC.Builder->CreateStore(Val, Ptr);
}

return true;
}

if (auto *AT = dyn_cast<ArrayType>(T)) {
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Expand Up @@ -2311,9 +2311,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
}
if (LoadInst *L = dyn_cast<LoadInst>(Agg))
// If the (non-volatile) load only has one use, we can rewrite this to a
// load from a GEP. This reduces the size of the load.
// FIXME: If a load is used only by extractvalue instructions then this
// could be done regardless of having multiple uses.
// load from a GEP. This reduces the size of the load. If a load is used
// only by extractvalue instructions then this either must have been
// optimized before, or it is a struct with padding, in which case we
// don't want to do the transformation as it loses padding knowledge.
if (L->isSimple() && L->hasOneUse()) {
// extractvalue has integer indices, getelementptr has Value*s. Convert.
SmallVector<Value*, 4> Indices;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/cast.ll
Expand Up @@ -722,7 +722,7 @@ define i1 @test67(i1 %a, i32 %b) {
; CHECK: ret i1 false
}

%s = type { i32, i32, i32 }
%s = type { i32, i32, i16 }

define %s @test68(%s *%p, i64 %i) {
; CHECK-LABEL: @test68(
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/Transforms/InstCombine/extractvalue.ll
Expand Up @@ -48,16 +48,16 @@ define i32 @foo(i32 %a, i32 %b) {
; CHECK: call {{.*}}(i32 [[LOAD]])
; CHECK-NOT: extractvalue
; CHECK: ret i32 [[LOAD]]
define i32 @extract2gep({i32, i32}* %pair, i32* %P) {
define i32 @extract2gep({i16, i32}* %pair, i32* %P) {
; The load + extractvalue should be converted
; to an inbounds gep + smaller load.
; The new load should be in the same spot as the old load.
%L = load {i32, i32}, {i32, i32}* %pair
%L = load {i16, i32}, {i16, i32}* %pair
store i32 0, i32* %P
br label %loop

loop:
%E = extractvalue {i32, i32} %L, 1
%E = extractvalue {i16, i32} %L, 1
%C = call i32 @baz(i32 %E)
store i32 %C, i32* %P
%cond = icmp eq i32 %C, 0
Expand All @@ -67,17 +67,17 @@ end:
ret i32 %E
}

; CHECK-LABEL: define i32 @doubleextract2gep(
; CHECK-LABEL: define i16 @doubleextract2gep(
; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %arg, i64 0, i32 1, i32 1
; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
; CHECK-NEXT: ret i32 [[LOAD]]
define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i16, i16* [[GEP]]
; CHECK-NEXT: ret i16 [[LOAD]]
define i16 @doubleextract2gep({i16, {i32, i16}}* %arg) {
; The load + extractvalues should be converted
; to a 3-index inbounds gep + smaller load.
%L = load {i32, {i32, i32}}, {i32, {i32, i32}}* %arg
%E1 = extractvalue {i32, {i32, i32}} %L, 1
%E2 = extractvalue {i32, i32} %E1, 1
ret i32 %E2
%L = load {i16, {i32, i16}}, {i16, {i32, i16}}* %arg
%E1 = extractvalue {i16, {i32, i16}} %L, 1
%E2 = extractvalue {i32, i16} %E1, 1
ret i16 %E2
}

; CHECK: define i32 @nogep-multiuse
Expand Down

0 comments on commit 1c131b3

Please sign in to comment.