Skip to content

Commit

Permalink
[ArgumentPromotion] Fix byval alignment handling.
Browse files Browse the repository at this point in the history
Make sure the alignment of the generated operations matches the
alignment of the byval argument.  Previously, we were just ignoring
alignment and getting lucky.

While I'm here, also delete the unnecessary "tail" handling.
Passing a pointer to a byval argument to a "tail" call is UB, so
rewriting to an alloca doesn't require any special handling.

Differential Revision: https://reviews.llvm.org/D89819
  • Loading branch information
efriedma-quic committed May 11, 2021
1 parent 4975587 commit 61cbbba
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 96 deletions.
36 changes: 17 additions & 19 deletions llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
Expand Up @@ -243,6 +243,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// to pass in the loaded pointers.
//
SmallVector<Value *, 16> Args;
const DataLayout &DL = F->getParent()->getDataLayout();
while (!F->use_empty()) {
CallBase &CB = cast<CallBase>(*F->user_back());
assert(CB.getCalledFunction() == F);
Expand All @@ -264,13 +265,17 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
const StructLayout *SL = DL.getStructLayout(STy);
Align StructAlign = *I->getParamAlign();
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
auto *Idx =
IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
// TODO: Tell AA about the new values?
Args.push_back(IRB.CreateLoad(STy->getElementType(i), Idx,
Idx->getName() + ".val"));
Align Alignment =
commonAlignment(StructAlign, SL->getElementOffset(i));
Args.push_back(IRB.CreateAlignedLoad(
STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val"));
ArgAttrVec.push_back(AttributeSet());
}
} else if (!I->use_empty()) {
Expand Down Expand Up @@ -358,8 +363,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
CB.eraseFromParent();
}

const DataLayout &DL = F->getParent()->getDataLayout();

// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
// function empty.
Expand All @@ -386,35 +389,27 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,

// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
Value *TheAlloca = new AllocaInst(
AgTy, DL.getAllocaAddrSpace(), nullptr,
I->getParamAlign().getValueOr(DL.getPrefTypeAlign(AgTy)), "",
InsertPt);
Align StructAlign = *I->getParamAlign();
Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
StructAlign, "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
nullptr};
const StructLayout *SL = DL.getStructLayout(STy);

for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx = GetElementPtrInst::Create(
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
I2->setName(I->getName() + "." + Twine(i));
new StoreInst(&*I2++, Idx, InsertPt);
Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
}

// Anything that used the arg should now use the alloca.
I->replaceAllUsesWith(TheAlloca);
TheAlloca->takeName(&*I);

// If the alloca is used in a call, we must clear the tail flag since
// the callee now uses an alloca from the caller.
for (User *U : TheAlloca->users()) {
CallInst *Call = dyn_cast<CallInst>(U);
if (!Call)
continue;
Call->setTailCall(false);
}
continue;
}

Expand Down Expand Up @@ -949,7 +944,10 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely
// packed or if we can prove the padding bytes are never accessed.
bool isSafeToPromote = PtrArg->hasByValAttr() &&
//
// Only handle arguments with specified alignment; if it's unspecified, the
// actual alignment of the argument is target-specific.
bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() &&
(ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
!canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/ArgumentPromotion/attrs.ll
Expand Up @@ -5,11 +5,11 @@
%struct.ss = type { i32, i64 }

; Don't drop 'byval' on %X here.
define internal void @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X, i32 %i) nounwind {
define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b, i32* byval(i32) align 4 %X, i32 %i) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) [[X:%.*]], i32 [[I:%.*]])
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]], i32 [[I:%.*]]) [[ATTR0:#.*]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4
; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
Expand All @@ -35,7 +35,7 @@ entry:
; Also make sure we don't drop the call zeroext attribute.
define i32 @test(i32* %X) {
; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (i32* [[X:%.*]])
; CHECK-SAME: (i32* [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
Expand All @@ -46,7 +46,7 @@ define i32 @test(i32* %X) {
; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) [[X]], i32 zeroext 0)
; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]], i32 zeroext 0)
; CHECK-NEXT: ret i32 0
;
entry:
Expand All @@ -56,7 +56,7 @@ entry:
%tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %tmp4, align 4

call void @f(%struct.ss* byval(%struct.ss) %S, i32* byval(i32) %X, i32 zeroext 0)
call void @f( %struct.ss* byval(%struct.ss) align 4 %S, i32* byval(i32) align 4 %X, i32 zeroext 0)

ret i32 0
}
16 changes: 8 additions & 8 deletions llvm/test/Transforms/ArgumentPromotion/byval-2.ll
Expand Up @@ -7,13 +7,13 @@

%struct.ss = type { i32, i64 }

define internal void @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X) nounwind {
define internal void @f(%struct.ss* byval(%struct.ss) align 8 %b, i32* byval(i32) align 4 %X) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) [[X:%.*]])
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]]) [[ATTR0:#.*]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4
; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 8
; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
Expand All @@ -35,26 +35,26 @@ entry:

define i32 @test(i32* %X) {
; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (i32* [[X:%.*]])
; CHECK-SAME: (i32* [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4
; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 8
; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) [[X]])
; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]])
; CHECK-NEXT: ret i32 0
;
entry:
%S = alloca %struct.ss
%S = alloca %struct.ss, align 8
%tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
store i32 1, i32* %tmp1, align 8
%tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %tmp4, align 4
call void @f(%struct.ss* byval(%struct.ss) %S, i32* byval(i32) %X)
call void @f( %struct.ss* byval(%struct.ss) align 8 %S, i32* byval(i32) align 4 %X)
ret i32 0
}
39 changes: 29 additions & 10 deletions llvm/test/Transforms/ArgumentPromotion/byval.ll
Expand Up @@ -6,9 +6,9 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1

%struct.ss = type { i32, i64 }

define internal void @f(%struct.ss* byval(%struct.ss) %b) nounwind {
define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) [[ATTR0:#.*]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
Expand All @@ -32,11 +32,11 @@ entry:

define internal void @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@g
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) [[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4
; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 32
; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
Expand All @@ -53,11 +53,28 @@ entry:
ret void
}

; Don't transform if alignment isn't specified; the actual alignment
; is target-specific, and not exposed anywhere.
;
; (If we ever change byval so a missing alignment isn't legal, we can
; just delete this test.)
define internal void @h(%struct.ss* byval(%struct.ss) %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@h
; CHECK-SAME: (%struct.ss* byval(%struct.ss) %b)
;
entry:
%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
%tmp1 = load i32, i32* %tmp, align 4
%tmp2 = add i32 %tmp1, 1
store i32 %tmp2, i32* %tmp, align 4
ret void
}

define i32 @main() nounwind {
; CHECK-LABEL: define {{[^@]+}}@main()
; CHECK-LABEL: define {{[^@]+}}@main
; CHECK-SAME: () [[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
Expand All @@ -68,20 +85,22 @@ define i32 @main() nounwind {
; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4
; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 32
; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4
; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]])
; CHECK-NEXT: call void @h(%struct.ss* byval(%struct.ss) %S)
; CHECK-NEXT: ret i32 0
;
entry:
%S = alloca %struct.ss
%S = alloca %struct.ss, align 32
%tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
store i32 1, i32* %tmp1, align 8
%tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %tmp4, align 4
call void @f(%struct.ss* byval(%struct.ss) %S) nounwind
call void @g(%struct.ss* byval(%struct.ss) %S) nounwind
call void @f(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
call void @g(%struct.ss* byval(%struct.ss) align 32 %S) nounwind
call void @h(%struct.ss* byval(%struct.ss) %S) nounwind
ret i32 0
}

Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/ArgumentPromotion/dbg.ll
Expand Up @@ -6,7 +6,7 @@ declare void @sink(i32)

define internal void @test(i32** %X) !dbg !2 {
; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3
; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) [[DBG3:!dbg !.*]] {
; CHECK-NEXT: call void @sink(i32 [[X_VAL_VAL]])
; CHECK-NEXT: ret void
;
Expand All @@ -18,10 +18,10 @@ define internal void @test(i32** %X) !dbg !2 {

%struct.pair = type { i32, i32 }

define internal void @test_byval(%struct.pair* byval(%struct.pair) %P) {
define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) {
; CHECK-LABEL: define {{[^@]+}}@test_byval
; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]])
; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8
; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4
; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
Expand All @@ -33,20 +33,20 @@ define internal void @test_byval(%struct.pair* byval(%struct.pair) %P) {

define void @caller(i32** %Y, %struct.pair* %P) {
; CHECK-LABEL: define {{[^@]+}}@caller
; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]])
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4
; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4
; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg !4
; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5
; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5
; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5
; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5
; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5
; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]]) {
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, [[DBG4:!dbg !.*]]
; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, [[DBG4]]
; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), [[DBG4]]
; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, [[DBG5:!dbg !.*]]
; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, [[DBG5]]
; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, [[DBG5]]
; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, [[DBG5]]
; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), [[DBG5]]
; CHECK-NEXT: ret void
;
call void @test(i32** %Y), !dbg !1

call void @test_byval(%struct.pair* %P), !dbg !6
call void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P), !dbg !6
ret void
}

Expand Down
39 changes: 0 additions & 39 deletions llvm/test/Transforms/ArgumentPromotion/tail.ll

This file was deleted.

0 comments on commit 61cbbba

Please sign in to comment.