Skip to content

Commit

Permalink
[SelectionDAGBuilder] Stop setting alignment to one for hidden sret v…
Browse files Browse the repository at this point in the history
…alues

We allocated a suitably aligned frame index so we know that all the values
have ABI alignment.
For MIPS this avoids using a pair of lwl + lwr instructions instead of a
single lw. I found this when compiling CHERI pure capability code where
we can't use the lwl/lwr unaligned loads/stores and were falling
back to a byte load + shift + or sequence.

This should save a few instructions for MIPS and possibly other backends
that don't have fast unaligned loads/stores.
It also improves code generation for CodeGen/X86/pr34653.ll and
CodeGen/WebAssembly/offset.ll since they can now use aligned loads.

Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D78999
  • Loading branch information
arichardson committed May 4, 2020
1 parent 3fc7388 commit d1ff003
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 61 deletions.
11 changes: 8 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -1833,6 +1833,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();

SmallVector<SDValue, 4> Chains(NumValues);
Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
Expand All @@ -1841,9 +1842,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
Chains[i] = DAG.getStore(
Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
commonAlignment(BaseAlign, Offsets[i]));
}

Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
Expand Down Expand Up @@ -9271,6 +9274,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);

MachineFunction &MF = CLI.DAG.getMachineFunction();
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
Expand All @@ -9279,7 +9284,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
/* Alignment = */ 1);
HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
Expand Down
34 changes: 9 additions & 25 deletions llvm/test/CodeGen/Mips/implicit-sret.ll
Expand Up @@ -16,23 +16,13 @@ define internal void @test() unnamed_addr nounwind {
; CHECK-NEXT: daddiu $4, $sp, 8
; CHECK-NEXT: jal implicit_sret_decl
; CHECK-NEXT: nop
; CHECK-NEXT: # implicit-def: $at_64
; CHECK-NEXT: ldl $1, 24($sp)
; CHECK-NEXT: ldr $1, 31($sp)
; CHECK-NEXT: ld $6, 24($sp)
; CHECK-NEXT: ld $5, 16($sp)
; CHECK-NEXT: ld $7, 32($sp)
; CHECK-NEXT: lw $1, 8($sp)
; CHECK-NEXT: # implicit-def: $v0_64
; CHECK-NEXT: ldl $2, 16($sp)
; CHECK-NEXT: ldr $2, 23($sp)
; CHECK-NEXT: # implicit-def: $v1_64
; CHECK-NEXT: ldl $3, 32($sp)
; CHECK-NEXT: ldr $3, 39($sp)
; CHECK-NEXT: # implicit-def: $a1
; CHECK-NEXT: lwl $5, 8($sp)
; CHECK-NEXT: lwr $5, 11($sp)
; CHECK-NEXT: # implicit-def: $a0_64
; CHECK-NEXT: move $4, $5
; CHECK-NEXT: move $5, $2
; CHECK-NEXT: move $6, $1
; CHECK-NEXT: move $7, $3
; CHECK-NEXT: move $2, $1
; CHECK-NEXT: move $4, $2
; CHECK-NEXT: jal use_sret
; CHECK-NEXT: nop
; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
Expand Down Expand Up @@ -75,15 +65,9 @@ define internal void @test2() unnamed_addr nounwind {
; CHECK-NEXT: daddiu $4, $sp, 0
; CHECK-NEXT: jal implicit_sret_decl2
; CHECK-NEXT: nop
; CHECK-NEXT: # implicit-def: $at
; CHECK-NEXT: lwl $1, 20($sp)
; CHECK-NEXT: lwr $1, 23($sp)
; CHECK-NEXT: # implicit-def: $v0
; CHECK-NEXT: lwl $2, 12($sp)
; CHECK-NEXT: lwr $2, 15($sp)
; CHECK-NEXT: # implicit-def: $v1
; CHECK-NEXT: lwl $3, 4($sp)
; CHECK-NEXT: lwr $3, 7($sp)
; CHECK-NEXT: lw $1, 20($sp)
; CHECK-NEXT: lw $2, 12($sp)
; CHECK-NEXT: lw $3, 4($sp)
; CHECK-NEXT: # implicit-def: $a0_64
; CHECK-NEXT: move $4, $3
; CHECK-NEXT: # implicit-def: $a1_64
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/WebAssembly/offset.ll
Expand Up @@ -645,9 +645,9 @@ define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q)

; CHECK-LABEL: aggregate_return:
; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK: i64.store 8($0):p2align=2, $pop[[L0]]{{$}}
; CHECK: i64.store 8($0), $pop[[L0]]{{$}}
; CHECK: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
; CHECK: i64.store 0($0):p2align=2, $pop[[L1]]{{$}}
; CHECK: i64.store 0($0), $pop[[L1]]{{$}}
define {i32,i32,i32,i32} @aggregate_return() {
ret {i32,i32,i32,i32} zeroinitializer
}
Expand Down
50 changes: 19 additions & 31 deletions llvm/test/CodeGen/X86/pr34653.ll
Expand Up @@ -15,28 +15,22 @@ define void @pr34653() {
; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: callq test
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm2
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm3
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm4
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm5
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm6
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm7
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm8
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm9
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm10
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm11
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm12
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm13
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm14
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm15
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
Expand All @@ -60,17 +54,11 @@ define void @pr34653() {
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
Expand Down

0 comments on commit d1ff003

Please sign in to comment.