Skip to content
Permalink
Browse files

[ARM][DAGCOMBINE][FIX] PerformVMOVRRDCombine

Summary:
PerformVMOVRRDCombine omits adding an offset
of 4 to the PointerInfo when converting a
f64 = load[M]
to
{i32, i32} = {load[M], load[M + 4]}

This omission would allow the machine scheduler
to break dependencies with the second load.

 - pr42638

Reviewers: eli.friedman, dmgreen, ostannard

Reviewed By: ostannard

Subscribers: ostannard, javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64870

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366423 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information...
Diogo N. Sampaio
Diogo N. Sampaio committed Jul 18, 2019
1 parent 3afc24f commit 0bf4f3f8528e6b79cba86ab2ac41d616567f0c6d
Showing with 38 additions and 3 deletions.
  1. +5 −3 lib/Target/ARM/ARMISelLowering.cpp
  2. +33 −0 test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll
@@ -11748,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,

SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
SDValue NewLD2 = DAG.getLoad(
MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());

SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
LD->getPointerInfo().getWithOffset(4),
std::min(4U, LD->getAlignment()),
LD->getMemOperand()->getFlags());

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
if (DCI.DAG.getDataLayout().isBigEndian())
@@ -0,0 +1,33 @@
; RUN: llc -stop-after=machine-scheduler -debug-only dagcombine,selectiondag -o - %s 2>&1 | FileCheck %s
; REQUIRES: asserts
; pr42638
target triple = "armv8r-arm-none-eabi"
%struct.__va_list = type { i8* }
; Test function: variadic, reads the first variadic argument as a double and
; returns it. Attribute group #0 selects cortex-r52 with "-fp64".
define double @foo(i32 %P0, ...) #0 {
entry:
; 8-byte, 8-aligned stack buffer; only used as the inline-asm operand below.
%V1 = alloca [8 x i8], align 8
%vl = alloca %struct.__va_list, align 4
%0 = getelementptr inbounds [8 x i8], [8 x i8]* %V1, i32 0, i32 0
; Empty side-effecting inline asm taking the buffer address in a register.
call void asm sideeffect "", "r"(i8* nonnull %0)
%1 = bitcast %struct.__va_list* %vl to i8*
call void @llvm.va_start(i8* nonnull %1)
; Reinterpret the va_list's single i8* field as a double* and load it.
%2 = bitcast %struct.__va_list* %vl to double**
%argp.cur3 = load double*, double** %2, align 4
; 4-byte-aligned f64 load; presumably the load that PerformVMOVRRDCombine
; splits into two i32 loads (see the CHECK lines) -- confirm against the pass.
%v.sroa.0.0.copyload = load double, double* %argp.cur3, align 4
ret double %v.sroa.0.0.copyload
}

declare void @llvm.va_start(i8*)

attributes #0 = { "target-cpu"="cortex-r52" "target-features"="-fp64" }

; Ensures that the machine scheduler does not move the load of the upper
; 32 bits of the double ahead of the store that writes them to memory

; CHECK: Creating new node: {{.*}} = add FrameIndex:i32<2>, Constant:i32<4>
; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load 4 from [[MEM:%.*]] + 4)>
; CHECK: INLINEASM
; CHECK: (load 4 from [[MEM]] + 4)
; CHECK-NOT: (store 4 into [[MEM]] + 4)


0 comments on commit 0bf4f3f

Please sign in to comment.
You can’t perform that action at this time.