Skip to content

Commit

Permalink
[SelectionDAG] Compute known bits of CopyFromReg
Browse files Browse the repository at this point in the history
Summary:
Teach SelectionDAG how to compute known bits of ISD::CopyFromReg if
the virtual reg used has one def only.

This can be particularly useful when calling isBaseWithConstantOffset()
with an ISD::CopyFromReg argument, as more optimizations may be enabled
as a result.

Also add a missing truncation on X86, found while testing this patch.

Change-Id: Id1c9fceec862d118c54a5b53adf72ada5d6daefa

Reviewers: bogner, craig.topper, RKSimon

Reviewed By: RKSimon

Subscribers: lebedev.ri, nemanjai, jvesely, nhaehnle, javed.absar, jsji, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59535

llvm-svn: 357745
  • Loading branch information
piotrAMD committed Apr 5, 2019
1 parent 94cd066 commit 0376ac1
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 27 deletions.
20 changes: 20 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
Expand Down Expand Up @@ -3202,6 +3203,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One &= Known2.One;
break;
}
case ISD::CopyFromReg: {
auto R = cast<RegisterSDNode>(Op.getOperand(1));
const unsigned Reg = R->getReg();

const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (!TRI->isVirtualRegister(Reg))
break;

const MachineRegisterInfo *MRI = &MF->getRegInfo();
if (!MRI->hasOneDef(Reg))
break;

const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
if (!LOI || LOI->Known.getBitWidth() != BitWidth)
break;

Known = LOI->Known;
break;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19580,10 +19580,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);

// If the operand types disagree, extend the shift amount to match. Since
// BT ignores high bits (like shifts) we can use anyextend.
// If the operand types disagree, extend or truncate the shift amount to match.
// Since BT ignores high bits (like shifts) we can use anyextend for the extension.
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType());

X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
dl, MVT::i8);
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ main_body:

;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb:
;CHECK-NOT: s_waitcnt;
;CHECK: v_or_b32
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
;CHECK-NOT: v_or_b32
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
Expand All @@ -127,10 +127,8 @@ bb1: ; preds = %main_body

;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
;CHECK-NOT: s_waitcnt;
;CHECK: v_or_b32
;CHECK: v_or_b32
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
;CHECK-NOT: v_or_b32
;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/ARM/atomic-op.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ entry:
ret void
}

define void @func2() nounwind {
define void @func2(i16 %int_val) nounwind {
entry:
%val = alloca i16
%old = alloca i16
store i16 31, i16* %val
store i16 %int_val, i16* %val
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand All @@ -197,7 +197,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%0 = atomicrmw umin i16* %val, i16 16 monotonic
store i16 %0, i16* %old
%uneg = sub i16 0, 1
%uneg = sub i16 0, 2
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
Expand Down Expand Up @@ -249,7 +249,7 @@ entry:
; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
; CHECK-BAREMETAL: cmp
; CHECK-BAREMETAL-NOT: __sync
%uneg = sub i8 0, 1
%uneg = sub i8 0, 2
%1 = atomicrmw umin i8* %val, i8 %uneg monotonic
store i8 %1, i8* %old
; CHECK: ldrex
Expand Down
16 changes: 7 additions & 9 deletions llvm/test/CodeGen/PowerPC/pr35688.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,14 @@
; Function Attrs: nounwind
define void @ec_GFp_nistp256_points_mul() {
; CHECK-LABEL: ec_GFp_nistp256_points_mul:
; CHECK: ld 5, 0(3)
; CHECK: li 3, 127
; CHECK: li 4, 0
; CHECK: subfic 6, 5, 0
; CHECK: subfze 6, 4
; CHECK: sradi 7, 6, 63
; CHECK: srad 6, 6, 3
; CHECK: subfc 5, 5, 7
; CHECK: subfe 5, 4, 6
; CHECK: ld 4, 0(3)
; CHECK: li 3, 0
; CHECK: subfic 5, 4, 0
; CHECK: subfze 5, 3
; CHECK: sradi 5, 5, 63
; CHECK: subfc 4, 4, 5
; CHECK: subfe 4, 3, 5
; CHECK: sradi 4, 4, 63

; With MemorySSA, everything is taken out of the loop by licm.
; Loads and stores to undef are treated as non-aliasing.
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/subregliveness-04.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s

; Check for successful compilation.
; CHECK: lhi %r0, -5
; CHECK: lhi {{%r[0-9]+}}, -5

target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "s390x-ibm-linux"
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fold-tied-op.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386--netbsd"

; CHECK-LABEL: fn1
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: addl {{.*#+}} 4-byte Folded Reload
; CHECK: orl {{.*#+}} 4-byte Folded Reload
; CHECK: xorl {{.*#+}} 4-byte Folded Reload
; CHECK: xorl {{.*#+}} 4-byte Folded Reload
; CHECK: retl
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/X86/pr28444.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) {
; CHECK-LABEL: extractelt_mismatch_vector_element_type:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: movb %al, (%rax)
; CHECK-NEXT: movb %al, (%rax)
; CHECK-NEXT: movb $1, (%rax)
; CHECK-NEXT: movb $1, (%rax)
; CHECK-NEXT: retq
bb:
%tmp = icmp ult i32 %arg, 0
Expand Down

0 comments on commit 0376ac1

Please sign in to comment.