7 changes: 7 additions & 0 deletions llvm/include/llvm/IR/Attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,9 @@ class AttributeList {
/// arg.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;

/// Get range (or std::nullopt if unknown) of an arg.
/// The value comes from the Attribute::Range attribute attached to parameter
/// \p ArgNo; std::nullopt means that parameter has no such attribute.
std::optional<ConstantRange> getParamRange(unsigned ArgNo) const;

/// Get the disallowed floating-point classes of the return value.
FPClassTest getRetNoFPClass() const;

Expand Down Expand Up @@ -1123,6 +1126,10 @@ class AttrBuilder {
/// invalid if the Kind is not present in the builder.
Attribute getAttribute(StringRef Kind) const;

/// Retrieve the range if the attribute exists (std::nullopt is returned
/// otherwise). The attribute consulted is Attribute::Range as currently
/// held by this builder.
std::optional<ConstantRange> getRange() const;

/// Return raw (possibly packed/encoded) value of integer attribute or
/// std::nullopt if not set.
std::optional<uint64_t> getRawIntAttr(Attribute::AttrKind Kind) const;
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/IR/Attributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1931,6 +1931,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
return getParamAttrs(Index).getDereferenceableOrNullBytes();
}

std::optional<ConstantRange>
AttributeList::getParamRange(unsigned ArgNo) const {
auto RangeAttr = getParamAttrs(ArgNo).getAttribute(Attribute::Range);
if (RangeAttr.isValid())
return RangeAttr.getRange();
return std::nullopt;
}

FPClassTest AttributeList::getRetNoFPClass() const {
return getRetAttrs().getNoFPClass();
}
Expand Down Expand Up @@ -2277,6 +2285,13 @@ Attribute AttrBuilder::getAttribute(StringRef A) const {
return {};
}

std::optional<ConstantRange> AttrBuilder::getRange() const {
const Attribute RangeAttr = getAttribute(Attribute::Range);
if (RangeAttr.isValid())
return RangeAttr.getRange();
return std::nullopt;
}

bool AttrBuilder::contains(Attribute::AttrKind A) const {
return getAttribute(A).isValid();
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ def SDT_LoongArchMOVGR2FR_W_LA64
// Type profile for MOVFR2GR_S_LA64: one i64 result produced from one f32
// operand.
def SDT_LoongArchMOVFR2GR_S_LA64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
// FTINT, FRECIPE and FRSQRTE share the same shape: one floating-point result
// computed from one floating-point operand.
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;

// SelectionDAG nodes binding each LoongArchISD opcode to its type profile.
def loongarch_movgr2fr_w_la64
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
def loongarch_movfr2gr_s_la64
: SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
// Scalar approximate-reciprocal nodes.
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;

//===----------------------------------------------------------------------===//
// Instructions
Expand Down Expand Up @@ -286,6 +290,8 @@ let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
// The first two patterns select the target intrinsics; the last two select
// the loongarch_frecipe / loongarch_frsqrte nodes on FPR32 operands to the
// same FRECIPE_S / FRSQRTE_S instructions.
def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
def : Pat<(loongarch_frecipe FPR32:$src), (FRECIPE_S FPR32:$src)>;
def : Pat<(loongarch_frsqrte FPR32:$src), (FRSQRTE_S FPR32:$src)>;
}

// fmadd.s: fj * fk + fa
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
// The first two patterns select the target intrinsics; the last two select
// the loongarch_frecipe / loongarch_frsqrte nodes on FPR64 operands to the
// same FRECIPE_D / FRSQRTE_D instructions.
def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>;
def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>;
def : Pat<(loongarch_frecipe FPR64:$src), (FRECIPE_D FPR64:$src)>;
def : Pat<(loongarch_frsqrte FPR64:$src), (FRSQRTE_D FPR64:$src)>;
}

// fmadd.d: fj * fk + fa
Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4697,6 +4697,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_ZERO)
NODE_NAME_CASE(VALL_NONZERO)
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
}
#undef NODE_NAME_CASE
return nullptr;
Expand Down Expand Up @@ -5900,6 +5902,71 @@ Register LoongArchTargetLowering::getExceptionSelectorRegister(
return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

/// Pick the default number of Newton-Raphson refinement steps for an
/// FRECIPE-based estimate of type \p VT.
///
/// The FRECIPE instructions have a relative accuracy of 2^-14, while an IEEE
/// float carries 23 fraction bits and a double carries 52. Types whose scalar
/// element is f64 therefore get two steps; every other type gets one.
///
/// \param VT        Scalar or vector type being estimated.
/// \param Subtarget Currently unused.
static int getEstimateRefinementSteps(EVT VT,
                                      const LoongArchSubtarget &Subtarget) {
  if (VT.getScalarType() == MVT::f64)
    return 2;
  return 1;
}

/// Build an FRSQRTE-based starting estimate for sqrt(Operand) or
/// 1/sqrt(Operand).
///
/// \param Operand         Value whose (reciprocal) square root is wanted.
/// \param DAG             DAG in which the estimate nodes are created.
/// \param Enabled         Not consulted by this implementation.
/// \param RefinementSteps In/out: when the caller passes
///                        ReciprocalEstimate::Unspecified it is replaced by
///                        the default from getEstimateRefinementSteps().
/// \param UseOneConstNR   Left untouched by this implementation.
/// \param Reciprocal      True when 1/sqrt(Operand) is requested, false when
///                        sqrt(Operand) is requested.
/// \returns the estimate node, or an empty SDValue when the subtarget lacks
///          FRECIPE or \p Operand's type is not one of the handled scalar,
///          LSX or LASX floating-point types.
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
                                                 SelectionDAG &DAG, int Enabled,
                                                 int &RefinementSteps,
                                                 bool &UseOneConstNR,
                                                 bool Reciprocal) const {
  if (Subtarget.hasFrecipe()) {
    SDLoc DL(Operand);
    EVT VT = Operand.getValueType();

    if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
        (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
        (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {

      if (RefinementSteps == ReciprocalEstimate::Unspecified)
        RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

      // FRSQRTE approximates 1/sqrt(Operand). The generic Newton-Raphson
      // refinement must be seeded with that reciprocal estimate and converts
      // the result to sqrt(Operand) itself when Reciprocal is false. Only
      // when no refinement will run and a plain square root was requested do
      // we have to form sqrt(x) = x * (1/sqrt(x)) here.
      SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
      if (RefinementSteps == 0 && !Reciprocal)
        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);

      return Estimate;
    }
  }

  return SDValue();
}

/// Build an FRECIPE-based estimate of the reciprocal of \p Operand.
///
/// \param Operand         Value whose reciprocal is wanted.
/// \param DAG             DAG in which the estimate node is created.
/// \param Enabled         Not consulted by this implementation.
/// \param RefinementSteps In/out: when the caller passes
///                        ReciprocalEstimate::Unspecified it is replaced by
///                        the default from getEstimateRefinementSteps().
/// \returns a LoongArchISD::FRECIPE node, or an empty SDValue when the
///          subtarget lacks FRECIPE or the operand type is not handled.
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
                                                  SelectionDAG &DAG,
                                                  int Enabled,
                                                  int &RefinementSteps) const {
  // No estimate instruction exists without the FRECIPE feature.
  if (!Subtarget.hasFrecipe())
    return SDValue();

  const EVT VT = Operand.getValueType();

  // f32 is always accepted; f64 needs the basic double-precision feature,
  // 128-bit vectors need LSX and 256-bit vectors need LASX.
  const bool ScalarOK =
      VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD());
  const bool LSXOK =
      (VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasExtLSX();
  const bool LASXOK =
      (VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasExtLASX();
  if (!ScalarOK && !LSXOK && !LASXOK)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

  return DAG.getNode(LoongArchISD::FRECIPE, SDLoc(Operand), VT, Operand);
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ enum NodeType : unsigned {
VALL_NONZERO,
VANY_NONZERO,

// Floating point approximate reciprocal operation
FRECIPE,
FRSQRTE

// Intrinsic operations end =============================================
};
} // end namespace LoongArchISD
Expand Down Expand Up @@ -216,6 +220,17 @@ class LoongArchTargetLowering : public TargetLowering {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

/// Report fsqrt as cheap: this override returns true unconditionally and
/// does not inspect \p Operand or \p DAG.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}

/// Build a square-root estimate for \p Operand from a LoongArchISD::FRSQRTE
/// node. Returns an empty SDValue when the subtarget lacks FRECIPE or the
/// operand type is not handled. \p RefinementSteps is filled in with a
/// target default when passed as ReciprocalEstimate::Unspecified.
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;

/// Build a reciprocal estimate for \p Operand as a LoongArchISD::FRECIPE
/// node. Returns an empty SDValue when the subtarget lacks FRECIPE or the
/// operand type is not handled. \p RefinementSteps is filled in with a
/// target default when passed as ReciprocalEstimate::Unspecified.
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;

ISD::NodeType getExtendForAtomicOps() const override {
return ISD::SIGN_EXTEND;
}
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

// Target nodes.
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;

def lasxsplati8
Expand Down Expand Up @@ -2094,6 +2095,15 @@ foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in
foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
(!cast<LAInst>(Inst) LASX256:$xj)>;

// Select the vector approximate-reciprocal and approximate-reciprocal-sqrt
// nodes on v8f32 / v4f64 operands to the matching XVFRECIPE / XVFRSQRTE
// instructions.
def : Pat<(loongarch_vfrecipe v8f32:$src),
(XVFRECIPE_S v8f32:$src)>;
def : Pat<(loongarch_vfrecipe v4f64:$src),
(XVFRECIPE_D v4f64:$src)>;
def : Pat<(loongarch_vfrsqrte v8f32:$src),
(XVFRSQRTE_S v8f32:$src)>;
def : Pat<(loongarch_vfrsqrte v4f64:$src),
(XVFRSQRTE_D v4f64:$src)>;
}

def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
// Vector estimate profiles: one result that is a floating-point vector, and
// one operand constrained to the same type as the result.
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;

// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
Expand Down Expand Up @@ -50,6 +52,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;

def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
// Vector-typed nodes for the LoongArchISD::FRECIPE / LoongArchISD::FRSQRTE
// opcodes, using the vector type profiles.
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;

def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
Expand Down Expand Up @@ -2238,6 +2242,15 @@ foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in
foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)),
(!cast<LAInst>(Inst) LSX128:$vj)>;

// Select the vector approximate-reciprocal and approximate-reciprocal-sqrt
// nodes on v4f32 / v2f64 operands to the matching VFRECIPE / VFRSQRTE
// instructions.
def : Pat<(loongarch_vfrecipe v4f32:$src),
(VFRECIPE_S v4f32:$src)>;
def : Pat<(loongarch_vfrecipe v2f64:$src),
(VFRECIPE_D v2f64:$src)>;
def : Pat<(loongarch_vfrsqrte v4f32:$src),
(VFRSQRTE_S v4f32:$src)>;
def : Pat<(loongarch_vfrsqrte v2f64:$src),
(VFRSQRTE_D v2f64:$src)>;
}

// load
Expand Down
90 changes: 74 additions & 16 deletions llvm/lib/Transforms/Utils/InlineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
Expand All @@ -59,6 +60,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
Expand Down Expand Up @@ -1358,18 +1360,36 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
auto &Context = CalledFunction->getContext();

// Collect valid attributes for all params.
SmallVector<AttrBuilder> ValidParamAttrs;
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
bool HasAttrToPropagate = false;

// Attributes we can only propagate if the exact parameter is forwarded.
// We can propagate both poison generating and UB generating attributes
// without any extra checks. The only attribute that is tricky to propagate
// is `noundef` (skipped for now) as that can create new UB where previous
// behavior was just using a poison value.
static const Attribute::AttrKind ExactAttrsToPropagate[] = {
Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
Attribute::NonNull, Attribute::Alignment, Attribute::Range};

for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
// Access attributes can be propagated to any param with the same underlying
// object as the argument.
if (CB.paramHasAttr(I, Attribute::ReadNone))
ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
if (CB.paramHasAttr(I, Attribute::ReadOnly))
ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);

for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
Attribute Attr = CB.getParamAttr(I, AK);
if (Attr.isValid())
ValidExactParamAttrs.back().addAttribute(Attr);
}

HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
}

// Won't be able to propagate anything.
Expand All @@ -1391,22 +1411,60 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,

AttributeList AL = NewInnerCB->getAttributes();
for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
// Check if the underlying value for the parameter is an argument.
const Value *UnderlyingV =
getUnderlyingObject(InnerCB->getArgOperand(I));
const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
if (!Arg)
// It's unsound or requires special handling to propagate
// attributes to byval arguments. Even if CalledFunction
// doesn't e.g. write to the argument (readonly), the call to
// NewInnerCB may write to its by-value copy.
if (NewInnerCB->paramHasAttr(I, Attribute::ByVal))
continue;

if (NewInnerCB->paramHasAttr(I, Attribute::ByVal))
// It's unsound to propagate memory attributes to byval arguments.
// Even if CalledFunction doesn't e.g. write to the argument,
// the call to NewInnerCB may write to its by-value copy.
// Don't bother propagating attrs to constants.
if (match(NewInnerCB->getArgOperand(I),
llvm::PatternMatch::m_ImmConstant()))
continue;

unsigned ArgNo = Arg->getArgNo();
// Check if the underlying value for the parameter is an argument.
const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I));
unsigned ArgNo;
if (Arg) {
ArgNo = Arg->getArgNo();
// For dereferenceable, dereferenceable_or_null, align, etc...
// we don't want to propagate if the existing param has the same
// attribute with "better" constraints. So remove from the
// new AL if the region of the existing param is larger than
// what we can propagate.
AttrBuilder NewAB{
Context, AttributeSet::get(Context, ValidExactParamAttrs[ArgNo])};
if (AL.getParamDereferenceableBytes(I) >
NewAB.getDereferenceableBytes())
NewAB.removeAttribute(Attribute::Dereferenceable);
if (AL.getParamDereferenceableOrNullBytes(I) >
NewAB.getDereferenceableOrNullBytes())
NewAB.removeAttribute(Attribute::DereferenceableOrNull);
if (AL.getParamAlignment(I).valueOrOne() >
NewAB.getAlignment().valueOrOne())
NewAB.removeAttribute(Attribute::Alignment);
if (auto ExistingRange = AL.getParamRange(I)) {
if (auto NewRange = NewAB.getRange()) {
ConstantRange CombinedRange =
ExistingRange->intersectWith(*NewRange);
NewAB.removeAttribute(Attribute::Range);
NewAB.addRangeAttr(CombinedRange);
}
}
AL = AL.addParamAttributes(Context, I, NewAB);
} else {
// Check if the underlying value for the parameter is an argument.
const Value *UnderlyingV =
getUnderlyingObject(InnerCB->getArgOperand(I));
Arg = dyn_cast<Argument>(UnderlyingV);
if (!Arg)
continue;
ArgNo = Arg->getArgNo();
}

// If so, propagate its access attributes.
AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
// We can have conflicting attributes from the inner callsite and
// to-be-inlined callsite. In that case, choose the most
// restrictive.
Expand Down
80 changes: 80 additions & 0 deletions llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,-frecipe < %s | FileCheck %s --check-prefix=LA32F
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,+frecipe < %s | FileCheck %s --check-prefix=LA32F-FRECIPE
; RUN: llc --mtriple=loongarch64 --mattr=+d,-frecipe < %s | FileCheck %s --check-prefix=LA64D
; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s --check-prefix=LA64D-FRECIPE

;; Exercise the 'fdiv' LLVM IR: https://llvm.org/docs/LangRef.html#fdiv-instruction

define float @fdiv_s(float %x, float %y) {
; LA32F-LABEL: fdiv_s:
; LA32F: # %bb.0:
; LA32F-NEXT: fdiv.s $fa0, $fa0, $fa1
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: fdiv_s:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frecipe.s $fa2, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
; LA32F-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: fdiv_s:
; LA64D: # %bb.0:
; LA64D-NEXT: fdiv.s $fa0, $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: fdiv_s:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frecipe.s $fa2, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
; LA64D-FRECIPE-NEXT: ret
%div = fdiv fast float %x, %y
ret float %div
}

define double @fdiv_d(double %x, double %y) {
; LA32F-LABEL: fdiv_d:
; LA32F: # %bb.0:
; LA32F-NEXT: addi.w $sp, $sp, -16
; LA32F-NEXT: .cfi_def_cfa_offset 16
; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-NEXT: .cfi_offset 1, -4
; LA32F-NEXT: bl %plt(__divdf3)
; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32F-NEXT: addi.w $sp, $sp, 16
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: fdiv_d:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
; LA32F-FRECIPE-NEXT: .cfi_def_cfa_offset 16
; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-FRECIPE-NEXT: .cfi_offset 1, -4
; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: fdiv_d:
; LA64D: # %bb.0:
; LA64D-NEXT: fdiv.d $fa0, $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: fdiv_d:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0)
; LA64D-FRECIPE-NEXT: frecipe.d $fa3, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
; LA64D-FRECIPE-NEXT: ret
%div = fdiv fast double %x, %y
ret double %div
}
797 changes: 797 additions & 0 deletions llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefix=FAULT
; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s

define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
; FAULT-LABEL: fdiv_v8f32:
; FAULT: # %bb.0:
; FAULT-NEXT: xvld $xr0, $a1, 0
; FAULT-NEXT: xvld $xr1, $a2, 0
; FAULT-NEXT: xvfdiv.s $xr0, $xr0, $xr1
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: fdiv_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a2, 0
; CHECK-NEXT: xvld $xr1, $a1, 0
; CHECK-NEXT: xvfrecipe.s $xr2, $xr0
; CHECK-NEXT: xvfmul.s $xr3, $xr1, $xr2
; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr3, $xr1
; CHECK-NEXT: xvfmadd.s $xr0, $xr2, $xr0, $xr3
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
%v1 = load <8 x float>, ptr %a1
%v2 = fdiv fast <8 x float> %v0, %v1
store <8 x float> %v2, ptr %res
ret void
}

define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
; FAULT-LABEL: fdiv_v4f64:
; FAULT: # %bb.0:
; FAULT-NEXT: xvld $xr0, $a1, 0
; FAULT-NEXT: xvld $xr1, $a2, 0
; FAULT-NEXT: xvfdiv.d $xr0, $xr0, $xr1
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: fdiv_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a2, 0
; CHECK-NEXT: xvld $xr1, $a1, 0
; CHECK-NEXT: lu52i.d $a1, $zero, -1025
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1
; CHECK-NEXT: xvfrecipe.d $xr3, $xr0
; CHECK-NEXT: xvfmadd.d $xr2, $xr0, $xr3, $xr2
; CHECK-NEXT: xvfnmsub.d $xr2, $xr2, $xr3, $xr3
; CHECK-NEXT: xvfmul.d $xr3, $xr1, $xr2
; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr1
; CHECK-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0
%v1 = load <4 x double>, ptr %a1
%v2 = fdiv fast <4 x double> %v0, %v1
store <4 x double> %v2, ptr %res
ret void
}

;; 1.0 / vec
define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_fdiv_v8f32:
; FAULT: # %bb.0:
; FAULT-NEXT: xvld $xr0, $a1, 0
; FAULT-NEXT: xvfrecip.s $xr0, $xr0
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_fdiv_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvfrecipe.s $xr1, $xr0
; CHECK-NEXT: lu12i.w $a1, -264192
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1
; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %v0
store <8 x float> %div, ptr %res
ret void
}

define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_fdiv_v4f64:
; FAULT: # %bb.0:
; FAULT-NEXT: xvld $xr0, $a1, 0
; FAULT-NEXT: xvfrecip.d $xr0, $xr0
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_fdiv_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvfrecipe.d $xr1, $xr0
; CHECK-NEXT: lu52i.d $a1, $zero, 1023
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1
; CHECK-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2
; CHECK-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1
; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2
; CHECK-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0
%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0
store <4 x double> %div, ptr %res
ret void
}
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefix=FAULT
; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s

;; 1.0 / (fsqrt vec)
define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_div_sqrt_v8f32:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: xvld $xr0, $a1, 0
; FAULT-NEXT: xvfrsqrt.s $xr0, $xr0
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_div_sqrt_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvfrsqrte.s $xr1, $xr0
; CHECK-NEXT: xvfmul.s $xr1, $xr0, $xr1
; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1
; CHECK-NEXT: lu12i.w $a1, -261120
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1
; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
; CHECK-NEXT: lu12i.w $a1, -266240
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1
; CHECK-NEXT: xvfmul.s $xr1, $xr1, $xr2
; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0, align 16
%sqrt = call fast <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
store <8 x float> %div, ptr %res, align 16
ret void
}

define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_div_sqrt_v4f64:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: xvld $xr0, $a1, 0
; FAULT-NEXT: xvfrsqrt.d $xr0, $xr0
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_div_sqrt_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvfrsqrte.d $xr1, $xr0
; CHECK-NEXT: xvfmul.d $xr1, $xr0, $xr1
; CHECK-NEXT: xvfmul.d $xr2, $xr0, $xr1
; CHECK-NEXT: ori $a1, $zero, 0
; CHECK-NEXT: lu32i.d $a1, -524288
; CHECK-NEXT: lu52i.d $a1, $a1, -1024
; CHECK-NEXT: xvreplgr2vr.d $xr3, $a1
; CHECK-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3
; CHECK-NEXT: lu52i.d $a1, $zero, -1026
; CHECK-NEXT: xvreplgr2vr.d $xr4, $a1
; CHECK-NEXT: xvfmul.d $xr1, $xr1, $xr4
; CHECK-NEXT: xvfmul.d $xr1, $xr1, $xr2
; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr3
; CHECK-NEXT: xvfmul.d $xr1, $xr1, $xr4
; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0, align 16
%sqrt = call fast <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %sqrt
store <4 x double> %div, ptr %res, align 16
ret void
}

declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
114 changes: 114 additions & 0 deletions llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefix=FAULT
; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s

define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
; FAULT-LABEL: fdiv_v4f32:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: vld $vr0, $a1, 0
; FAULT-NEXT: vld $vr1, $a2, 0
; FAULT-NEXT: vfdiv.s $vr0, $vr0, $vr1
; FAULT-NEXT: vst $vr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: fdiv_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a2, 0
; CHECK-NEXT: vld $vr1, $a1, 0
; CHECK-NEXT: vfrecipe.s $vr2, $vr0
; CHECK-NEXT: vfmul.s $vr3, $vr1, $vr2
; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr3, $vr1
; CHECK-NEXT: vfmadd.s $vr0, $vr2, $vr0, $vr3
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0
%v1 = load <4 x float>, ptr %a1
%v2 = fdiv fast <4 x float> %v0, %v1
store <4 x float> %v2, ptr %res
ret void
}

define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
; FAULT-LABEL: fdiv_v2f64:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: vld $vr0, $a1, 0
; FAULT-NEXT: vld $vr1, $a2, 0
; FAULT-NEXT: vfdiv.d $vr0, $vr0, $vr1
; FAULT-NEXT: vst $vr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: fdiv_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a2, 0
; CHECK-NEXT: vld $vr1, $a1, 0
; CHECK-NEXT: lu52i.d $a1, $zero, -1025
; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
; CHECK-NEXT: vfrecipe.d $vr3, $vr0
; CHECK-NEXT: vfmadd.d $vr2, $vr0, $vr3, $vr2
; CHECK-NEXT: vfnmsub.d $vr2, $vr2, $vr3, $vr3
; CHECK-NEXT: vfmul.d $vr3, $vr1, $vr2
; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr1
; CHECK-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr3
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <2 x double>, ptr %a0
%v1 = load <2 x double>, ptr %a1
%v2 = fdiv fast <2 x double> %v0, %v1
store <2 x double> %v2, ptr %res
ret void
}

;; 1.0 / vec
define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_fdiv_v4f32:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: vld $vr0, $a1, 0
; FAULT-NEXT: vfrecip.s $vr0, $vr0
; FAULT-NEXT: vst $vr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_fdiv_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vfrecipe.s $vr1, $vr0
; CHECK-NEXT: lu12i.w $a1, -264192
; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2
; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %v0
store <4 x float> %div, ptr %res
ret void
}

define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_fdiv_v2f64:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: vld $vr0, $a1, 0
; FAULT-NEXT: vfrecip.d $vr0, $vr0
; FAULT-NEXT: vst $vr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_fdiv_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vfrecipe.d $vr1, $vr0
; CHECK-NEXT: lu52i.d $a1, $zero, 1023
; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
; CHECK-NEXT: vfnmsub.d $vr3, $vr0, $vr1, $vr2
; CHECK-NEXT: vfmadd.d $vr1, $vr1, $vr3, $vr1
; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2
; CHECK-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <2 x double>, ptr %a0
%div = fdiv fast <2 x double> <double 1.0, double 1.0>, %v0
store <2 x double> %div, ptr %res
ret void
}
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefix=FAULT
; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s

;; 1.0 / (fsqrt vec)
;; <4 x float> case: loads a vector from %a0, computes a fast-math
;; 1.0 / sqrt(x) per lane and stores the result to %res.
;; FAULT (-frecipe) expects a single vfrsqrt.s; CHECK (+frecipe) expects a
;; vfrsqrte.s estimate followed by vfmul.s/vfmadd.s instructions.
define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_div_sqrt_v4f32:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: vld $vr0, $a1, 0
; FAULT-NEXT: vfrsqrt.s $vr0, $vr0
; FAULT-NEXT: vst $vr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_div_sqrt_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vfrsqrte.s $vr1, $vr0
; CHECK-NEXT: vfmul.s $vr1, $vr0, $vr1
; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1
; CHECK-NEXT: lu12i.w $a1, -261120
; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2
; CHECK-NEXT: lu12i.w $a1, -266240
; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
; CHECK-NEXT: vfmul.s $vr1, $vr1, $vr2
; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0, align 16
%sqrt = call fast <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0)
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
store <4 x float> %div, ptr %res, align 16
ret void
}

;; 1.0 / (fsqrt vec), <2 x double> case: loads a vector from %a0, computes a
;; fast-math 1.0 / sqrt(x) per lane and stores the result to %res.
;; FAULT (-frecipe) expects a single vfrsqrt.d; CHECK (+frecipe) expects a
;; vfrsqrte.d estimate followed by vfmul.d/vfmadd.d instructions.
define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
; FAULT-LABEL: one_div_sqrt_v2f64:
; FAULT: # %bb.0: # %entry
; FAULT-NEXT: vld $vr0, $a1, 0
; FAULT-NEXT: vfrsqrt.d $vr0, $vr0
; FAULT-NEXT: vst $vr0, $a0, 0
; FAULT-NEXT: ret
;
; CHECK-LABEL: one_div_sqrt_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vfrsqrte.d $vr1, $vr0
; CHECK-NEXT: vfmul.d $vr1, $vr0, $vr1
; CHECK-NEXT: vfmul.d $vr2, $vr0, $vr1
; CHECK-NEXT: ori $a1, $zero, 0
; CHECK-NEXT: lu32i.d $a1, -524288
; CHECK-NEXT: lu52i.d $a1, $a1, -1024
; CHECK-NEXT: vreplgr2vr.d $vr3, $a1
; CHECK-NEXT: vfmadd.d $vr2, $vr2, $vr1, $vr3
; CHECK-NEXT: lu52i.d $a1, $zero, -1026
; CHECK-NEXT: vreplgr2vr.d $vr4, $a1
; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr4
; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr2
; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1
; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr3
; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr4
; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <2 x double>, ptr %a0, align 16
%sqrt = call fast <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0)
%div = fdiv fast <2 x double> <double 1.0, double 1.0>, %sqrt
store <2 x double> %div, ptr %res, align 16
ret void
}

declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
164 changes: 157 additions & 7 deletions llvm/test/Transforms/Inline/access-attributes-prop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ define dso_local void @foo3_writable(ptr %p) {
ret void
}


define dso_local void @foo1_bar_aligned64_deref512(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@foo1_bar_aligned64_deref512
; CHECK-SAME: (ptr [[P:%.*]]) {
Expand Down Expand Up @@ -306,7 +305,7 @@ define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) {
define void @prop_deref(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_deref
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr dereferenceable(16) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr dereferenceable(16) %p)
Expand All @@ -316,7 +315,7 @@ define void @prop_deref(ptr %p) {
define void @prop_deref_or_null(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_deref_or_null
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr dereferenceable_or_null(256) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr dereferenceable_or_null(256) %p)
Expand All @@ -326,13 +325,23 @@ define void @prop_deref_or_null(ptr %p) {
define void @prop_param_nonnull_and_align(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_nonnull_and_align
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: call void @bar1(ptr nonnull align 32 [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr nonnull align 32 %p)
ret void
}

; The call site passes `nofree align 32`; the expected call to @bar1 carries
; only `align 32`, i.e. `nofree` is not expected on the resulting call.
define void @prop_param_nofree_and_align(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr align 32 [[P]])
; CHECK-NEXT: ret void
;
call void @foo1(ptr nofree align 32 %p)
ret void
}

define void @prop_param_deref_align_no_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update
; CHECK-SAME: (ptr [[P:%.*]]) {
Expand All @@ -346,7 +355,7 @@ define void @prop_param_deref_align_no_update(ptr %p) {
define void @prop_param_deref_align_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_update
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]])
; CHECK-NEXT: call void @bar1(ptr align 128 dereferenceable(1024) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned64_deref512(ptr align 128 dereferenceable(1024) %p)
Expand All @@ -356,7 +365,7 @@ define void @prop_param_deref_align_update(ptr %p) {
define void @prop_param_deref_or_null_update(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_update
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]])
; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(1024) [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(1024) %p)
Expand Down Expand Up @@ -539,7 +548,6 @@ define void @prop_no_conflict_writable(ptr %p) {
ret void
}


define void @prop_no_conflict_writable2(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_no_conflict_writable2
; CHECK-SAME: (ptr [[P:%.*]]) {
Expand Down Expand Up @@ -600,3 +608,145 @@ define void @prop_byval_readonly2(ptr %p) {
call void @foo_byval_readonly2(ptr %p)
ret void
}

declare void @bar5(i32)

; Helper callee for the prop_range_* tests: forwards %v to @bar5 with
; range(i32 0, 10) attached to the argument at the inner call site.
define dso_local void @foo4_range_0_10(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@foo4_range_0_10
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 10) [[V]])
; CHECK-NEXT: ret void
;
call void @bar5(i32 range(i32 0, 10) %v)
ret void
}

; Helper callee for the prop_range_* tests: forwards %v to @bar5 with
; range(i32 10, 40) attached to the argument at the inner call site.
define dso_local void @foo4_range_10_40(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@foo4_range_10_40
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 10, 40) [[V]])
; CHECK-NEXT: ret void
;
call void @bar5(i32 range(i32 10, 40) %v)
ret void
}

; Helper callee for prop_range_intersect3: here range(i32 0, 10) is on the
; function's own parameter, while the inner call to @bar5 has no range attribute.
define dso_local void @foo4_2_range_0_10(i32 range(i32 0, 10) %v) {
; CHECK-LABEL: define {{[^@]+}}@foo4_2_range_0_10
; CHECK-SAME: (i32 range(i32 0, 10) [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 [[V]])
; CHECK-NEXT: ret void
;
call void @bar5(i32 %v)
ret void
}

; Helper callee for the prop_range_* tests: forwards %v to @bar5 with no range
; attribute on either the parameter or the inner call.
define dso_local void @foo4(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@foo4
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 [[V]])
; CHECK-NEXT: ret void
;
call void @bar5(i32 %v)
ret void
}

; Call site passes range(i32 11, 50) while the callee's inner call carries
; range(i32 0, 10); the expected call to @bar5 has range(i32 0, 0).
define void @prop_range_empty_intersect(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 0) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 11, 50) %v)
ret void
}

; Call site passes range(i32 1, 0) to @foo4, whose inner call has no range;
; the expected call to @bar5 carries the same range(i32 1, 0).
define void @prop_range_empty(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_empty
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 0) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4(i32 range(i32 1, 0) %v)
ret void
}

; Call site passes range(i32 1, 0) while the callee's inner call carries
; range(i32 0, 10); the expected call to @bar5 has range(i32 1, 10).
define void @prop_range_empty_with_intersect(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 10) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 1, 0) %v)
ret void
}

; Call site passes range(i32 0, 9) while the callee's inner call carries
; range(i32 0, 10); the expected call to @bar5 has the narrower range(i32 0, 9).
define void @prop_range_intersect1(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 9) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 0, 9) %v)
ret void
}

; Call site passes range(i32 1, 9) while the callee's inner call carries
; range(i32 0, 10); the expected call to @bar5 has range(i32 1, 9).
define void @prop_range_intersect2(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 9) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 1, 9) %v)
ret void
}

; Callee @foo4_2_range_0_10 has range(i32 0, 10) on its parameter only (its
; inner call has none). The expected call to @bar5 carries the call-site
; range(i32 0, 11) unchanged.
define void @prop_range_intersect3(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 11) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v)
ret void
}

; Call site passes range(i32 40, 5) (lower bound greater than upper bound)
; while the callee's inner call carries range(i32 0, 10); the expected call to
; @bar5 has range(i32 0, 5).
define void @prop_range_intersect4(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect4
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 0, 5) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_0_10(i32 range(i32 40, 5) %v)
ret void
}

; Call site passes range(i32 30, 20) (lower bound greater than upper bound)
; while the callee's inner call carries range(i32 10, 40); the expected call to
; @bar5 keeps the callee's range(i32 10, 40).
define void @prop_range_intersect5(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect5
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 10, 40) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_10_40(i32 range(i32 30, 20) %v)
ret void
}

; Call site has no range attribute; the expected call to @bar5 keeps the
; range(i32 10, 40) from the callee's inner call.
define void @prop_range_keep(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_keep
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 10, 40) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4_range_10_40(i32 %v)
ret void
}

; Callee @foo4 has no range attribute; the expected call to @bar5 carries the
; call-site range(i32 1, 11) directly.
define void @prop_range_direct(i32 %v) {
; CHECK-LABEL: define {{[^@]+}}@prop_range_direct
; CHECK-SAME: (i32 [[V:%.*]]) {
; CHECK-NEXT: call void @bar5(i32 range(i32 1, 11) [[V]])
; CHECK-NEXT: ret void
;
call void @foo4(i32 range(i32 1, 11) %v)
ret void
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ declare void @h(ptr %p, ptr %q, ptr %z)
define void @f(ptr %p, ptr %q, ptr %z) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) {
; CHECK-NEXT: call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]])
; CHECK-NEXT: call void @h(ptr nonnull [[P]], ptr [[Q]], ptr nonnull [[Z]])
; CHECK-NEXT: ret void
;
call void @g(ptr nonnull %p, ptr %q, ptr nonnull %z)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/Inline/byval.ll
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ define void @test3() nounwind {
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
; CHECK-NEXT: call void @g3(ptr [[S1]]) #[[ATTR0]]
; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
; CHECK-NEXT: ret void
;
Expand All @@ -131,7 +131,7 @@ define i32 @test4() nounwind {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
; CHECK-NEXT: call void @g3(ptr [[S]]) #[[ATTR0]]
; CHECK-NEXT: call void @g3(ptr align 64 [[S]]) #[[ATTR0]]
; CHECK-NEXT: ret i32 4
;
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/PhaseOrdering/pr95152.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ define void @f(ptr dead_on_unwind noalias %p) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr dead_on_unwind noalias [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT: store i64 3, ptr [[P]], align 4
; CHECK-NEXT: tail call void @j(ptr nonnull [[P]])
; CHECK-NEXT: tail call void @j(ptr nonnull align 8 dereferenceable(8) [[P]])
; CHECK-NEXT: store i64 43, ptr [[P]], align 4
; CHECK-NEXT: ret void
;
Expand Down