31 changes: 31 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1709,6 +1709,34 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
}

/// Convert fdiv to fmul when the divisor is a sqrt intrinsic whose operand is
/// itself an fdiv instruction.
static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
// X / sqrt(Y / Z) --> X * sqrt(Z / Y)
if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
return nullptr;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
auto *II = dyn_cast<IntrinsicInst>(Op1);
if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() ||
!II->hasAllowReassoc() || !II->hasAllowReciprocal())
return nullptr;

Value *Y, *Z;
auto *DivOp = dyn_cast<Instruction>(II->getOperand(0));
if (!DivOp)
return nullptr;
if (!match(DivOp, m_FDiv(m_Value(Y), m_Value(Z))))
return nullptr;
if (!DivOp->hasAllowReassoc() || !I.hasAllowReciprocal() ||
!DivOp->hasOneUse())
return nullptr;
Value *SwapDiv = Builder.CreateFDivFMF(Z, Y, DivOp);
Value *NewSqrt =
Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), SwapDiv, II);
return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
}

Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
Module *M = I.getModule();

@@ -1816,6 +1844,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
if (Instruction *Mul = foldFDivPowDivisor(I, Builder))
return Mul;

if (Instruction *Mul = foldFDivSqrtDivisor(I, Builder))
return Mul;

// pow(X, Y) / X --> pow(X, Y-1)
if (I.hasAllowReassoc() &&
match(Op0, m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Specific(Op1),
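A note on the new fold: foldFDivSqrtDivisor is an application of the exact real-arithmetic identity

  \[ \frac{X}{\sqrt{Y/Z}} \;=\; X \cdot \left(\frac{Y}{Z}\right)^{-1/2} \;=\; X \cdot \sqrt{\frac{Z}{Y}} \]

In IEEE arithmetic the rewrite both reassociates and introduces a reciprocal, which is why the code insists on reassoc and arcp on the outer fdiv and on the sqrt call. The payoff is that the divide by a square root becomes a multiply, and the inner fdiv is reused with its operands swapped rather than materializing 1/sqrt separately.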
81 changes: 49 additions & 32 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1406,41 +1406,27 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
// -> ({s|u}itofp (int_binop x, y))
// 2) (fp_binop ({s|u}itofp x), FpC)
// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
Value *IntOps[2] = {nullptr, nullptr};
Constant *Op1FpC = nullptr;

// Check for:
// 1) (binop ({s|u}itofp x), ({s|u}itofp y))
// 2) (binop ({s|u}itofp x), FpC)
if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
!match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
return nullptr;

if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
!match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
!match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
return nullptr;
//
// Assuming the sign of the cast for x/y is `OpsFromSigned`.
Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {

Type *FPTy = BO.getType();
Type *IntTy = IntOps[0]->getType();

// Do we have signed casts?
bool OpsFromSigned = isa<SIToFPInst>(BO.getOperand(0));

unsigned IntSz = IntTy->getScalarSizeInBits();
// This is the maximum number of in-use bits in the integer for which the
// int -> fp cast is exact.
unsigned MaxRepresentableBits =
APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());

// Cache KnownBits a bit to potentially save some analysis.
WithCache<const Value *> OpsKnown[2] = {IntOps[0], IntOps[1]};

// Preserve the known number of leading bits. This can allow us to do trivial
// nsw/nuw checks later on.
unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};

// NB: This only comes up if OpsFromSigned is true, so there is no need to
// cache it between calls to `foldFBinOpOfIntCastsFromSign`.
auto IsNonZero = [&](unsigned OpNo) -> bool {
if (OpsKnown[OpNo].hasKnownBits() &&
OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
@@ -1449,14 +1435,19 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
};

auto IsNonNeg = [&](unsigned OpNo) -> bool {
if (OpsKnown[OpNo].hasKnownBits() &&
OpsKnown[OpNo].getKnownBits(SQ).isNonNegative())
return true;
return isKnownNonNegative(IntOps[OpNo], SQ);
// NB: This matches the impl in ValueTracking; we just try to use cached
// knownbits here. If we ever start supporting WithCache for
// `isKnownNonNegative`, change this to an explicit call.
return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
};

// Check if we know for certain that ({s|u}itofp op) is exact.
auto IsValidPromotion = [&](unsigned OpNo) -> bool {
// Can we treat this operand as the desired sign?
if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
!IsNonNeg(OpNo))
return false;

// If fp precision >= bitwidth(op) then it's exact.
// NB: This is slightly conservative for `sitofp`. For signed conversion, we
// can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
@@ -1509,13 +1500,6 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
return nullptr;

if (Op1FpC == nullptr) {
if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(1))) {
// If we have a signed + unsigned, see if we can treat both as signed
// (uitofp nneg x) == (sitofp nneg x).
if (OpsFromSigned ? !IsNonNeg(1) : !IsNonNeg(0))
return nullptr;
OpsFromSigned = true;
}
if (!IsValidPromotion(1))
return nullptr;
}
@@ -1574,6 +1558,39 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
return new UIToFPInst(IntBinOp, FPTy);
}

// Try to fold:
// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
// -> ({s|u}itofp (int_binop x, y))
// 2) (fp_binop ({s|u}itofp x), FpC)
// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
std::array<Value *, 2> IntOps = {nullptr, nullptr};
Constant *Op1FpC = nullptr;
// Check for:
// 1) (binop ({s|u}itofp x), ({s|u}itofp y))
// 2) (binop ({s|u}itofp x), FpC)
if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
!match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
return nullptr;

if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
!match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
!match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
return nullptr;

// Cache KnownBits to potentially save some analysis.
SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};

// Try treating x/y as coming from both `uitofp` and `sitofp`. There are
// different constraints depending on the sign of the cast.
// NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
IntOps, Op1FpC, OpsKnown))
return R;
return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
Op1FpC, OpsKnown);
}

/// A binop with a constant operand and a sign-extended boolean operand may be
/// converted into a select of constants by applying the binary operation to
/// the constant with the two possible values of the extended boolean (0 or -1).
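Two observations, sketched here as worked math, make the refactored fold sound. First, an int -> fp cast is exact when the number of bits the integer actually uses is at most the significand precision \(p\) of the FP type (\(p = 11\) for half, \(24\) for float, \(53\) for double); that is the role of MaxRepresentableBits. Second, for \(x \ge 0\) the signed and unsigned interpretations of \(x\) coincide, so \(\mathrm{uitofp}(x) = \mathrm{sitofp}(x)\); this is what lets the driver try OpsFromSigned = false first and fall back to the signed variant. A concrete instance from the i8 tests below: with \(x, y \le 63\),

  \[ x + y \le 126 < 2^7 \le 2^{11}, \]

so the integer add neither overflows i8 (hence add nuw nsw) nor loses bits in half, and the fadd of the two casts equals the cast of the integer add.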
10 changes: 5 additions & 5 deletions llvm/test/Transforms/InstCombine/add-sitofp.ll
@@ -6,7 +6,7 @@ define double @x(i32 %a, i32 %b) {
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24
; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1
; CHECK-NEXT: [[P:%.*]] = sitofp i32 [[TMP1]] to double
; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to double
; CHECK-NEXT: ret double [[P]]
;
%m = lshr i32 %a, 24
@@ -20,7 +20,7 @@ define double @test(i32 %a) {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], 1
; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[TMP1]] to double
; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double
; CHECK-NEXT: ret double [[RES]]
;
; Drop two highest bits to guarantee that %a + 1 doesn't overflow
@@ -49,7 +49,7 @@ define double @test_2(i32 %a, i32 %b) {
; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]]
; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[TMP1]] to double
; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double
; CHECK-NEXT: ret double [[RES]]
;
; Drop two highest bits to guarantee that %a + %b doesn't overflow
@@ -89,7 +89,7 @@ define float @test_3(i32 %a, i32 %b) {
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24
; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1
; CHECK-NEXT: [[P:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT: ret float [[P]]
;
%m = lshr i32 %a, 24
@@ -104,7 +104,7 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]]
; CHECK-NEXT: [[RES:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
; CHECK-NEXT: [[RES:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; CHECK-NEXT: ret <4 x double> [[RES]]
;
; Drop two highest bits to guarantee that %a + %b doesn't overflow
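Why the CHECK lines above flip from sitofp to uitofp: in each test the folded integer value is provably non-negative (in @x, for instance, the lshr by 24 leaves %m <= 255, so the add produces at most 256), and for a non-negative value the two casts yield the same float. Since the refactored driver tries the unsigned variant first, the fold now emits uitofp where it used to emit sitofp; the tests change spelling, not semantics.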
54 changes: 31 additions & 23 deletions llvm/test/Transforms/InstCombine/binop-itofp.ll
@@ -110,7 +110,7 @@ define half @test_ui_si_i8_add(i8 noundef %x_in, i8 noundef %y_in) {
; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63
; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = and i8 %x_in, 63
@@ -125,9 +125,8 @@ define half @test_ui_si_i8_add_overflow(i8 noundef %x_in, i8 noundef %y_in) {
; CHECK-LABEL: @test_ui_si_i8_add_overflow(
; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63
; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 65
; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half
; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half
; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = and i8 %x_in, 63
@@ -152,9 +151,8 @@ define half @test_ui_ui_i8_sub_C(i8 noundef %x_in) {

define half @test_ui_ui_i8_sub_C_fail_overflow(i8 noundef %x_in) {
; CHECK-LABEL: @test_ui_ui_i8_sub_C_fail_overflow(
; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127
; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half
; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xHD800
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X_IN:%.*]], -128
; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = and i8 %x_in, 127
@@ -212,8 +210,8 @@ define half @test_si_si_i8_sub_C(i8 noundef %x_in) {
define half @test_si_si_i8_sub_C_fail_overflow(i8 noundef %x_in) {
; CHECK-LABEL: @test_si_si_i8_sub_C_fail_overflow(
; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 65
; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half
; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH5400
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[X]], 64
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = and i8 %x_in, 65
@@ -242,9 +240,8 @@ define half @test_ui_si_i8_sub_fail_maybe_sign(i8 noundef %x_in, i8 noundef %y_i
; CHECK-LABEL: @test_ui_si_i8_sub_fail_maybe_sign(
; CHECK-NEXT: [[X:%.*]] = or i8 [[X_IN:%.*]], 64
; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63
; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half
; CHECK-NEXT: [[YF:%.*]] = sitofp i8 [[Y]] to half
; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]]
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i8 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = or i8 %x_in, 64
@@ -273,8 +270,8 @@ define half @test_ui_ui_i8_mul(i8 noundef %x_in, i8 noundef %y_in) {

define half @test_ui_ui_i8_mul_C(i8 noundef %x_in) {
; CHECK-LABEL: @test_ui_ui_i8_mul_C(
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X_IN:%.*]], 4
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: [[X:%.*]] = shl i8 [[X_IN:%.*]], 4
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[X]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = and i8 %x_in, 15
@@ -368,7 +365,7 @@ define half @test_ui_si_i8_mul(i8 noundef %x_in, i8 noundef %y_in) {
; CHECK-NEXT: [[YY:%.*]] = and i8 [[Y_IN:%.*]], 7
; CHECK-NEXT: [[Y:%.*]] = add nuw nsw i8 [[YY]], 1
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%xx = and i8 %x_in, 6
@@ -386,9 +383,8 @@ define half @test_ui_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i
; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 7
; CHECK-NEXT: [[X:%.*]] = add nuw nsw i8 [[XX]], 1
; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 7
; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half
; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half
; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]]
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%xx = and i8 %x_in, 7
@@ -694,7 +690,7 @@ define half @test_ui_si_i16_mul(i16 noundef %x_in, i16 noundef %y_in) {
; CHECK-NEXT: [[YY:%.*]] = and i16 [[Y_IN:%.*]], 126
; CHECK-NEXT: [[Y:%.*]] = or disjoint i16 [[YY]], 1
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i16 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = sitofp i16 [[TMP1]] to half
; CHECK-NEXT: [[R:%.*]] = uitofp i16 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%xx = and i16 %x_in, 126
@@ -807,9 +803,8 @@ define half @test_ui_ui_i12_sub_fail_overflow(i12 noundef %x_in, i12 noundef %y_
; CHECK-LABEL: @test_ui_ui_i12_sub_fail_overflow(
; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 1023
; CHECK-NEXT: [[Y:%.*]] = and i12 [[Y_IN:%.*]], 2047
; CHECK-NEXT: [[XF:%.*]] = uitofp i12 [[X]] to half
; CHECK-NEXT: [[YF:%.*]] = uitofp i12 [[Y]] to half
; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]]
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i12 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%x = and i12 %x_in, 1023
@@ -984,7 +979,7 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) {
; CHECK-NEXT: [[YY:%.*]] = and i12 [[Y_IN:%.*]], 30
; CHECK-NEXT: [[Y:%.*]] = or disjoint i12 [[YY]], 1
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half
; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half
; CHECK-NEXT: ret half [[R]]
;
%xx = and i12 %x_in, 31
@@ -996,3 +991,16 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) {
%r = fmul half %xf, %yf
ret half %r
}

define float @test_ui_add_with_signed_constant(i32 %shr.i) {
; CHECK-LABEL: @test_ui_add_with_signed_constant(
; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I:%.*]], 32767
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[AND_I]], -16383
; CHECK-NEXT: [[ADD:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT: ret float [[ADD]]
;
%and.i = and i32 %shr.i, 32767
%sub = uitofp i32 %and.i to float
%add = fadd float %sub, -16383.0
ret float %add
}
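The new @test_ui_add_with_signed_constant pins down case 2 with a negative FP constant: %and.i lies in [0, 32767], the constant -16383.0 round-trips exactly through fptosi, and the integer sum stays in [-16383, 16384], a range i32 holds without wrapping (hence add nsw) and float represents exactly (at most 15 significant bits, well under 24). Only the signed interpretation is valid here, so the result uses sitofp.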
34 changes: 25 additions & 9 deletions llvm/test/Transforms/InstCombine/fdiv-sqrt.ll
@@ -6,9 +6,9 @@ declare double @llvm.sqrt.f64(double)
define double @sqrt_div_fast(double %x, double %y, double %z) {
; CHECK-LABEL: @sqrt_div_fast(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[DIV]])
; CHECK-NEXT: [[DIV1:%.*]] = fdiv fast double [[X:%.*]], [[SQRT]]
; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast double [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP0]])
; CHECK-NEXT: [[DIV1:%.*]] = fmul fast double [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret double [[DIV1]]
;
entry:
@@ -36,9 +36,9 @@ entry:
define double @sqrt_div_reassoc_arcp(double %x, double %y, double %z) {
; CHECK-LABEL: @sqrt_div_reassoc_arcp(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc arcp double [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[SQRT:%.*]] = call reassoc arcp double @llvm.sqrt.f64(double [[DIV]])
; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc arcp double [[X:%.*]], [[SQRT]]
; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc arcp double [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = call reassoc arcp double @llvm.sqrt.f64(double [[TMP0]])
; CHECK-NEXT: [[DIV1:%.*]] = fmul reassoc arcp double [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret double [[DIV1]]
;
entry:
@@ -96,9 +96,9 @@ entry:
define double @sqrt_div_arcp_missing(double %x, double %y, double %z) {
; CHECK-LABEL: @sqrt_div_arcp_missing(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc double [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[SQRT:%.*]] = call reassoc arcp double @llvm.sqrt.f64(double [[DIV]])
; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc arcp double [[X:%.*]], [[SQRT]]
; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = call reassoc arcp double @llvm.sqrt.f64(double [[TMP0]])
; CHECK-NEXT: [[DIV1:%.*]] = fmul reassoc arcp double [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret double [[DIV1]]
;
entry:
@@ -173,3 +173,19 @@ entry:
ret double %div1
}

define float @sqrt_non_div_operator(float %a) {
; CHECK-LABEL: @sqrt_non_div_operator(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[A:%.*]] to double
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[CONV]])
; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double [[CONV]], [[SQRT]]
; CHECK-NEXT: [[CONV2:%.*]] = fptrunc double [[DIV]] to float
; CHECK-NEXT: ret float [[CONV2]]
;
entry:
%conv = fpext float %a to double
%sqrt = call fast double @llvm.sqrt.f64(double %conv)
%div = fdiv fast double %conv, %sqrt
%conv2 = fptrunc double %div to float
ret float %conv2
}
4 changes: 2 additions & 2 deletions openmp/runtime/src/kmp.h
@@ -2507,7 +2507,7 @@ typedef struct kmp_depend_info {
union {
kmp_uint8 flag; // flag as an unsigned char
struct { // flag as a set of 8 bits
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Same fields as in the #else branch, but in reverse order */
unsigned all : 1;
unsigned unused : 3;
@@ -2672,7 +2672,7 @@ typedef struct kmp_task_stack {
#endif // BUILD_TIED_TASK_STACK

typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Same fields as in the #else branch, but in reverse order */
#if OMPX_TASKGRAPH
unsigned reserved31 : 6;
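Context for the defined(__BYTE_ORDER__) guards added here and in the files below: inside a preprocessor #if, any identifier that is not a defined macro evaluates to 0, so on a toolchain that predefines neither __BYTE_ORDER__ nor __ORDER_BIG_ENDIAN__ (MSVC, for example) the old test reduced to 0 == 0, which is true, and the big-endian bit-field layout was silently selected on a little-endian target. A minimal C sketch of the pitfall, using deliberately hypothetical macro names:

#include <stdio.h>

/* Neither macro is defined anywhere in this program, yet the #if branch
   is taken: unknown identifiers in an #if expression are replaced by 0,
   so the test below is 0 == 0. */
#if MY_BYTE_ORDER == MY_ORDER_BIG_ENDIAN
#define CHOSEN "big-endian branch (selected by accident)"
#else
#define CHOSEN "little-endian branch"
#endif

int main(void) {
  puts(CHOSEN); /* prints the big-endian line */
  return 0;
}

With the defined() guard in place, an undefined __BYTE_ORDER__ falls through to the #else layout, which is correct for the targets where these macros are missing.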
3 changes: 2 additions & 1 deletion openmp/runtime/src/kmp_lock.h
@@ -120,7 +120,8 @@ extern void __kmp_validate_locks(void);

struct kmp_base_tas_lock {
// KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
__LP64__
// Flip the ordering of the high and low 32-bit member to be consistent
// with the memory layout of the address in 64-bit big-endian.
kmp_int32 depth_locked; // depth locked, for nested locks only
2 changes: 1 addition & 1 deletion openmp/runtime/test/tasking/bug_nested_proxy_task.c
@@ -50,7 +50,7 @@ typedef struct kmp_depend_info {
union {
kmp_uint8 flag; // flag as an unsigned char
struct { // flag as a set of 8 bits
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
unsigned all : 1;
unsigned unused : 3;
unsigned set : 1;
2 changes: 1 addition & 1 deletion openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -47,7 +47,7 @@ typedef struct kmp_depend_info {
union {
kmp_uint8 flag; // flag as an unsigned char
struct { // flag as a set of 8 bits
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
unsigned all : 1;
unsigned unused : 3;
unsigned set : 1;
2 changes: 1 addition & 1 deletion openmp/runtime/test/tasking/hidden_helper_task/common.h
@@ -17,7 +17,7 @@ typedef struct kmp_depend_info {
union {
unsigned char flag;
struct {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
unsigned all : 1;
unsigned unused : 3;
unsigned set : 1;