-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[GlobalIsel] Combine ADDE #82413
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalIsel] Combine ADDE #82413
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Thorsten Schütt (tschuett) Changes: Clang has them as builtins (__builtin_addc). The middle end has no intrinsics for them. They are used in legalization operations. AArch64: ADCS — add with carry and set flags. On Neoverse V2, they run at half the throughput of basic arithmetic and have a limited set of pipelines. Patch is 192.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82413.diff 8 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 23728636498ba0..abc2ebdfd878c2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -810,12 +810,15 @@ class CombinerHelper {
/// Combine selects.
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
- /// Combine ands,
+ /// Combine ands.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
- /// Combine ors,
+ /// Combine ors.
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Combine addes.
+ bool matchAddCarryInOut(MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -919,6 +922,7 @@ class CombinerHelper {
bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
bool isConstantSplatVector(Register Src, int64_t SplatValue,
bool AllowUndefs);
+ bool isConstantOrConstantVectorI(Register Src);
std::optional<APInt> getConstantOrConstantSplatVector(Register Src);
@@ -930,6 +934,8 @@ class CombinerHelper {
// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
+
+ bool isZExtOrTruncLegal(LLT ToTy, LLT FromTy) const;
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index f5a6528d10a973..e46d2d1aac0e86 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -359,6 +359,8 @@ class GBinOpCarryOut : public GenericMachineInstr {
Register getCarryOutReg() const { return getReg(1); }
MachineOperand &getLHS() { return getOperand(2); }
MachineOperand &getRHS() { return getOperand(3); }
+ Register getLHSReg() { return getOperand(2).getReg(); }
+ Register getRHSReg() { return getOperand(3).getReg(); }
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
@@ -448,6 +450,21 @@ class GAddSubCarryInOut : public GAddSubCarryOut {
}
};
+/// Represents overflowing add operations that also consume a carry-in.
+/// G_UADDE, G_SADDE
+class GAddCarryInOut : public GAddSubCarryInOut {
+public:
+  static bool classof(const MachineInstr *MI) {
+    // Only the two adds-with-carry-in qualify.
+    unsigned Opc = MI->getOpcode();
+    return Opc == TargetOpcode::G_UADDE || Opc == TargetOpcode::G_SADDE;
+  }
+};
+
+
/// Represents a call to an intrinsic.
class GIntrinsic final : public GenericMachineInstr {
public:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7eadb718f16415..3a82bc14885beb 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1253,6 +1253,12 @@ def match_ors : GICombineRule<
[{ return Helper.matchOr(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+// Combine rule for adds with carry-in/carry-out: dispatches G_SADDE and
+// G_UADDE roots to CombinerHelper::matchAddCarryInOut and applies the
+// resulting build function.
+def match_addes : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SADDE, G_UADDE):$root,
+  [{ return Helper.matchAddCarryInOut(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
// Combines concat operations
def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
def combine_concat_vector : GICombineRule<
@@ -1335,7 +1341,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector]>;
+ combine_concat_vector, match_addes]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 779ec49f4d13a7..2cfc7387ed976d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6342,6 +6342,23 @@ CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
return Value;
}
+/// Returns true if \p Src is an integer constant, or a G_BUILD_VECTOR all of
+/// whose sources are integer constants (looking through intermediate defs).
+bool CombinerHelper::isConstantOrConstantVectorI(Register Src) {
+  // Scalar (or splat-resolvable) integer constant.
+  if (getIConstantVRegValWithLookThrough(Src, MRI))
+    return true;
+  // Otherwise require a build vector whose every lane is constant.
+  GBuildVector *BV = getOpcodeDef<GBuildVector>(Src, MRI);
+  if (!BV)
+    return false;
+  for (unsigned Idx = 0, E = BV->getNumSources(); Idx != E; ++Idx)
+    if (!getIConstantVRegValWithLookThrough(BV->getSourceReg(Idx), MRI))
+      return false;
+  return true; // FIXME: G_SPLAT_VECTOR
+}
+
+
// TODO: use knownbits to determine zeros
bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
BuildFnTy &MatchInfo) {
@@ -6906,3 +6923,195 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
return false;
}
+
+/// Returns true if converting \p FromTy to \p ToTy is possible: either the
+/// types match (plain copy), or a G_ZEXT or G_TRUNC between them is legal
+/// (or we are still before the legalizer).
+bool CombinerHelper::isZExtOrTruncLegal(LLT ToTy, LLT FromTy) const {
+  return ToTy == FromTy ||
+         isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {ToTy, FromTy}}) ||
+         isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {ToTy, FromTy}});
+}
+
+
+/// Combine G_SADDE / G_UADDE (add with carry-in and carry-out):
+///  - replace a signed adde whose carry-out is dead by two plain adds,
+///  - constant-fold when LHS, RHS and carry-in are all constant,
+///  - canonicalize a constant LHS to the RHS,
+///  - fold a known-zero carry-in into G_[S|U]ADDO,
+///  - otherwise lower into two G_[S|U]ADDO plus a G_OR of the carries.
+bool CombinerHelper::matchAddCarryInOut(MachineInstr &MI,
+                                        BuildFnTy &MatchInfo) {
+  GAddCarryInOut *Add = cast<GAddCarryInOut>(&MI);
+
+  // adde has no flags.
+  Register Dst = Add->getDstReg();
+  Register Carry = Add->getCarryOutReg();
+  Register CarryIn = Add->getCarryInReg();
+  Register LHS = Add->getLHSReg();
+  Register RHS = Add->getRHSReg();
+  bool IsSigned = Add->isSigned();
+  LLT DstTy = MRI.getType(Dst);
+  LLT CarryTy = MRI.getType(Carry);
+  LLT OperandTy = MRI.getType(LHS);
+  LLT CarryInTy = MRI.getType(CarryIn);
+
+  // FIXME: handle undef
+
+  // fold sadde, if the carry is dead -> add(add(LHS, RHS),
+  // zextOrTrunc(CarryIn)), undef.
+  if (MRI.use_nodbg_empty(Carry) && IsSigned && MRI.hasOneNonDBGUse(Dst) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) &&
+      isZExtOrTruncLegal(DstTy, CarryInTy)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      auto A = B.buildAdd(DstTy, LHS, RHS);
+      Register AReg = A.getReg(0);
+      // Carry-in is a 0/1 value: zero-extend it before adding.
+      auto ZextCarryIn = B.buildZExtOrTrunc(DstTy, CarryIn);
+      Register ZextCarryInReg = ZextCarryIn.getReg(0);
+      B.buildAdd(Dst, AReg, ZextCarryInReg);
+      B.buildUndef(Carry);
+    };
+    return true;
+  }
+
+  // We want to fold the [u|s]adde.
+  if (!MRI.hasOneNonDBGUse(Dst) || !MRI.hasOneNonDBGUse(Carry))
+    return false;
+
+  // The parameters of the adde must be integer-like.
+  std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
+  std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
+  std::optional<APInt> MaybeCarryIn = getConstantOrConstantSplatVector(CarryIn);
+
+  // fold adde(c, c, c) -> c, carry
+  if (MaybeLHS && MaybeRHS && MaybeCarryIn &&
+      isConstantLegalOrBeforeLegalizer(DstTy) &&
+      isConstantLegalOrBeforeLegalizer(CarryTy)) {
+    // They must all have the same bitwidth. Otherwise APInt might
+    // assert. Prelegalization, they may have widely different bitwidths.
+    unsigned BitWidth =
+        std::max(std::max(MaybeLHS->getBitWidth(), MaybeRHS->getBitWidth()),
+                 MaybeCarryIn->getBitWidth());
+    if (IsSigned) {
+      APInt LHS = MaybeLHS->sext(BitWidth);
+      APInt RHS = MaybeRHS->sext(BitWidth);
+      // The carry-in is 0/1 even for the signed variant, so zero-extend it.
+      APInt CarryIn = MaybeCarryIn->zext(BitWidth);
+      bool FirstOverflowed = false;
+      bool SecondOverflowed = false;
+      APInt Result =
+          LHS.sadd_ov(RHS, FirstOverflowed).sadd_ov(CarryIn, SecondOverflowed);
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildConstant(Dst, Result);
+        B.buildConstant(Carry, FirstOverflowed | SecondOverflowed);
+      };
+      return true;
+    } else if (!IsSigned) {
+      APInt LHS = MaybeLHS->zext(BitWidth);
+      APInt RHS = MaybeRHS->zext(BitWidth);
+      APInt CarryIn = MaybeCarryIn->zext(BitWidth);
+      bool FirstOverflowed = false;
+      bool SecondOverflowed = false;
+      APInt Result =
+          LHS.uadd_ov(RHS, FirstOverflowed).uadd_ov(CarryIn, SecondOverflowed);
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildConstant(Dst, Result);
+        B.buildConstant(Carry, FirstOverflowed | SecondOverflowed);
+      };
+      return true;
+    }
+  }
+
+  // canonicalize constant to RHS.
+  if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
+    if (IsSigned) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildSAdde(Dst, Carry, RHS, LHS, CarryIn);
+      };
+      return true;
+    } else {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildUAdde(Dst, Carry, RHS, LHS, CarryIn);
+      };
+      return true;
+    }
+  }
+
+  // fold adde(LHS, RHS, 0) -> addo(LHS, RHS)
+  if (MaybeCarryIn && *MaybeCarryIn == 0) {
+    if (IsSigned && isLegalOrBeforeLegalizer(
+                        {TargetOpcode::G_SADDO, {DstTy, CarryTy, OperandTy}})) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildSAddo(Dst, Carry, LHS, RHS);
+      };
+      return true;
+    }
+    // Braces are required here: previously only the MatchInfo assignment was
+    // guarded by this condition and `return true` ran unconditionally, so
+    // when neither addo was legal we reported a match with MatchInfo unset.
+    if (!IsSigned && isLegalOrBeforeLegalizer(
+                         {TargetOpcode::G_UADDO, {DstTy, CarryTy, OperandTy}})) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildUAddo(Dst, Carry, LHS, RHS);
+      };
+      return true;
+    }
+  }
+
+  // fold adde(LHS, 0, Carry) -> addo(LHS, Carry)
+  if (MaybeRHS && *MaybeRHS == 0) {
+    if (IsSigned &&
+        isLegalOrBeforeLegalizer(
+            {TargetOpcode::G_SADDO, {DstTy, CarryTy, OperandTy}}) &&
+        isZExtOrTruncLegal(OperandTy, CarryInTy)) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto ZextCarryIn = B.buildZExtOrTrunc(OperandTy, CarryIn);
+        Register ZextCarryInReg = ZextCarryIn.getReg(0);
+        B.buildSAddo(Dst, Carry, LHS, ZextCarryInReg);
+      };
+      return true;
+    } else if (!IsSigned &&
+               isLegalOrBeforeLegalizer(
+                   {TargetOpcode::G_UADDO, {DstTy, CarryTy, OperandTy}}) &&
+               isZExtOrTruncLegal(OperandTy, CarryInTy)) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto ZextCarryIn = B.buildZExtOrTrunc(OperandTy, CarryIn);
+        Register ZextCarryInReg = ZextCarryIn.getReg(0);
+        B.buildUAddo(Dst, Carry, LHS, ZextCarryInReg);
+      };
+      return true;
+    }
+  }
+
+  // We lower to 2*addo + 1*or.
+  if (IsSigned &&
+      isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_SADDO, {DstTy, CarryTy, OperandTy}}) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_OR, {DstTy}}) &&
+      isZExtOrTruncLegal(OperandTy, CarryInTy)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      auto First = B.buildSAddo(DstTy, CarryTy, LHS, RHS);
+      Register FirstResult = First.getReg(0);
+      Register FirstCarry = First.getReg(1);
+      auto ZextCarryIn = B.buildZExtOrTrunc(OperandTy, CarryIn);
+      auto Second = B.buildSAddo(DstTy, CarryTy, FirstResult, ZextCarryIn);
+      Register Result = Second.getReg(0);
+      Register SecondCarry = Second.getReg(1);
+      B.buildCopy(Dst, Result);
+      // Overflow of either partial add means the adde overflowed.
+      B.buildOr(Carry, FirstCarry, SecondCarry);
+    };
+    return true;
+  } else if (!IsSigned &&
+             isLegalOrBeforeLegalizer(
+                 {TargetOpcode::G_UADDO, {DstTy, CarryTy, OperandTy}}) &&
+             isLegalOrBeforeLegalizer({TargetOpcode::G_OR, {DstTy}}) &&
+             isZExtOrTruncLegal(OperandTy, CarryInTy)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      auto First = B.buildUAddo(DstTy, CarryTy, LHS, RHS);
+      Register FirstResult = First.getReg(0);
+      Register FirstCarry = First.getReg(1);
+      auto ZextCarryIn = B.buildZExtOrTrunc(OperandTy, CarryIn);
+      auto Second = B.buildUAddo(DstTy, CarryTy, FirstResult, ZextCarryIn);
+      Register Result = Second.getReg(0);
+      Register SecondCarry = Second.getReg(1);
+      B.buildCopy(Dst, Result);
+      // Either partial carry sets the final carry-out.
+      B.buildOr(Carry, FirstCarry, SecondCarry);
+    };
+    return true;
+  }
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-adde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-adde.mir
new file mode 100644
index 00000000000000..61c7f56f4b2605
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-adde.mir
@@ -0,0 +1,300 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+# add, _ = sadde(_, _, In)
+name: carryout_unused
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: carryout_unused
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: %carry_in:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %carry_in(s1)
+ ; CHECK-NEXT: %add:_(s64) = G_ADD [[ADD]], [[ZEXT]]
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %lhs:_(s64) = COPY %3
+ %rhs:_(s64) = COPY %3
+ %carry_in:_(s1) = G_TRUNC %4
+ %add:_(s64), %carry_out:_(s1) = G_SADDE %lhs, %rhs, %carry_in
+ $x0 = COPY %add
+...
+---
+# add, _ = uadde(_, _, In)
+name: carryout_unused_unsigned
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: carryout_unused_unsigned
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: %carry_in:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: %add:_(s64), %carry_out:_(s1) = G_UADDE [[COPY]], [[COPY]], %carry_in
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %lhs:_(s64) = COPY %3
+ %rhs:_(s64) = COPY %3
+ %carry_in:_(s1) = G_TRUNC %4
+ %add:_(s64), %carry_out:_(s1) = G_UADDE %lhs, %rhs, %carry_in
+ $x0 = COPY %add
+...
+---
+# add, multi_c = sadde(L, R, In)
+name: multi_use_unsigned
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: multi_use_unsigned
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: %carry_in:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: %add:_(s64), %carry_out:_(s1) = G_UADDE [[COPY]], [[COPY]], %carry_in
+ ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT %carry_out(s1)
+ ; CHECK-NEXT: %carry_out_ext2:_(s64) = G_ANYEXT %carry_out(s1)
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ ; CHECK-NEXT: $x1 = COPY %carry_out_ext(s64)
+ ; CHECK-NEXT: $x2 = COPY %carry_out_ext2(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %lhs:_(s64) = COPY %3
+ %rhs:_(s64) = COPY %3
+ %carry_in:_(s1) = G_TRUNC %4
+ %add:_(s64), %carry_out:_(s1) = G_UADDE %lhs, %rhs, %carry_in
+ %carry_out_ext:_(s64) = G_ANYEXT %carry_out
+ %carry_out_ext2:_(s64) = G_ANYEXT %carry_out
+ $x0 = COPY %add
+ $x1 = COPY %carry_out_ext
+ $x2 = COPY %carry_out_ext2
+...
+---
+# add, c = sadde(L, R, In)
+name: constant_fold_signed
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: constant_fold_signed
+ ; CHECK: %add:_(s64) = G_CONSTANT i64 29
+ ; CHECK-NEXT: %carry_out_ext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ ; CHECK-NEXT: $x1 = COPY %carry_out_ext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = G_CONSTANT i64 1
+ %lhs:_(s64) = G_CONSTANT i64 11
+ %rhs:_(s64) = G_CONSTANT i64 17
+ %carry_in:_(s1) = G_CONSTANT i1 1
+ %add:_(s64), %carry_out:_(s1) = G_SADDE %lhs, %rhs, %carry_in
+ %carry_out_ext:_(s64) = G_ANYEXT %carry_out
+ $x0 = COPY %add
+ $x1 = COPY %carry_out_ext
+...
+---
+# add, c = uadde(L, R, In)
+name: constant_fold_unsigned
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: constant_fold_unsigned
+ ; CHECK: %add:_(s64) = G_CONSTANT i64 27
+ ; CHECK-NEXT: %carry_out_ext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ ; CHECK-NEXT: $x1 = COPY %carry_out_ext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = G_CONSTANT i64 1
+ %lhs:_(s64) = G_CONSTANT i64 19
+ %rhs:_(s64) = G_CONSTANT i64 7
+ %carry_in:_(s1) = G_CONSTANT i1 1
+ %add:_(s64), %carry_out:_(s1) = G_UADDE %lhs, %rhs, %carry_in
+ %carry_out_ext:_(s64) = G_ANYEXT %carry_out
+ $x0 = COPY %add
+ $x1 = COPY %carry_out_ext
+...
+---
+# add, c = uadde(L, R, In)
+name: canonicalize_to_rhs_plus_lower
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: canonicalize_to_rhs_plus_lower
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: %lhs:_(s64) = G_CONSTANT i64 19
+ ; CHECK-NEXT: %carry_in:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], %lhs
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %carry_in(s1)
+ ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[ZEXT]]
+ ; CHECK-NEXT: %carry_out:_(s1) = G_OR [[UADDO1]], [[UADDO3]]
+ ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT %carry_out(s1)
+ ; CHECK-NEXT: $x0 = COPY [[UADDO2]](s64)
+ ; CHECK-NEXT: $x1 = COPY %carry_out_ext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %lhs:_(s64) = G_CONSTANT i64 19
+ %rhs:_(s64) = COPY %3
+ %carry_in:_(s1) = G_TRUNC %4
+ %add:_(s64), %carry_out:_(s1) = G_UADDE %lhs, %rhs, %carry_in
+ %carry_out_ext:_(s64) = G_ANYEXT %carry_out
+ $x0 = COPY %add
+ $x1 = COPY %carry_out_ext
+...
+---
+# add, c = sadde(L, R, 0)
+name: fold_to_addo_l_r
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: fold_to_addo_l_r
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: %add:_(s64), %carry_out:_(s1) = G_SADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT %carry_out(s1)
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ ; CHECK-NEXT: $x1 = COPY %carry_out_ext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %lhs:_(s64) = COPY %3
+ %rhs:_(s64) = COPY %4
+ %carry_in:_(s1) = G_CONSTANT i1 0
+ %add:_(s64), %carry_out:_(s1) = G_SADDE %lhs, %rhs, %carry_in
+ %carry_out_ext:_(s64) = G_ANYEXT %carry_out
+ $x0 = COPY %add
+ $x1 = COPY %carry_out_ext
+...
+---
+# add, c = sadde(L, 0, CarryIn)
+name: fold_to_addo_l_carryin
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: fold_to_addo_l_carryin
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: %carry_in:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %carry_in(s1)
+ ; CHECK-NEXT: %add:_(s64), %carry_out:_(s1) = G_SADDO [[COPY]], [[ZEXT]]
+ ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT %carry_out(s1)
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ ; CHECK-NEXT: $x1 = COPY %carry_out_ext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %lhs:_(s64) = COPY %3
+ %rhs:_(s64) ...
[truncated]
|
Updated to match llvm#79989 / 9410019
(llvm#81973) This started as an experiment to reduce the compilation time of iterating over `Lower/OpenMP.cpp` a bit since it is too slow at the moment. Trying to do that, I split the `DataSharingProcessor`, `ReductionProcessor`, and `ClauseProcessor` into their own files and extracted some shared code into a util file. All of these new `.h/.cpp` files as well as `OpenMP.cpp` are now under a `Lower/OpenMP/` directory. This resulted in a slightly better organization of the OpenMP lowering code, hence opening this NFC. As for the compilation time, this unfortunately does not affect it much (it shaves off a few seconds of `OpenMP.cpp` compilation) since, from what I learned, the bottleneck is in `DirectivesCommon.h` and `PFTBuilder.h`, which both consume a lot of time in template instantiation it seems.
…72490) When SVE register size is unknown or the minimal size is not equal to the maximum size then we could determine the actual SVE register size in the runtime and adjust shuffle mask in the runtime.
…revisited" (llvm#82358) This patch updates our internal notion of `layout-compatible` to ignore cv-qualification, which in turn fixes `__is_layout_compatible` intrinsic.
When an integer argument is promoted and *not* split (like i72 -> i128 on a new machine with vector support), the SlotVT should be i128, which is stored in VT - not ArgVT. Fixes llvm#81417
llvm#81245) This commit simplifies the internal state of the dialect conversion. A separate field for the previous state of in-place op modifications is no longer needed.
…ion (llvm#82474) The dialect conversion rolls back in-place op modifications upon failure. Rolling back modifications of attributes is already supported, but there was no support for properties until now.
* When converting a block signature, `ArgConverter` creates a new block with the new signature and moves all operation from the old block to the new block. The new block is temporarily inserted into a region that is stored in `regionMapping`. The old block is not yet deleted, so that the conversion can be rolled back. `regionMapping` is not needed. Instead of moving the old block to a temporary region, it can just be unlinked. Block erasures are handles in the same way in the dialect conversion. * `regionToConverter` is a mapping from regions to type converter. That field is never accessed within `ArgConverter`. It should be stored in `ConversionPatternRewriterImpl` instead. * `convertedBlocks` is not needed. Old blocks are already stored in `ConvertedBlockInfo`.
…ct vlen (llvm#82405) If we have exact vlen knowledge, we can figure out which indices correspond to register boundaries. Our lowering uses this knowledge to replace the vslidedown.vi with a sub-register extract. Our costs can reflect that as well. This is another piece split off llvm#80164 --------- Co-authored-by: Luke Lau <luke_lau@icloud.com>
Summary: This was missed, the NVPTX globals cannot use a `.`.
Pretty sure this isn't doing anything, but it fixes a test and is generally the right thing to do. Fixing the behavior will come later.
This is really to test for icmp vs constant - some icmp unsigned could fold to simpler comparisons, but costmodel analysis won't do this
…hen an operand is constant In most cases, SETCC lowering will be able to simplify/commute the comparison by adjusting the constant. TODO: We still need to adjust ExtraCost based on CostKind Fixes llvm#80122
…m#82394) [LLVM][DWARF] Refactor code for generating DWARF v5 .debug_names Refactor the code that uniques the entries and computes the bucket count for the DWARF V5 .debug_names accelerator table.
Instead of asserting, emit an appropriate diagnostic.
When a `ModifyOperationRewrite` is committed, the operation may already have been erased, so `OperationName` must be cached in the rewrite object. Note: This will no longer be needed with llvm#81757, which adds a "cleanup" method to `IRRewrite`.
…e. NFC This shows the issue in llvm#82430, but triggers it via the widening SEW combine rather than a GEP that RISCVGatherScatterLowering doesn't detect.
…lvm#80904)" This reverts commit b1ac052. This commit breaks coroutine splitting for non-swift calling convention functions. In this example: ```ll ; ModuleID = 'repro.ll' source_filename = "stdlib/test/runtime/test_llcl.mojo" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @0 = internal constant { i32, i32 } { i32 trunc (i64 sub (i64 ptrtoint (ptr @craSH to i64), i64 ptrtoint (ptr getelementptr inbounds ({ i32, i32 }, ptr @0, i32 0, i32 1) to i64)) to i32), i32 64 } define dso_local void @af_suspend_fn(ptr %0, i64 %1, ptr %2) #0 { ret void } define dso_local void @craSH(ptr %0) #0 { %2 = call token @llvm.coro.id.async(i32 64, i32 8, i32 0, ptr @0) %3 = call ptr @llvm.coro.begin(token %2, ptr null) %4 = getelementptr inbounds { ptr, { ptr, ptr }, i64, { ptr, i1 }, i64, i64 }, ptr poison, i32 0, i32 0 %5 = call ptr @llvm.coro.async.resume() store ptr %5, ptr %4, align 8 %6 = call { ptr, ptr, ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0p0p0s(i32 0, ptr %5, ptr @ctxt_proj_fn, ptr @af_suspend_fn, ptr poison, i64 -1, ptr poison) ret void } define dso_local ptr @ctxt_proj_fn(ptr %0) #0 { ret ptr %0 } ; Function Attrs: nomerge nounwind declare { ptr, ptr, ptr } @llvm.coro.suspend.async.sl_p0p0p0s(i32, ptr, ptr, ...) 
llvm#1 ; Function Attrs: nounwind declare token @llvm.coro.id.async(i32, i32, i32, ptr) llvm#2 ; Function Attrs: nounwind declare ptr @llvm.coro.begin(token, ptr writeonly) llvm#2 ; Function Attrs: nomerge nounwind declare ptr @llvm.coro.async.resume() llvm#1 attributes #0 = { "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+clwb,+clzero,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mwaitx,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" } attributes llvm#1 = { nomerge nounwind } attributes llvm#2 = { nounwind } ``` This verifier crashes after the `coro-split` pass with ``` cannot guarantee tail call due to mismatched parameter counts musttail call void @af_suspend_fn(ptr poison, i64 -1, ptr poison) LLVM ERROR: Broken function PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace. Stack dump: 0. 
Program arguments: opt ../../../reduced.ll -O0 #0 0x00007f1d89645c0e __interceptor_backtrace.part.0 /build/gcc-11-XeT9lY/gcc-11-11.4.0/build/x86_64-linux-gnu/libsanitizer/asan/../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:4193:28 llvm#1 0x0000556d94d254f7 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:22 llvm#2 0x0000556d94d19a2f llvm::sys::RunSignalHandlers() /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Support/Signals.cpp:105:20 llvm#3 0x0000556d94d1aa42 SignalHandler(int) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Support/Unix/Signals.inc:371:36 llvm#4 0x00007f1d88e42520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520) llvm#5 0x00007f1d88e969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76 llvm#6 0x00007f1d88e969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10 llvm#7 0x00007f1d88e969fc pthread_kill ./nptl/pthread_kill.c:89:10 llvm#8 0x00007f1d88e42476 gsignal ./signal/../sysdeps/posix/raise.c:27:6 llvm#9 0x00007f1d88e287f3 abort ./stdlib/abort.c:81:7 llvm#10 0x0000556d8944be01 std::vector<llvm::json::Value, std::allocator<llvm::json::Value>>::size() const /usr/include/c++/11/bits/stl_vector.h:919:40 llvm#11 0x0000556d8944be01 bool std::operator==<llvm::json::Value, std::allocator<llvm::json::Value>>(std::vector<llvm::json::Value, std::allocator<llvm::json::Value>> const&, std::vector<llvm::json::Value, std::allocator<llvm::json::Value>> const&) /usr/include/c++/11/bits/stl_vector.h:1893:23 llvm#12 0x0000556d8944be01 llvm::json::operator==(llvm::json::Array const&, llvm::json::Array const&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/Support/JSON.h:572:69 llvm#13 0x0000556d8944be01 llvm::json::operator==(llvm::json::Value const&, llvm::json::Value const&) (.cold) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Support/JSON.cpp:204:28 llvm#14 0x0000556d949ed2bd 
llvm::report_fatal_error(char const*, bool) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Support/ErrorHandling.cpp:82:70 llvm#15 0x0000556d8e37e876 llvm::SmallVectorBase<unsigned int>::size() const /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallVector.h:91:32 llvm#16 0x0000556d8e37e876 llvm::SmallVectorTemplateCommon<llvm::DiagnosticInfoOptimizationBase::Argument, void>::end() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallVector.h:282:41 llvm#17 0x0000556d8e37e876 llvm::SmallVector<llvm::DiagnosticInfoOptimizationBase::Argument, 4u>::~SmallVector() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallVector.h:1215:24 llvm#18 0x0000556d8e37e876 llvm::DiagnosticInfoOptimizationBase::~DiagnosticInfoOptimizationBase() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h:413:7 llvm#19 0x0000556d8e37e876 llvm::DiagnosticInfoIROptimization::~DiagnosticInfoIROptimization() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h:622:7 llvm#20 0x0000556d8e37e876 llvm::OptimizationRemark::~OptimizationRemark() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/DiagnosticInfo.h:689:7 llvm#21 0x0000556d8e37e876 operator() /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp:2213:14 llvm#22 0x0000556d8e37e876 emit<llvm::CoroSplitPass::run(llvm::LazyCallGraph::SCC&, llvm::CGSCCAnalysisManager&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&)::<lambda()> > /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h:83:12 llvm#23 0x0000556d8e37e876 llvm::CoroSplitPass::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp:2212:13 llvm#24 0x0000556d8c36ecb1 
llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::CoroSplitPass, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:91:3 llvm#25 0x0000556d91c1a84f llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:90:12 llvm#26 0x0000556d8c3690d1 llvm::detail::PassModel<llvm::LazyCallGraph::SCC, llvm::PassManager<llvm::LazyCallGraph::SCC, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&>::run(llvm::LazyCallGraph::SCC&, llvm::AnalysisManager<llvm::LazyCallGraph::SCC, llvm::LazyCallGraph&>&, llvm::LazyCallGraph&, llvm::CGSCCUpdateResult&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:91:3 llvm#27 0x0000556d91c2162d llvm::ModuleToPostOrderCGSCCPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:278:18 llvm#28 0x0000556d8c369035 llvm::detail::PassModel<llvm::Module, llvm::ModuleToPostOrderCGSCCPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:91:3 llvm#29 
0x0000556d9457abc5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/PassManager.h:247:20 llvm#30 0x0000556d8e30979e llvm::CoroConditionalWrapper::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/ubuntu/modular/third-party/llvm-project/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp:19:74 llvm#31 0x0000556d8c365755 llvm::detail::PassModel<llvm::Module, llvm::CoroConditionalWrapper, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:91:3 llvm#32 0x0000556d9457abc5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/PassManager.h:247:20 llvm#33 0x0000556d89818556 llvm::SmallPtrSetImplBase::isSmall() const /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:196:33 llvm#34 0x0000556d89818556 llvm::SmallPtrSetImplBase::~SmallPtrSetImplBase() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:84:17 llvm#35 0x0000556d89818556 llvm::SmallPtrSetImpl<llvm::AnalysisKey*>::~SmallPtrSetImpl() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:321:7 llvm#36 0x0000556d89818556 llvm::SmallPtrSet<llvm::AnalysisKey*, 2u>::~SmallPtrSet() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:427:7 llvm#37 0x0000556d89818556 llvm::PreservedAnalyses::~PreservedAnalyses() /home/ubuntu/modular/third-party/llvm-project/llvm/include/llvm/IR/Analysis.h:109:7 llvm#38 0x0000556d89818556 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, 
llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) /home/ubuntu/modular/third-party/llvm-project/llvm/tools/opt/NewPMDriver.cpp:532:10 llvm#39 0x0000556d897e3939 optMain /home/ubuntu/modular/third-party/llvm-project/llvm/tools/opt/optdriver.cpp:737:27 llvm#40 0x0000556d89455461 main /home/ubuntu/modular/third-party/llvm-project/llvm/tools/opt/opt.cpp:25:33 llvm#41 0x00007f1d88e29d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16 llvm#42 0x00007f1d88e29e40 call_init ./csu/../csu/libc-start.c:128:20 llvm#43 0x00007f1d88e29e40 __libc_start_main ./csu/../csu/libc-start.c:379:5 llvm#44 0x0000556d897b6335 _start (/home/ubuntu/modular/.derived/third-party/llvm-project/build-relwithdebinfo-asan/bin/opt+0x150c335) Aborted (core dumped)
Function::Function's constructor sets the debug info format based on the passed in parent Module, so by using this rather than modifying the function list directly, we pick up the debug info format automatically.
…#81966) This PR adds an optional bitwidth parameter to the vector xfer op flattening transformation so that the flattening doesn't happen if the trailing dimension of the read/written vector is larger than this bitwidth (i.e., we are already able to fill at least one vector register with that size).
…e/exit data constructs (llvm#81610) This patch adds support in flang for the depend clause in target and target enter/update/exit constructs. Previously, the following line in a fortran program would have resulted in the error shown below it. !$omp target map(to:a) depend(in:a) "not yet implemented: Unhandled clause DEPEND in TARGET construct"
Summary: Currently, OpenMP handles the `omp requires` clause by emitting a global constructor into the runtime for every translation unit that requires it. However, this is not a great solution because it prevents us from having a defined order in which the runtime is accessed and used. This patch changes the approach to no longer use global constructors, but to instead group the flag with the other offloading entries that we already handle. This has the effect of still registering each flag per requires TU, but now we have a single constructor that handles everything. This patch removes support for the old `__tgt_register_requires` and replaces it with a warning message. We just had a recent release, and the OpenMP policy for the past four releases since we switched to LLVM is that we do not provide strict backwards compatibility between major LLVM releases now that the library is versioned. This means that a user will need to recompile if they have an old binary that relied on `register_requires` having the old behavior. It is important that we actively deprecate this, as otherwise it would not solve the problem of having no defined init and shutdown order for `libomptarget`. The problem of `libomptarget` not having a defined init and shutdown order cascades into a lot of other issues so I have a strong incentive to be rid of it. It is worth noting that the current `__tgt_offload_entry` only has space for a 32-bit integer here. I am planning to overhaul these at some point as well.
We did something pretty naive: - round FP64 -> BF16 by first rounding to FP32 - skip FP32 -> BF16 rounding entirely - taking the top 16 bits of a FP32 which will turn some NaNs into infinities Let's do this in a more principled way by rounding types with more precision than FP32 to FP32 using round-inexact-to-odd which will negate double rounding issues.
llvm#82293) Installapi has important distinctions when compared to the clang driver, so much that, it doesn't make much sense to try to integrate into it. This patch partially reverts the CC1 action & driver support to replace with its own driver as a clang tool. For distribution, we could use `LLVM_TOOL_LLVM_DRIVER_BUILD` mechanism for integrating the functionality into clang such that the toolchain size is less impacted.
…llvm#82612) Update include-cleaner tests. Now that we have proper found-decls set up for VarTemplates, in case of instantiations we point to primary templates and not specializations. To be changed in a follow-up patch.
…m#68140) The checker reported a false positive on this code void testTaintedSanitizedVLASize(void) { int x; scanf("%d", &x); if (x<1) return; int vla[x]; // no-warning } After the fix, the checker only emits a tainted warning if the VLA size is coming from a tainted source and it cannot prove that it is positive.
… `ConversionPatternRewriter` (llvm#82333) `ConversionPatternRewriterImpl` no longer maintains a reference to the respective `ConversionPatternRewriter`. An `MLIRContext` is sufficient. This commit simplifies the internal state of `ConversionPatternRewriterImpl`.
Despite a valid tail call opportunity, backends still may not generate a tail call or such lowering is not implemented yet. Check that lowering has happened instead of its possibility when generating G_ASSERT_ALIGN.
…m#82648) We previously defined svzero_za as: void svzero_za(); rather than: void svzero_za(void); Which meant that Clang accepted arguments. Compiling for example `svzero_za(<non-constant integer>)` ended up with incorrect IR and a compiler crash because it couldn't select an instruction for it.
Patch 2 of 3 to add llvm.dbg.label support to the RemoveDIs project. The patch stack adds the DPLabel class, which is the RemoveDIs llvm.dbg.label equivalent. 1. Add DbgRecord base class for DPValue and the not-yet-added DPLabel class. 2. Add the DPLabel class. -> 3. Add support to passes. The next patch, llvm#82639, will enable conversion between dbg.labels and DPLabels. AssignmentTrackingAnalysis support could have gone two ways: 1. Have the analysis store a DPLabel representation in its results - SelectionDAGBuilder reads the analysis results and ignores all DbgRecord kinds. 2. Ignore DPLabels in the analysis - SelectionDAGBuilder reads the analysis results but still needs to iterate over DPLabels from the IR. I went with option 2 because it's less work and is no less correct than 1. It's worth noting that this causes labels to sink to the bottom of packs of debug records. e.g., [value, label, value] becomes [value, value, label]. This shouldn't be a problem because labels and variable locations don't have an ordering requirement. The ordering between variable locations is maintained and the label movement is deterministic.
This test was added in llvm#82648
MLIRArmSMETestPasses was added in llvm@b39f566, we need to add a build rule for it as well.
The bug affects dpp forms of v_dot2_f32_f16. The encoding does not match SP3 and does not set op_sel_hi bits properly.
`%ld` specifier is defined to work on values of type `long`. The parameter given to `fprintf` is of type `intptr_t` whose actual underlying integer type is unspecified. On Unix systems it happens to commonly be `long` but on 64-bit Windows it is defined as `long long`. The cross-platform way to print a `intptr_t` is to use `PRIdPTR` which expands to the correct format specifier for `intptr_t`. This avoids any undefined behaviour and compiler warnings.
TestArmSME was added in llvm@e132643, now we need to add dependency on it.
…remetal (llvm#81727) The below culprit patch enabled the generation of asynchronous unwind tables (-funwind-tables=2) by default for RISCV for both linux and RISCVToolChain baremetal object. However, since there are 2 baremetal toolchain objects for RISCV, this created a discrepancy between their behavior. Moreover, enabling the generation of asynchronous unwind tables based on whether `-gcc-toolchain` option is present or not doesn't seem to be the best criterion to decide on the same. This patch makes the behavior consistent by disabling the unwind tables in RISCVToolChain Baremetal object. Culprit Patch - https://reviews.llvm.org/D145164
These patterns can already be used via populateMathPolynomialApproximationPatterns, but that includes a number of other patterns that may not be needed. There are already similar functions for expansion. For now only adding tanh and erf since I have a concrete use case for these two.
…m#82523) When reconstructing lines from a macro expansion, make sure that lines at different levels in the expanded code get indented correctly as part of the macro argument.
Clang has them as builtins (__builtin_addc). The middle end has no intrinsics for them. They are used in legalization operations. AArch64: ADCS Add with carry and set flags On Neoverse V2, they run at half the throughput of basic arithmetic and have a limited set of pipelines.
Clang has them as builtins (__builtin_addc). The middle end has no intrinsics for them. They are used in legalization operations.
AArch64: ADCS Add with carry and set flags
On Neoverse V2, they run at half the throughput of basic arithmetic and have a limited set of pipelines.