274 changes: 265 additions & 9 deletions clang/lib/Sema/SemaOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,36 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitGangClause(
if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
return isNotImplemented();

// OpenACC 3.3 Section 2.9.11: A reduction clause may not appear on a loop
// directive that has a gang clause and is within a compute construct that has
// a num_gangs clause with more than one explicit argument.
if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Loop &&
SemaRef.getActiveComputeConstructInfo().Kind !=
OpenACCDirectiveKind::Invalid) {
// num_gangs clause on the active compute construct.
auto *NumGangsClauseItr =
llvm::find_if(SemaRef.getActiveComputeConstructInfo().Clauses,
llvm::IsaPred<OpenACCNumGangsClause>);

auto *ReductionClauseItr =
llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCReductionClause>);

if (ReductionClauseItr != ExistingClauses.end() &&
NumGangsClauseItr !=
SemaRef.getActiveComputeConstructInfo().Clauses.end() &&
cast<OpenACCNumGangsClause>(*NumGangsClauseItr)->getIntExprs().size() >
1) {
SemaRef.Diag(Clause.getBeginLoc(),
diag::err_acc_gang_reduction_numgangs_conflict)
<< OpenACCClauseKind::Gang << OpenACCClauseKind::Reduction;
SemaRef.Diag((*ReductionClauseItr)->getBeginLoc(),
diag::note_acc_previous_clause_here);
SemaRef.Diag((*NumGangsClauseItr)->getBeginLoc(),
diag::note_acc_previous_clause_here);
return nullptr;
}
}

llvm::SmallVector<OpenACCGangKind> GangKinds;
llvm::SmallVector<Expr *> IntExprs;

Expand Down Expand Up @@ -1316,9 +1346,9 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitGangClause(
return nullptr;
}

return OpenACCGangClause::Create(Ctx, Clause.getBeginLoc(),
Clause.getLParenLoc(), GangKinds, IntExprs,
Clause.getEndLoc());
return SemaRef.CheckGangClause(ExistingClauses, Clause.getBeginLoc(),
Clause.getLParenLoc(), GangKinds, IntExprs,
Clause.getEndLoc());
}

OpenACCClause *SemaOpenACCClauseVisitor::VisitSeqClause(
Expand Down Expand Up @@ -1367,9 +1397,57 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitReductionClause(
// Restrictions only properly implemented on 'compute' constructs, and
// 'compute' constructs are the only construct that can do anything with
// this yet, so skip/treat as unimplemented in this case.
if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
return isNotImplemented();

// OpenACC 3.3 Section 2.9.11: A reduction clause may not appear on a loop
// directive that has a gang clause and is within a compute construct that has
// a num_gangs clause with more than one explicit argument.
if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Loop &&
SemaRef.getActiveComputeConstructInfo().Kind !=
OpenACCDirectiveKind::Invalid) {
// num_gangs clause on the active compute construct.
auto *NumGangsClauseItr =
llvm::find_if(SemaRef.getActiveComputeConstructInfo().Clauses,
llvm::IsaPred<OpenACCNumGangsClause>);

auto *GangClauseItr =
llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCGangClause>);

if (GangClauseItr != ExistingClauses.end() &&
NumGangsClauseItr !=
SemaRef.getActiveComputeConstructInfo().Clauses.end() &&
cast<OpenACCNumGangsClause>(*NumGangsClauseItr)->getIntExprs().size() >
1) {
SemaRef.Diag(Clause.getBeginLoc(),
diag::err_acc_gang_reduction_numgangs_conflict)
<< OpenACCClauseKind::Reduction << OpenACCClauseKind::Gang;
SemaRef.Diag((*GangClauseItr)->getBeginLoc(),
diag::note_acc_previous_clause_here);
SemaRef.Diag((*NumGangsClauseItr)->getBeginLoc(),
diag::note_acc_previous_clause_here);
return nullptr;
}
}

// OpenACC3.3 Section 2.9.11: If a variable is involved in a reduction that
// spans multiple nested loops where two or more of those loops have
// associated loop directives, a reduction clause containing that variable
// must appear on each of those loop directives.
//
// This can't really be implemented in the CFE, as this requires a level of
// reachability/usage analysis that we're not really wanting to get into.
// Additionally, I'm alerted that this restriction is one that the middle-end
// can just 'figure out' as an extension and isn't really necessary.
//
// OpenACC3.3 Section 2.9.11: Every 'var' in a reduction clause appearing on
// an orphaned loop construct must be private.
//
// This again is something we cannot really diagnose, as it requires we see
// all the uses/scopes of all variables referenced. The middle end/MLIR might
// be able to diagnose this.

// OpenACC 3.3 Section 2.5.4:
// A reduction clause may not appear on a parallel construct with a
// num_gangs clause that has more than one argument.
Expand All @@ -1394,15 +1472,17 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitReductionClause(
SmallVector<Expr *> ValidVars;

for (Expr *Var : Clause.getVarList()) {
ExprResult Res = SemaRef.CheckReductionVar(Var);
ExprResult Res = SemaRef.CheckReductionVar(Clause.getDirectiveKind(),
Clause.getReductionOp(), Var);

if (Res.isUsable())
ValidVars.push_back(Res.get());
}

return OpenACCReductionClause::Create(
Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getReductionOp(),
ValidVars, Clause.getEndLoc());
return SemaRef.CheckReductionClause(
ExistingClauses, Clause.getDirectiveKind(), Clause.getBeginLoc(),
Clause.getLParenLoc(), Clause.getReductionOp(), ValidVars,
Clause.getEndLoc());
}

OpenACCClause *SemaOpenACCClauseVisitor::VisitCollapseClause(
Expand All @@ -1425,6 +1505,16 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCollapseClause(
LoopCount.get(), Clause.getEndLoc());
}

// Appends to ActiveClauses every 'reduction' clause in CurClauses that names
// at least one variable. Clauses of other kinds, and reduction clauses with an
// empty variable list, are left out.
void CollectActiveReductionClauses(
    llvm::SmallVector<OpenACCReductionClause *> &ActiveClauses,
    ArrayRef<OpenACCClause *> CurClauses) {
  for (OpenACCClause *Candidate : CurClauses) {
    auto *Reduction = dyn_cast<OpenACCReductionClause>(Candidate);
    if (!Reduction || Reduction->getVarList().empty())
      continue;

    ActiveClauses.push_back(Reduction);
  }
}

} // namespace

SemaOpenACC::SemaOpenACC(Sema &S) : SemaBase(S) {}
Expand All @@ -1437,11 +1527,14 @@ SemaOpenACC::AssociatedStmtRAII::AssociatedStmtRAII(
DirKind(DK), OldLoopGangClauseOnKernelLoc(S.LoopGangClauseOnKernelLoc),
OldLoopWorkerClauseLoc(S.LoopWorkerClauseLoc),
OldLoopVectorClauseLoc(S.LoopVectorClauseLoc),
ActiveReductionClauses(S.ActiveReductionClauses),
LoopRAII(SemaRef, /*PreserveDepth=*/false) {

// Compute constructs end up taking their 'loop'.
if (DirKind == OpenACCDirectiveKind::Parallel ||
DirKind == OpenACCDirectiveKind::Serial ||
DirKind == OpenACCDirectiveKind::Kernels) {
CollectActiveReductionClauses(S.ActiveReductionClauses, Clauses);
SemaRef.ActiveComputeConstructInfo.Kind = DirKind;
SemaRef.ActiveComputeConstructInfo.Clauses = Clauses;
SemaRef.ParentlessLoopConstructs.swap(ParentlessLoopConstructs);
Expand All @@ -1456,6 +1549,7 @@ SemaOpenACC::AssociatedStmtRAII::AssociatedStmtRAII(
SemaRef.LoopWorkerClauseLoc = {};
SemaRef.LoopVectorClauseLoc = {};
} else if (DirKind == OpenACCDirectiveKind::Loop) {
CollectActiveReductionClauses(S.ActiveReductionClauses, Clauses);
SetCollapseInfoBeforeAssociatedStmt(UnInstClauses, Clauses);
SetTileInfoBeforeAssociatedStmt(UnInstClauses, Clauses);

Expand Down Expand Up @@ -1559,6 +1653,7 @@ SemaOpenACC::AssociatedStmtRAII::~AssociatedStmtRAII() {
SemaRef.LoopGangClauseOnKernelLoc = OldLoopGangClauseOnKernelLoc;
SemaRef.LoopWorkerClauseLoc = OldLoopWorkerClauseLoc;
SemaRef.LoopVectorClauseLoc = OldLoopVectorClauseLoc;
SemaRef.ActiveReductionClauses.swap(ActiveReductionClauses);

if (DirKind == OpenACCDirectiveKind::Parallel ||
DirKind == OpenACCDirectiveKind::Serial ||
Expand Down Expand Up @@ -1610,14 +1705,78 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,

}

namespace {
// Decide whether two 'var' expressions name the same variable for the purposes
// of equality checking. Instantiation-dependent expressions never compare
// equal. Parentheses and casts are stripped before comparing.
bool areVarsEqual(Expr *VarExpr1, Expr *VarExpr2) {
  if (VarExpr1->isInstantiationDependent() ||
      VarExpr2->isInstantiationDependent())
    return false;

  Expr *LHS = VarExpr1->IgnoreParenCasts();
  Expr *RHS = VarExpr2->IgnoreParenCasts();

  // The legal forms are: scalar variable reference, sub-array, array element,
  // or composite variable member.

  // Sub-array: equal when both are sub-arrays of the same base. We could
  // possibly check whether the ranges overlap, but it isn't clear that the
  // rules allow this.
  if (auto *LHSSection = dyn_cast<ArraySectionExpr>(LHS)) {
    auto *RHSSection = dyn_cast<ArraySectionExpr>(RHS);
    return RHSSection &&
           areVarsEqual(LHSSection->getBase(), RHSSection->getBase());
  }

  // Array element: equal when both index into the same base. We could possibly
  // check whether the referenced elements differ, but it isn't clear from the
  // standard that this is allowed (and that the 'var' referred to isn't the
  // array itself).
  if (auto *LHSSubscript = dyn_cast<ArraySubscriptExpr>(LHS)) {
    auto *RHSSubscript = dyn_cast<ArraySubscriptExpr>(RHS);
    return RHSSubscript &&
           areVarsEqual(LHSSubscript->getBase(), RHSSubscript->getBase());
  }

  // Scalar variable reference, or composite variable: compare the most recent
  // declaration of each referenced entity.
  if (auto *LHSRef = dyn_cast<DeclRefExpr>(LHS)) {
    auto *RHSRef = dyn_cast<DeclRefExpr>(RHS);
    return RHSRef && LHSRef->getDecl()->getMostRecentDecl() ==
                         RHSRef->getDecl()->getMostRecentDecl();
  }

  llvm_unreachable("Unknown variable type encountered");
}
} // namespace

/// OpenACC 3.3 section 2.5.15:
/// At a minimum, the supported data types include ... the numerical data types
/// in C, C++, and Fortran.
///
/// If the reduction var is a composite variable, each
/// member of the composite variable must be a supported datatype for the
/// reduction operation.
ExprResult SemaOpenACC::CheckReductionVar(Expr *VarExpr) {
ExprResult SemaOpenACC::CheckReductionVar(OpenACCDirectiveKind DirectiveKind,
OpenACCReductionOperator ReductionOp,
Expr *VarExpr) {
VarExpr = VarExpr->IgnoreParenCasts();

auto TypeIsValid = [](QualType Ty) {
Expand Down Expand Up @@ -1667,6 +1826,28 @@ ExprResult SemaOpenACC::CheckReductionVar(Expr *VarExpr) {
return ExprError();
}

// OpenACC3.3: 2.9.11: Reduction clauses on nested constructs for the same
// reduction 'var' must have the same reduction operator.
if (!VarExpr->isInstantiationDependent()) {

for (const OpenACCReductionClause *RClause : ActiveReductionClauses) {
if (RClause->getReductionOp() == ReductionOp)
break;

for (Expr *OldVarExpr : RClause->getVarList()) {
if (OldVarExpr->isInstantiationDependent())
continue;

if (areVarsEqual(VarExpr, OldVarExpr)) {
Diag(VarExpr->getExprLoc(), diag::err_reduction_op_mismatch)
<< ReductionOp << RClause->getReductionOp();
Diag(OldVarExpr->getExprLoc(), diag::note_acc_previous_clause_here);
return ExprError();
}
}
}
}

return VarExpr;
}

Expand Down Expand Up @@ -2223,6 +2404,81 @@ ExprResult SemaOpenACC::CheckGangExpr(OpenACCGangKind GK, Expr *E) {
llvm_unreachable("Compute construct directive not handled?");
}

// Builds a 'gang' clause after checking it against the clauses already seen on
// the directive. Returns nullptr (after emitting diagnostics) when a
// 'reduction' clause is already present and this gang clause has a constant
// 'dim' argument greater than 1; otherwise returns the newly created clause.
OpenACCClause *
SemaOpenACC::CheckGangClause(ArrayRef<const OpenACCClause *> ExistingClauses,
                             SourceLocation BeginLoc, SourceLocation LParenLoc,
                             ArrayRef<OpenACCGangKind> GangKinds,
                             ArrayRef<Expr *> IntExprs, SourceLocation EndLoc) {
  // Every non-diagnosing path ends by creating the clause from the arguments.
  auto BuildClause = [&]() -> OpenACCClause * {
    return OpenACCGangClause::Create(getASTContext(), BeginLoc, LParenLoc,
                                     GangKinds, IntExprs, EndLoc);
  };

  // OpenACC 3.3 2.9.11: A reduction clause may not appear on a loop directive
  // that has a gang clause with a dim: argument whose value is greater than 1.
  const auto *PriorReduction =
      llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCReductionClause>);
  if (PriorReduction == ExistingClauses.end())
    return BuildClause();

  // Pair each gang kind with its expression and look for the 'dim' entry.
  const auto KindsWithExprs = llvm::zip_equal(GangKinds, IntExprs);
  const auto DimEntry = llvm::find_if(KindsWithExprs, [](const auto &Entry) {
    return std::get<0>(Entry) == OpenACCGangKind::Dim;
  });
  if (DimEntry == KindsWithExprs.end())
    return BuildClause();

  const Expr *DimArg = std::get<1>(*DimEntry);
  assert((DimArg->isInstantiationDependent() || isa<ConstantExpr>(DimArg)) &&
         "Improperly formed gang argument");

  // A dependent 'dim' cannot be evaluated here, and a value of at most 1 does
  // not conflict with the reduction.
  const auto *DimConstant = dyn_cast<ConstantExpr>(DimArg);
  if (!DimConstant || !(DimConstant->getResultAsAPSInt() > 1))
    return BuildClause();

  Diag(DimConstant->getBeginLoc(), diag::err_acc_gang_reduction_conflict)
      << /*gang/reduction=*/0;
  Diag((*PriorReduction)->getBeginLoc(), diag::note_acc_previous_clause_here);
  return nullptr;
}

// Builds a 'reduction' clause after checking it against the clauses already
// seen on the same directive.
//
// On a 'loop' directive the reduction is rejected (diagnostics are emitted and
// nullptr is returned) when any earlier 'gang' clause has a constant 'dim'
// argument greater than 1. Otherwise the clause is created from the given
// operator, variable list and source locations.
OpenACCClause *SemaOpenACC::CheckReductionClause(
    ArrayRef<const OpenACCClause *> ExistingClauses,
    OpenACCDirectiveKind DirectiveKind, SourceLocation BeginLoc,
    SourceLocation LParenLoc, OpenACCReductionOperator ReductionOp,
    ArrayRef<Expr *> Vars, SourceLocation EndLoc) {
  if (DirectiveKind == OpenACCDirectiveKind::Loop) {
    // OpenACC 3.3 2.9.11: A reduction clause may not appear on a loop directive
    // that has a gang clause with a dim: argument whose value is greater
    // than 1.
    for (const OpenACCClause *Existing : ExistingClauses) {
      // Visit every clause but only inspect the 'gang' ones. Advancing a raw
      // iterator past the first gang clause and casting unconditionally would
      // apply cast<> to whatever clause kind happens to follow it.
      const auto *GangClause = dyn_cast<OpenACCGangClause>(Existing);
      if (!GangClause)
        continue;

      for (unsigned I = 0; I < GangClause->getNumExprs(); ++I) {
        std::pair<OpenACCGangKind, const Expr *> EPair = GangClause->getExpr(I);
        // Skip arguments that are not 'dim'. The 'dim' argument is not
        // necessarily the first one, so keep scanning rather than stopping.
        if (EPair.first != OpenACCGangKind::Dim)
          continue;

        // Only a constant 'dim' can be compared; a dependent one is left for
        // a later check.
        if (const auto *DimVal = dyn_cast<ConstantExpr>(EPair.second);
            DimVal && DimVal->getResultAsAPSInt() > 1) {
          Diag(BeginLoc, diag::err_acc_gang_reduction_conflict)
              << /*reduction/gang=*/1;
          Diag(GangClause->getBeginLoc(), diag::note_acc_previous_clause_here);
          return nullptr;
        }
      }
    }
  }

  return OpenACCReductionClause::Create(getASTContext(), BeginLoc, LParenLoc,
                                        ReductionOp, Vars, EndLoc);
}

ExprResult SemaOpenACC::CheckTileSizeExpr(Expr *SizeExpr) {
if (!SizeExpr)
return ExprError();
Expand Down
18 changes: 9 additions & 9 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -11935,15 +11935,16 @@ void OpenACCClauseTransform<Derived>::VisitReductionClause(
SmallVector<Expr *> ValidVars;

for (Expr *Var : TransformedVars) {
ExprResult Res = Self.getSema().OpenACC().CheckReductionVar(Var);
ExprResult Res = Self.getSema().OpenACC().CheckReductionVar(
ParsedClause.getDirectiveKind(), C.getReductionOp(), Var);
if (Res.isUsable())
ValidVars.push_back(Res.get());
}

NewClause = OpenACCReductionClause::Create(
Self.getSema().getASTContext(), ParsedClause.getBeginLoc(),
ParsedClause.getLParenLoc(), C.getReductionOp(), ValidVars,
ParsedClause.getEndLoc());
NewClause = Self.getSema().OpenACC().CheckReductionClause(
ExistingClauses, ParsedClause.getDirectiveKind(),
ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(),
C.getReductionOp(), ValidVars, ParsedClause.getEndLoc());
}

template <typename Derived>
Expand Down Expand Up @@ -12018,10 +12019,9 @@ void OpenACCClauseTransform<Derived>::VisitGangClause(
TransformedIntExprs.push_back(ER.get());
}

NewClause = OpenACCGangClause::Create(
Self.getSema().getASTContext(), ParsedClause.getBeginLoc(),
ParsedClause.getLParenLoc(), TransformedGangKinds, TransformedIntExprs,
ParsedClause.getEndLoc());
NewClause = Self.getSema().OpenACC().CheckGangClause(
ExistingClauses, ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(),
TransformedGangKinds, TransformedIntExprs, ParsedClause.getEndLoc());
}
} // namespace
template <typename Derived>
Expand Down
87 changes: 87 additions & 0 deletions clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only -triple x86_64-apple-macosx10.14.0 %s
// RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only -triple x86_64-apple-macosx10.14.0 %s -fno-signed-char
// RUN: %clang_cc1 -verify=ref,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu %s

// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple x86_64-apple-macosx10.14.0 %s -fexperimental-new-constant-interpreter
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple x86_64-apple-macosx10.14.0 %s -fno-signed-char -fexperimental-new-constant-interpreter
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu %s -fexperimental-new-constant-interpreter

// both-no-diagnostics

typedef decltype(nullptr) nullptr_t;
typedef __INTPTR_TYPE__ intptr_t;

static_assert(sizeof(int) == 4);
static_assert(sizeof(long long) == 8);

// Reinterprets the object representation of 'from' as a value of type To.
// Both types must have the same size; this is enforced at compile time.
template <typename To, typename From>
constexpr To bit_cast(const From &from) {
  static_assert(sizeof(From) == sizeof(To));
  return __builtin_bit_cast(To, from);
}

// Converts 'init' to Intermediate and back with bit_cast, and reports whether
// the value that comes back compares equal to the original.
template <typename Intermediate, typename Init>
constexpr bool check_round_trip(const Init &init) {
  const Intermediate via = bit_cast<Intermediate>(init);
  const Init restored = bit_cast<Init>(via);
  return restored == init;
}

// Converts 'init' to Intermediate and back with bit_cast, and returns the
// value that comes back (no comparison against the original is made).
template <typename Intermediate, typename Init>
constexpr Init round_trip(const Init &init) {
  const Intermediate via = bit_cast<Intermediate>(init);
  return bit_cast<Init>(via);
}




// bit_cast checks involving long double.
//
// On x86_64 the active assertions round-trip a long double through itself and
// check that a 16-byte pattern bit_casts to 539.0; the '#if 0' regions are
// compiled out. On other targets a long double is round-tripped through
// __int128_t instead.
//
// NOTE(review): round_trip() returns the converted value rather than a bool,
// so the static_asserts that call it only test that the result is non-zero.
// Confirm that check_round_trip() was not the intended helper.
namespace test_long_double {
#if __x86_64
#if 0
constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // expected-error{{must be initialized by a constant expression}}\
// expected-note{{in call}}
#endif
constexpr long double ld = 3.1425926539;

struct bytes {
unsigned char d[16];
};

// static_assert(round_trip<bytes>(ld), "");

static_assert(round_trip<long double>(10.0L));

#if 0
constexpr bool f(bool read_uninit) {
bytes b = bit_cast<bytes>(ld);
unsigned char ld_bytes[10] = {
0x0, 0x48, 0x9f, 0x49, 0xf0,
0x3c, 0x20, 0xc9, 0x0, 0x40,
};

for (int i = 0; i != 10; ++i)
if (ld_bytes[i] != b.d[i])
return false;

if (read_uninit && b.d[10]) // expected-note{{read of uninitialized object is not allowed in a constant expression}}
return false;

return true;
}

static_assert(f(/*read_uninit=*/false), "");
static_assert(f(/*read_uninit=*/true), ""); // expected-error{{static assertion expression is not an integral constant expression}} \
// expected-note{{in call to 'f(true)'}}
#endif
constexpr bytes ld539 = {
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0xc0, 0x86,
0x8, 0x40, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
};

constexpr long double fivehundredandthirtynine = 539.0;

static_assert(bit_cast<long double>(ld539) == fivehundredandthirtynine, "");
#else
static_assert(round_trip<__int128_t>(34.0L));
#endif
}
4 changes: 3 additions & 1 deletion clang/test/AST/ByteCode/builtin-bit-cast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ static_assert(check_round_trip<__int128_t>((__int128_t)34));
static_assert(check_round_trip<__int128_t>((__int128_t)-34));
#endif

static_assert(check_round_trip<double>(17.0));


namespace simple {
constexpr int A = __builtin_bit_cast(int, 10);
Expand Down Expand Up @@ -108,7 +110,7 @@ namespace simple {
static_assert(check_round_trip<unsigned>((int)0x12345678));
static_assert(check_round_trip<unsigned>((int)0x87654321));
static_assert(check_round_trip<unsigned>((int)0x0C05FEFE));
// static_assert(round_trip<float>((int)0x0C05FEFE));
static_assert(round_trip<float>((int)0x0C05FEFE));


/// This works in GCC and in the bytecode interpreter, but the current interpreter
Expand Down
30 changes: 30 additions & 0 deletions clang/test/AST/ast-print-openacc-loop-construct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,4 +291,34 @@ void foo() {
#pragma acc loop vector
for(;;);

int *iPtr;
bool SomeB;

//CHECK: #pragma acc loop reduction(+: iPtr)
#pragma acc loop reduction(+: iPtr)
for(;;);
//CHECK: #pragma acc loop reduction(*: i)
#pragma acc loop reduction(*: i)
for(;;);
//CHECK: #pragma acc loop reduction(max: SomeB)
#pragma acc loop reduction(max: SomeB)
for(;;);
//CHECK: #pragma acc loop reduction(min: iPtr)
#pragma acc loop reduction(min: iPtr)
for(;;);
//CHECK: #pragma acc loop reduction(&: i)
#pragma acc loop reduction(&: i)
for(;;);
//CHECK: #pragma acc loop reduction(|: SomeB)
#pragma acc loop reduction(|: SomeB)
for(;;);
//CHECK: #pragma acc loop reduction(^: iPtr)
#pragma acc loop reduction(^: iPtr)
for(;;);
//CHECK: #pragma acc loop reduction(&&: i)
#pragma acc loop reduction(&&: i)
for(;;);
//CHECK: #pragma acc loop reduction(||: SomeB)
#pragma acc loop reduction(||: SomeB)
for(;;);
}
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ int main() {
(void)__builtin_sycl_unique_stable_name(decltype(lambda3));

// Make sure the following 3 are the same between the host and device compile.
// Note that these are NOT the same value as eachother, they differ by the
// Note that these are NOT the same value as each other, they differ by the
// signature.
// CHECK: private unnamed_addr [[$ADDRSPACE]]constant [17 x i8] c"_ZTSZ4mainEUlvE_\00"
// CHECK: private unnamed_addr [[$ADDRSPACE]]constant [17 x i8] c"_ZTSZ4mainEUliE_\00"
Expand Down
18 changes: 18 additions & 0 deletions clang/test/Driver/ps5-linker.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@
// CHECK-NO-PIE-NOT: "-pie"
// CHECK-SHARED: "--shared"

// Test the driver supplies an --image-base to the linker only for non-pie
// executables.

// RUN: %clang --target=x86_64-sie-ps5 -static %s -### 2>&1 | FileCheck --check-prefixes=CHECK-BASE %s
// RUN: %clang --target=x86_64-sie-ps5 -no-pie %s -### 2>&1 | FileCheck --check-prefixes=CHECK-BASE %s

// CHECK-BASE: {{ld(\.exe)?}}"
// CHECK-BASE-SAME: "--image-base=0x400000"

// RUN: %clang --target=x86_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-BASE %s
// RUN: %clang --target=x86_64-sie-ps5 -r %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-BASE %s
// RUN: %clang --target=x86_64-sie-ps5 -shared %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-BASE %s

// CHECK-NO-BASE: {{ld(\.exe)?}}"
// CHECK-NO-BASE-NOT: --image-base

// Test the driver passes PlayStation-specific options to the linker that are
// appropriate for the type of output. Many options don't apply for relocatable
// output (-r).
Expand All @@ -37,6 +53,8 @@
// CHECK-EXE-SAME: "--unresolved-symbols=report-all"
// CHECK-EXE-SAME: "-z" "now"
// CHECK-EXE-SAME: "-z" "start-stop-visibility=hidden"
// CHECK-EXE-SAME: "-z" "common-page-size=0x4000"
// CHECK-EXE-SAME: "-z" "max-page-size=0x4000"
// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_*=0xffffffffffffffff"
// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_ranges=0xfffffffffffffffe"
// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_loc=0xfffffffffffffffe"
Expand Down
2 changes: 1 addition & 1 deletion clang/test/SemaCXX/ext-vector-type-conditional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void Operands() {
(void)(four_ints ? uss : shrt); // should be fine, since they get promoted to int.
(void)(four_ints ? shrt : shrt); // expected-error {{vector condition type 'FourInts' (vector of 4 'int' values) and result type 'short __attribute__((ext_vector_type(4)))' (vector of 4 'short' values) do not have elements of the same size}}

// Vectors must be the same type as eachother.
// Vectors must be the same type as each other.
(void)(four_ints ? four_uints : four_floats); // expected-error {{vector operands to the vector conditional must be the same type ('FourUInts' (vector of 4 'unsigned int' values) and 'FourFloats' (vector of 4 'float' values))}}
(void)(four_ints ? four_uints : four_ints); // expected-error {{vector operands to the vector conditional must be the same type ('FourUInts' (vector of 4 'unsigned int' values) and 'FourInts' (vector of 4 'int' values))}}
(void)(four_ints ? four_ints : four_uints); // expected-error {{vector operands to the vector conditional must be the same type ('FourInts' (vector of 4 'int' values) and 'FourUInts' (vector of 4 'unsigned int' values))}}
Expand Down
2 changes: 1 addition & 1 deletion clang/test/SemaCXX/vector-size-conditional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void Operands() {
(void)(four_ints ? uss : shrt); // should be fine, since they get promoted to int.
(void)(four_ints ? shrt : shrt); //expected-error {{vector condition type 'FourInts' (vector of 4 'int' values) and result type '__attribute__((__vector_size__(4 * sizeof(short)))) short' (vector of 4 'short' values) do not have elements of the same size}}

// Vectors must be the same type as eachother.
// Vectors must be the same type as each other.
(void)(four_ints ? four_uints : four_floats); // expected-error {{vector operands to the vector conditional must be the same type ('FourUInts' (vector of 4 'unsigned int' values) and 'FourFloats' (vector of 4 'float' values))}}
(void)(four_ints ? four_uints : four_ints); // expected-error {{vector operands to the vector conditional must be the same type ('FourUInts' (vector of 4 'unsigned int' values) and 'FourInts' (vector of 4 'int' values))}}
(void)(four_ints ? four_ints : four_uints); // expected-error {{vector operands to the vector conditional must be the same type ('FourInts' (vector of 4 'int' values) and 'FourUInts' (vector of 4 'unsigned int' values))}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present_or_create' clause is not valid on 'loop' directive}}
#pragma acc loop auto present_or_create(Var)
for(;;);
// expected-warning@+1{{OpenACC clause 'reduction' not yet implemented}}
#pragma acc loop auto reduction(+:Var)
for(;;);
#pragma acc loop auto collapse(1)
Expand Down Expand Up @@ -268,7 +267,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present_or_create' clause is not valid on 'loop' directive}}
#pragma acc loop present_or_create(Var) auto
for(;;);
// expected-warning@+1{{OpenACC clause 'reduction' not yet implemented}}
#pragma acc loop reduction(+:Var) auto
for(;;);
#pragma acc loop collapse(1) auto
Expand Down Expand Up @@ -404,7 +402,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present_or_create' clause is not valid on 'loop' directive}}
#pragma acc loop independent present_or_create(Var)
for(;;);
// expected-warning@+1{{OpenACC clause 'reduction' not yet implemented}}
#pragma acc loop independent reduction(+:Var)
for(;;);
#pragma acc loop independent collapse(1)
Expand Down Expand Up @@ -539,7 +536,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present_or_create' clause is not valid on 'loop' directive}}
#pragma acc loop present_or_create(Var) independent
for(;;);
// expected-warning@+1{{OpenACC clause 'reduction' not yet implemented}}
#pragma acc loop reduction(+:Var) independent
for(;;);
#pragma acc loop collapse(1) independent
Expand Down Expand Up @@ -683,7 +679,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present_or_create' clause is not valid on 'loop' directive}}
#pragma acc loop seq present_or_create(Var)
for(;;);
// expected-warning@+1{{OpenACC clause 'reduction' not yet implemented}}
#pragma acc loop seq reduction(+:Var)
for(;;);
#pragma acc loop seq collapse(1)
Expand Down Expand Up @@ -824,7 +819,6 @@ void uses() {
// expected-error@+1{{OpenACC 'present_or_create' clause is not valid on 'loop' directive}}
#pragma acc loop present_or_create(Var) seq
for(;;);
// expected-warning@+1{{OpenACC clause 'reduction' not yet implemented}}
#pragma acc loop reduction(+:Var) seq
for(;;);
#pragma acc loop collapse(1) seq
Expand Down
375 changes: 375 additions & 0 deletions clang/test/SemaOpenACC/loop-construct-reduction-ast.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,375 @@
// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s

// Test this with PCH.
// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s
// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s

#ifndef PCH_HELPER
#define PCH_HELPER

// Non-template case: each orphaned 'loop' construct below carries a single
// 'reduction' clause using a different operator, applied to one of the two
// parameters. The pattern lines after each loop describe the AST dump that
// is produced for that construct.
void NormalFunc(int i, float f) {
// CHECK: FunctionDecl{{.*}}NormalFunc
// CHECK-NEXT: ParmVarDecl
// CHECK-NEXT: ParmVarDecl
// CHECK-NEXT: CompoundStmt
#pragma acc loop reduction(+: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: +
// CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(*: f)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: *
// CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(max: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: max
// CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(min: f)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: min
// CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(&: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: &
// CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(|: f)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: |
// CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt


#pragma acc loop reduction(^: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: ^
// CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(&&: f)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: &&
// CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt


#pragma acc loop reduction(||: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: ||
// CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
}

// Template counterpart of the reduction tests. The operators +, *, max, min,
// &, |, ^, && and || are each applied to an operand that depends on T: the
// local 't' of type T, the member 'T::SomeFloat', or the local 'i' of type
// 'typename T::IntTy'. The matching directives below first describe the AST of
// the uninstantiated template and then the AST of the implicit instantiation
// with 'InstTy'.
template<typename T>
void TemplFunc() {
// CHECK: FunctionTemplateDecl{{.*}}TemplFunc
// CHECK-NEXT: TemplateTypeParmDecl

// Match the prototype:
// CHECK-NEXT: FunctionDecl{{.*}}TemplFunc
// CHECK-NEXT: CompoundStmt

T t;
// CHECK-NEXT: DeclStmt
// CHECK-NEXT: VarDecl{{.*}} t 'T'

#pragma acc loop reduction(+: t)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: +
// CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue Var{{.*}} 't' 'T'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(*: T::SomeFloat)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: *
// CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue
// CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

typename T::IntTy i;
// CHECK-NEXT: DeclStmt
// CHECK-NEXT: VarDecl{{.*}} i 'typename T::IntTy'

#pragma acc loop reduction(max: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: max
// CHECK-NEXT: DeclRefExpr{{.*}} 'typename T::IntTy' lvalue Var{{.*}} 'i' 'typename T::IntTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(min: t)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: min
// CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue Var{{.*}} 't' 'T'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(&: T::SomeFloat)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: &
// CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue
// CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(|: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: |
// CHECK-NEXT: DeclRefExpr{{.*}} 'typename T::IntTy' lvalue Var{{.*}} 'i' 'typename T::IntTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(^: t)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: ^
// CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue Var{{.*}} 't' 'T'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(&&: T::SomeFloat)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: &&
// CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue
// CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

#pragma acc loop reduction(||: i)
for(;;);
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: ||
// CHECK-NEXT: DeclRefExpr{{.*}} 'typename T::IntTy' lvalue Var{{.*}} 'i' 'typename T::IntTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

// Match the instantiation:
// In the instantiated AST the operands are no longer dependent: 't' has type
// 'InstTy', 'T::SomeFloat' becomes a reference to the 'const float' member,
// and 'i' has type 'typename InstTy::IntTy' (aka 'int').

// CHECK: FunctionDecl{{.*}}TemplFunc 'void ()' implicit_instantiation
// CHECK-NEXT: TemplateArgument type 'InstTy'
// CHECK-NEXT: RecordType{{.*}} 'InstTy'
// CHECK-NEXT: CXXRecord{{.*}} 'InstTy'
// CHECK-NEXT: CompoundStmt
//
// CHECK-NEXT: DeclStmt
// CHECK-NEXT: VarDecl{{.*}} used t 'InstTy'
// CHECK-NEXT: CXXConstructExpr{{.*}} 'InstTy' 'void () noexcept'
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: +
// CHECK-NEXT: DeclRefExpr{{.*}} 'InstTy' lvalue Var{{.*}} 't' 'InstTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: *
// CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float'
// CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: DeclStmt
// CHECK-NEXT: VarDecl{{.*}} i 'typename InstTy::IntTy':'int'
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: max
// CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: min
// CHECK-NEXT: DeclRefExpr{{.*}} 'InstTy' lvalue Var{{.*}} 't' 'InstTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: &
// CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float'
// CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: |
// CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: ^
// CHECK-NEXT: DeclRefExpr{{.*}} 'InstTy' lvalue Var{{.*}} 't' 'InstTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: &&
// CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float'
// CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt
//
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}<orphan>
// CHECK-NEXT: reduction clause Operator: ||
// CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int'
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: NullStmt

}

// Empty helper type whose only member is a const conversion operator to bool.
// The operator is declared here but not defined.
struct BoolConversion {
  operator bool() const;
};
// Concrete template argument for TemplFunc: provides the nested type 'IntTy'
// and the static member 'SomeFloat' that the template body names through T.
struct InstTy {
using IntTy = int;
static constexpr float SomeFloat = 5.0;
// NOTE(review): 'BC' is not referenced in the visible part of this test.
static constexpr BoolConversion BC;
};

// Forces the implicit instantiation TemplFunc<InstTy>, whose AST is matched by
// the directives at the end of TemplFunc.
void Instantiate() {
TemplFunc<InstTy>();
}

#endif // PCH_HELPER
366 changes: 366 additions & 0 deletions clang/test/SemaOpenACC/loop-construct-reduction-clause.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,366 @@
// RUN: %clang_cc1 %s -fopenacc -verify

// Aggregate whose fields are all integer, floating-point or _Complex values.
// The tests below use it as a reduction operand that is accepted.
struct CompositeOfScalars {
int I;
float F;
short J;
char C;
double D;
_Complex float CF;
_Complex double CD;
};

// Same fields as CompositeOfScalars plus one nested struct field. The nested
// field carries the marker that the 'invalid field is here' notes refer to.
struct CompositeHasComposite {
int I;
float F;
short J;
char C;
double D;
_Complex float CF;
_Complex double CD;
struct CompositeOfScalars COS; // #COS_FIELD
};
// Non-template tests for the 'reduction' clause on loop constructs: operand
// type restrictions, operator consistency across nested constructs, and the
// interaction with 'gang' and 'num_gangs'. The verify directives inside use
// relative line offsets, so lines must not be inserted between a directive and
// the construct it points at.
void uses() {

int I;
float F;
int Array[5];
CompositeOfScalars CoS;
CompositeHasComposite ChC;

// A composite of scalars and plain scalars are accepted as operands.
#pragma acc serial
{
#pragma acc loop reduction(+:CoS, I, F)
for(;;){}
}

// A whole array is rejected as an operand.
#pragma acc serial
{
// expected-error@+1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; type is 'int[5]'}}
#pragma acc loop reduction(+:Array)
for(;;){}
}

// A composite that contains a non-scalar field is rejected.
#pragma acc serial
{
// expected-error@+2{{OpenACC 'reduction' composite variable must not have non-scalar field}}
// expected-note@#COS_FIELD{{invalid field is here}}
#pragma acc loop reduction(+:ChC)
for(;;){}
}

// Nested loop constructs must use the same operator for a given variable.
#pragma acc serial
{
#pragma acc loop reduction(+:I)
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-3{{previous clause is here}}
#pragma acc loop reduction(&:I)
for(;;) {
}
}
}

// Identical to the previous case.
#pragma acc serial
{
#pragma acc loop reduction(+:I)
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-3{{previous clause is here}}
#pragma acc loop reduction(&:I)
for(;;) {
}
}
}

// The mismatch is still diagnosed when a compute construct sits between the
// two loop constructs.
#pragma acc serial
{
#pragma acc loop reduction(+:I)
for(;;) {
#pragma acc serial
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop reduction(&:I)
for(;;) {
}
}
}

// Mismatch between a compute construct's reduction and the loop directly
// associated with it.
#pragma acc serial reduction(+:I)
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-2{{previous clause is here}}
#pragma acc loop reduction(&:I)
for(;;){}

// Mismatch between a loop's reduction and a compute construct in its body.
#pragma acc serial
#pragma acc loop reduction(&:I)
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (+ vs &)}}
// expected-note@-3{{previous clause is here}}
#pragma acc serial reduction(+:I)
;
}

// gang with dim value 1 may be combined with reduction.
#pragma acc parallel
{
#pragma acc loop reduction(+:I) gang(dim:1)
for(;;) {
}
}

// gang with dim value 2 written after reduction: the message leads with the
// 'gang' clause.
#pragma acc parallel
{
// expected-error@+2{{OpenACC 'gang' clause with a 'dim' value greater than 1 cannot appear on the same 'loop' construct as a 'reduction' clause}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:2)
for(;;) {
}
}

// Same conflict with the clauses in the other order: the message leads with
// the 'reduction' clause.
#pragma acc parallel
{
// expected-error@+2{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause with a 'dim' value greater than 1}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop gang(dim:2) reduction(+:I)
for(;;) {
}
}

// Several gang clauses, one of which has dim value 2.
#pragma acc parallel
{
// expected-error@+2{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause with a 'dim' value greater than 1}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop gang gang(dim:1) gang(dim:2) reduction(+:I)
for(;;) {
}
}

// Enclosing parallel has a two-argument num_gangs; gang precedes reduction.
#pragma acc parallel num_gangs(1, 2)
{
// expected-error@+3{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop gang(dim:1) reduction(+:I)
for(;;) {
}
}

// Enclosing parallel has a two-argument num_gangs; reduction precedes gang.
#pragma acc parallel num_gangs(2, 3)
{
// expected-error@+3{{OpenACC 'gang' clause cannot appear on the same 'loop' construct as a 'reduction' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:1)
for(;;) {
}
}
}

// Template counterpart of uses(): operand types come from IntTy, CoSTy and
// ChCTy, and the 'dim' / 'num_gangs' values come from the non-type parameters
// One and Two mixed with literal values. inst() instantiates it with
// <int, CompositeOfScalars, CompositeHasComposite, 1, 2>.
// NOTE(review): whether a given diagnostic is produced on the template
// definition or only on instantiation is not visible from this file.
template<typename IntTy, typename CoSTy, typename ChCTy, unsigned One,
unsigned Two>
void templ_uses() {
IntTy I;
IntTy Array[5];
CoSTy CoS;
ChCTy ChC;

// Composite-of-scalars and scalar operands are accepted.
#pragma acc serial
{
#pragma acc loop reduction(+:CoS, I)
for(;;){}
}

// 'Array' is declared as IntTy[5]; the diagnostic names it as 'int[5]'.
#pragma acc serial
{
// expected-error@+1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; type is 'int[5]'}}
#pragma acc loop reduction(+:Array)
for(;;){}
}

// Composite with a non-scalar field.
#pragma acc serial
{
// expected-error@+2{{OpenACC 'reduction' composite variable must not have non-scalar field}}
// expected-note@#COS_FIELD{{invalid field is here}}
#pragma acc loop reduction(+:ChC)
for(;;){}
}

// Operator mismatch on a scalar across nested loop constructs.
#pragma acc serial
{
#pragma acc loop reduction(+:I)
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-3{{previous clause is here}}
#pragma acc loop reduction(&:I)
for(;;) {
}
}
}

// Operator mismatch on the same array element.
#pragma acc serial
{
#pragma acc loop reduction(+:Array[3])
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-3{{previous clause is here}}
#pragma acc loop reduction(&:Array[3])
for(;;) {
}
}
}

// Operator mismatch on sub-arrays of the same array with different ranges.
#pragma acc serial
{
#pragma acc loop reduction(+:Array[0:3])
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-3{{previous clause is here}}
#pragma acc loop reduction(&:Array[1:4])
for(;;) {
}
}
}

// Operator mismatch between a loop and a compute construct nested in it.
#pragma acc serial
{
#pragma acc loop reduction(+:I)
for(;;) {
// expected-error@+2{{OpenACC 'reduction' variable must have the same operator in all nested constructs (& vs +)}}
// expected-note@-3{{previous clause is here}}
#pragma acc serial reduction(&:I)
for(;;) {
}
}
}

// gang dim supplied through template parameter One, combined with reduction.
#pragma acc parallel
{
#pragma acc loop reduction(+:I) gang(dim:One)
for(;;) {
}
}

// Literal dim value 2 after reduction.
#pragma acc parallel
{
// expected-error@+2{{OpenACC 'gang' clause with a 'dim' value greater than 1 cannot appear on the same 'loop' construct as a 'reduction' clause}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:2)
for(;;) {
}
}

// Literal dim value 2 before reduction.
#pragma acc parallel
{
// expected-error@+2{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause with a 'dim' value greater than 1}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop gang(dim:2) reduction(+:I)
for(;;) {
}
}
// Same two conflicts with the dim value supplied through template parameter
// Two.
#pragma acc parallel
{
// expected-error@+2{{OpenACC 'gang' clause with a 'dim' value greater than 1 cannot appear on the same 'loop' construct as a 'reduction' clause}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:Two)
for(;;) {
}
}

#pragma acc parallel
{
// expected-error@+2{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause with a 'dim' value greater than 1}}
// expected-note@+1{{previous clause is here}}
#pragma acc loop gang(dim:Two) reduction(+:I)
for(;;) {
}
}


// The remaining cases pair a num_gangs clause on the enclosing parallel with
// gang + reduction on the loop. A single-argument num_gangs carries no
// diagnostic directives; a two-argument num_gangs is diagnosed in either
// clause order. The groups differ in which of the num_gangs / dim values are
// template parameters and which are literals.
#pragma acc parallel num_gangs(One)
{
#pragma acc loop reduction(+:I) gang(dim:One)
for(;;) {
}
}

#pragma acc parallel num_gangs(Two, 1)
{
// expected-error@+3{{OpenACC 'gang' clause cannot appear on the same 'loop' construct as a 'reduction' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:One)
for(;;) {
}
}

#pragma acc parallel num_gangs(Two, 1)
{
// expected-error@+3{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop gang(dim:One) reduction(+:I)
for(;;) {
}
}

#pragma acc parallel num_gangs(One)
{
#pragma acc loop reduction(+:I) gang(dim:1)
for(;;) {
}
}

#pragma acc parallel num_gangs(Two, 1)
{
// expected-error@+3{{OpenACC 'gang' clause cannot appear on the same 'loop' construct as a 'reduction' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:1)
for(;;) {
}
}

#pragma acc parallel num_gangs(Two, 1)
{
// expected-error@+3{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop gang(dim:1) reduction(+:I)
for(;;) {
}
}

#pragma acc parallel num_gangs(1)
{
#pragma acc loop reduction(+:I) gang(dim:One)
for(;;) {
}
}

#pragma acc parallel num_gangs(2, 1)
{
// expected-error@+3{{OpenACC 'gang' clause cannot appear on the same 'loop' construct as a 'reduction' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop reduction(+:I) gang(dim:One)
for(;;) {
}
}

#pragma acc parallel num_gangs(2, 1)
{
// expected-error@+3{{OpenACC 'reduction' clause cannot appear on the same 'loop' construct as a 'gang' clause inside a compute construct with a 'num_gangs' clause with more than one argument}}
// expected-note@+2{{previous clause is here}}
// expected-note@-4{{previous clause is here}}
#pragma acc loop gang(dim:One) reduction(+:I)
for(;;) {
}
}
}

// Instantiates templ_uses with int, the two composite types, One = 1 and
// Two = 2, giving the dependent operands and clause arguments concrete values.
void inst() {
// expected-note@+1{{in instantiation of function template specialization}}
templ_uses<int, CompositeOfScalars, CompositeHasComposite, 1, 2>();
}


2 changes: 1 addition & 1 deletion compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// NOTE!
// llvm/lib/ProfileData/CtxInstrContextNode.h and
// compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
// must be exact copies of eachother
// must be exact copies of each other
//
// compiler-rt creates these objects as part of the instrumentation runtime for
// contextual profiling. LLVM only consumes them to convert a contextual tree
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
;
; NOTE: if this test fails, please make sure the two files are identical copies
; of eachother.
; of each other.
;
; RUN: diff %crt_src/lib/ctx_profile/CtxInstrContextNode.h %llvm_src/include/llvm/ProfileData/CtxInstrContextNode.h
6 changes: 5 additions & 1 deletion libc/cmake/modules/LLVMLibCArchitectures.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ function(get_arch_and_system_from_triple triple arch_var sys_var)
set(target_arch "arm")
elseif(target_arch MATCHES "^aarch64")
set(target_arch "aarch64")
elseif(target_arch MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
elseif(target_arch MATCHES "(x86_64)|(AMD64|amd64)")
set(target_arch "x86_64")
elseif(target_arch MATCHES "(^i.86$)")
set(target_arch "i386")
elseif(target_arch MATCHES "^(powerpc|ppc)")
set(target_arch "power")
elseif(target_arch MATCHES "^riscv32")
Expand Down Expand Up @@ -147,6 +149,8 @@ if(LIBC_TARGET_ARCHITECTURE STREQUAL "arm")
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "aarch64")
set(LIBC_TARGET_ARCHITECTURE_IS_AARCH64 TRUE)
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "x86_64")
set(LIBC_TARGET_ARCHITECTURE_IS_X86_64 TRUE)
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "i386")
set(LIBC_TARGET_ARCHITECTURE_IS_X86 TRUE)
elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv64")
set(LIBC_TARGET_ARCHITECTURE_IS_RISCV64 TRUE)
Expand Down
4 changes: 2 additions & 2 deletions libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
# Initialize ALL_CPU_FEATURES as empty list.
set(ALL_CPU_FEATURES "")

if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
if(LIBC_TARGET_ARCHITECTURE_IS_X86_64)
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
elseif(LIBC_TARGET_ARCHITECTURE_IS_AARCH64)
set(ALL_CPU_FEATURES "FullFP16")
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
endif()
Expand Down
6 changes: 3 additions & 3 deletions libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ function(_get_compile_options_from_flags output_var)

if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
if(ADD_FMA_FLAG)
if(LIBC_TARGET_ARCHITECTURE_IS_X86)
if(LIBC_TARGET_ARCHITECTURE_IS_X86_64)
list(APPEND compile_options "-mavx2")
list(APPEND compile_options "-mfma")
elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
list(APPEND compile_options "-D__LIBC_RISCV_USE_FMA")
endif()
endif()
if(ADD_ROUND_OPT_FLAG)
if(LIBC_TARGET_ARCHITECTURE_IS_X86)
if(LIBC_TARGET_ARCHITECTURE_IS_X86_64)
# ROUND_OPT_FLAG is only enabled if SSE4.2 is detected, not just SSE4.1,
# because there was code to check for SSE4.2 already, and few CPUs only
# have SSE4.1.
Expand Down Expand Up @@ -145,7 +145,7 @@ function(_get_common_compile_options output_var flags)
endif()
if (LIBC_CONF_KEEP_FRAME_POINTER)
list(APPEND compile_options "-fno-omit-frame-pointer")
if (LIBC_TARGET_ARCHITECTURE_IS_X86)
if (LIBC_TARGET_ARCHITECTURE_IS_X86_64)
list(APPEND compile_options "-mno-omit-leaf-frame-pointer")
endif()
endif()
Expand Down
6 changes: 3 additions & 3 deletions libc/cmake/modules/LLVMLibCFlagRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -268,21 +268,21 @@ set(EXPLICIT_SIMD_OPT_FLAG "EXPLICIT_SIMD_OPT")
set(MISC_MATH_BASIC_OPS_OPT_FLAG "MISC_MATH_BASIC_OPS_OPT")

# Skip FMA_OPT flag for targets that don't support fma.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86_64 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
LIBC_TARGET_ARCHITECTURE_IS_RISCV64))
set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
endif()

# Skip EXPLICIT_SIMD_OPT flag for targets that don't support SSE2.
# Note: one may want to revisit it if they want to control other explicit SIMD
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86_64 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
set(SKIP_FLAG_EXPANSION_EXPLICIT_SIMD_OPT TRUE)
endif()

# Skip ROUND_OPT flag for targets that don't support rounding instructions. On
# x86, these are SSE4.1 instructions, but we already had code to check for
# SSE4.2 support.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86_64 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
LIBC_TARGET_ARCHITECTURE_IS_AARCH64 OR LIBC_TARGET_OS_IS_GPU))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
Expand Down
2 changes: 1 addition & 1 deletion libc/cmake/modules/LLVMLibCTestRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ function(add_integration_test test_name)
if(NOT INTEGRATION_TEST_SRCS)
message(FATAL_ERROR "The SRCS list for add_integration_test is missing.")
endif()
if(NOT TARGET libc.startup.${LIBC_TARGET_OS}.crt1)
if(NOT LLVM_LIBC_FULL_BUILD AND NOT TARGET libc.startup.${LIBC_TARGET_OS}.crt1)
message(FATAL_ERROR "The 'crt1' target for the integration test is missing.")
endif()

Expand Down
11 changes: 11 additions & 0 deletions libc/config/linux/i386/entrypoints.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# libc entrypoints listed for this configuration; only the errno entrypoint is
# present.
set(TARGET_LIBC_ENTRYPOINTS
# errno.h entrypoints
libc.src.errno.errno
)

# No libm entrypoints are listed for this configuration.
set(TARGET_LIBM_ENTRYPOINTS "")

# Combined list: the libc entrypoints followed by the libm entrypoints.
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
)
3 changes: 3 additions & 0 deletions libc/config/linux/i386/headers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Public header targets listed for this configuration; only assert is present.
set(TARGET_PUBLIC_HEADERS
libc.include.assert
)
2 changes: 1 addition & 1 deletion libc/docs/gpu/rpc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ but the following example shows how it can be used by a standard user.
}
// Routines to allocate mapped memory that both the host and the device can
// access asychonrously to communicate with eachother.
// access asynchronously to communicate with each other.
void *alloc_host(size_t size, void *) {
void *sharable_ptr;
if (cudaError_t err = cudaMallocHost(&sharable_ptr, sizeof(void *)))
Expand Down
11 changes: 11 additions & 0 deletions libc/src/__support/OSUtil/linux/i386/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Header library 'linux_i386_util', consisting of syscall.h.
add_header_library(
linux_i386_util
HDRS
syscall.h
)

# Header library 'vdso', consisting of vdso.h.
add_header_library(
vdso
HDRS
vdso.h
)
Empty file.
1 change: 1 addition & 0 deletions libcxx/include/__format/formatter_floating_point.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <__concepts/arithmetic.h>
#include <__concepts/same_as.h>
#include <__config>
#include <__cstddef/ptrdiff_t.h>
#include <__format/concepts.h>
#include <__format/format_parse_context.h>
#include <__format/formatter.h>
Expand Down
6 changes: 5 additions & 1 deletion libcxx/include/__functional/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,11 @@ struct _LIBCPP_TEMPLATE_VIS hash<long> : public __unary_function<long, size_t> {

template <>
struct _LIBCPP_TEMPLATE_VIS hash<unsigned long> : public __unary_function<unsigned long, size_t> {
_LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned long __v) const _NOEXCEPT { return static_cast<size_t>(__v); }
_LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned long __v) const _NOEXCEPT {
static_assert(sizeof(size_t) >= sizeof(unsigned long),
"This would be a terrible hash function on a platform where size_t is smaller than unsigned long");
return static_cast<size_t>(__v);
}
};

template <>
Expand Down
1 change: 1 addition & 0 deletions libcxx/include/__string/char_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <__assert>
#include <__compare/ordering.h>
#include <__config>
#include <__cstddef/ptrdiff_t.h>
#include <__functional/hash.h>
#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
variable_var1_completion = {"text": "var1", "label": "var1 -- int &"}
variable_var2_completion = {"text": "var2", "label": "var2 -- int &"}

# Older versions of libcxx produce slightly different typename strings for
# templates like vector.
@skipIf(compiler="clang", compiler_version=["<", "16.0"])
class TestDAP_completions(lldbdap_testcase.DAPTestCaseBase):
def verify_completions(self, actual_list, expected_list, not_expected_list=[]):
for expected_item in expected_list:
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/ADT/APInt.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class [[nodiscard]] APInt {
/// \param implicitTrunc allow implicit truncation of non-zero/sign bits of
/// val beyond the range of numBits
APInt(unsigned numBits, uint64_t val, bool isSigned = false,
bool implicitTrunc = true)
bool implicitTrunc = false)
: BitWidth(numBits) {
if (!implicitTrunc) {
if (isSigned) {
Expand Down
5 changes: 2 additions & 3 deletions llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
Expand Down Expand Up @@ -854,7 +853,7 @@ class SectionRange {

class LinkGraph {
private:
using SectionMap = MapVector<StringRef, std::unique_ptr<Section>>;
using SectionMap = DenseMap<StringRef, std::unique_ptr<Section>>;
using ExternalSymbolMap = StringMap<Symbol *>;
using AbsoluteSymbolSet = DenseSet<Symbol *>;
using BlockSet = DenseSet<Block *>;
Expand Down Expand Up @@ -1596,7 +1595,7 @@ class LinkGraph {
unsigned PointerSize;
llvm::endianness Endianness;
GetEdgeKindNameFunction GetEdgeKindName = nullptr;
MapVector<StringRef, std::unique_ptr<Section>> Sections;
DenseMap<StringRef, std::unique_ptr<Section>> Sections;
ExternalSymbolMap ExternalSymbols;
AbsoluteSymbolSet AbsoluteSymbols;
orc::shared::AllocActions AAs;
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ class StaticLibraryDefinitionGenerator : public DefinitionGenerator {

/// Returns a list of filenames of dynamic libraries that this archive has
/// imported. This class does not load these libraries by itself. User is
/// responsible for making sure these libraries are avaliable to the JITDylib.
/// responsible for making sure these libraries are available to the JITDylib.
const std::set<std::string> &getImportedDynamicLibraries() const {
return ImportedDynamicLibraries;
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ class LSUnitBase : public HardwareUnit {
/// True if loads don't alias with stores.
///
/// By default, the LS unit assumes that loads and stores don't alias with
/// eachother. If this field is set to false, then loads are always assumed to
/// alias with stores.
/// each other. If this field is set to false, then loads are always assumed
/// to alias with stores.
const bool NoAlias;

/// Used to map group identifiers to MemoryGroups.
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Object/WindowsMachineFlag.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ template <typename T> Triple::ArchType getMachineArchType(T machine) {
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
case COFF::IMAGE_FILE_MACHINE_ARM64X:
return llvm::Triple::ArchType::aarch64;
case COFF::IMAGE_FILE_MACHINE_R4000:
return llvm::Triple::ArchType::mipsel;
default:
return llvm::Triple::ArchType::UnknownArch;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/ObjectYAML/COFFYAML.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ struct ScalarEnumerationTraits<COFF::RelocationTypeAMD64> {
static void enumeration(IO &IO, COFF::RelocationTypeAMD64 &Value);
};

template <> struct ScalarEnumerationTraits<COFF::RelocationTypesMips> {
static void enumeration(IO &IO, COFF::RelocationTypesMips &Value);
};

template <>
struct ScalarEnumerationTraits<COFF::RelocationTypesARM> {
static void enumeration(IO &IO, COFF::RelocationTypesARM &Value);
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/ProfileData/CtxInstrContextNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// NOTE!
// llvm/lib/ProfileData/CtxInstrContextNode.h and
// compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
// must be exact copies of eachother
// must be exact copies of each other
//
// compiler-rt creates these objects as part of the instrumentation runtime for
// contextual profiling. LLVM only consumes them to convert a contextual tree
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Target/TargetMacroFusion.td
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def first_fusion_target : FusionTarget;
def second_fusion_target : FusionTarget;
def both_fusion_target : FusionTarget;

// Base class of FusionPredicate, etc. The avaliable variables are:
// Base class of FusionPredicate, etc. The available variables are:
// * const TargetInstrInfo &TII
// * const TargetSubtargetInfo &STI
// * const MachineRegisterInfo &MRI
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Analysis/InlineCost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3257,16 +3257,16 @@ InlineCostAnnotationPrinterPass::run(Function &F,
const InlineParams Params = llvm::getInlineParams();
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
Function *CalledFunction = CI->getCalledFunction();
if (auto *CB = dyn_cast<CallBase>(&I)) {
Function *CalledFunction = CB->getCalledFunction();
if (!CalledFunction || CalledFunction->isDeclaration())
continue;
OptimizationRemarkEmitter ORE(CalledFunction);
InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
GetAssumptionCache, nullptr, &PSI, &ORE);
ICCA.analyze();
OS << " Analyzing call of " << CalledFunction->getName()
<< "... (caller:" << CI->getCaller()->getName() << ")\n";
<< "... (caller:" << CB->getCaller()->getName() << ")\n";
ICCA.print(OS);
OS << "\n";
}
Expand Down
13 changes: 0 additions & 13 deletions llvm/lib/Analysis/InstructionSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1095,19 +1095,6 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
if (match(Op1, m_Zero()))
return PoisonValue::get(Ty);

// If any element of a constant divisor fixed width vector is zero or undef
// the behavior is undefined and we can fold the whole op to poison.
auto *Op1C = dyn_cast<Constant>(Op1);
auto *VTy = dyn_cast<FixedVectorType>(Ty);
if (Op1C && VTy) {
unsigned NumElts = VTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = Op1C->getAggregateElement(i);
if (Elt && (Elt->isNullValue() || Q.isUndefValue(Elt)))
return PoisonValue::get(Ty);
}
}

// poison / X -> poison
// poison % X -> poison
if (isa<PoisonValue>(Op0))
Expand Down
19 changes: 15 additions & 4 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6003,21 +6003,32 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
if (IncValue == P)
continue;

Instruction *CxtI = P->getIncomingBlock(U)->getTerminator();

// If the Use is a select of this phi, use the fp class of the other
// operand to break the recursion.
// operand to break the recursion. Same around 2-operand phi nodes
Value *V;
if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P)))) {
IncValue = V;
} else if (auto *IncPhi = dyn_cast<PHINode>(IncValue);
IncPhi && IncPhi->getNumIncomingValues() == 2) {
for (int Idx = 0; Idx < 2; ++Idx) {
if (IncPhi->getIncomingValue(Idx) == P) {
IncValue = IncPhi->getIncomingValue(1 - Idx);
CxtI = IncPhi->getIncomingBlock(1 - Idx)->getTerminator();
break;
}
}
}

KnownFPClass KnownSrc;
// Recurse, but cap the recursion to two levels, because we don't want
// to waste time spinning around in loops. We need at least depth 2 to
// detect known sign bits.
computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
PhiRecursionLimit,
Q.getWithoutCondContext().getWithInstruction(
P->getIncomingBlock(U)->getTerminator()));
Q.getWithoutCondContext().getWithInstruction(CxtI));

if (First) {
Known = KnownSrc;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SafeStack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ class SafeStackLegacyPass : public FunctionPass {
bool ShouldPreserveDominatorTree;
std::optional<DominatorTree> LazilyComputedDomTree;

// Do we already have a DominatorTree avaliable from the previous pass?
// Do we already have a DominatorTree available from the previous pass?
// Note that we should *NOT* require it, to avoid the case where we end up
// not needing it, but the legacy PM would have computed it for us anyways.
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
Expand Down
94 changes: 47 additions & 47 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4119,61 +4119,61 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
return;
}

if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle.
int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
bool CanExtract = true;
for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
assert(SrcNumElts > MaskNumElts);

if (Idx >= (int)SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle.
int StartIdx[2] = {-1, -1}; // StartIdx to extract from
bool CanExtract = true;
for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;

// If all the indices come from the same MaskNumElts sized portion of
// the sources we can use extract. Also make sure the extract wouldn't
// extract past the end of the source.
int NewStartIdx = alignDown(Idx, MaskNumElts);
if (NewStartIdx + MaskNumElts > SrcNumElts ||
(StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
CanExtract = false;
// Make sure we always update StartIdx as we use it to track if all
// elements are undef.
StartIdx[Input] = NewStartIdx;
if (Idx >= (int)SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}

if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
// If all the indices come from the same MaskNumElts sized portion of
// the sources we can use extract. Also make sure the extract wouldn't
// extract past the end of the source.
int NewStartIdx = alignDown(Idx, MaskNumElts);
if (NewStartIdx + MaskNumElts > SrcNumElts ||
(StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
CanExtract = false;
// Make sure we always update StartIdx as we use it to track if all
// elements are undef.
StartIdx[Input] = NewStartIdx;
}

// Calculate new mask.
SmallVector<int, 8> MappedOps(Mask);
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
else if (Idx >= 0)
Idx -= StartIdx[0];
if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}

setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
// Calculate new mask.
SmallVector<int, 8> MappedOps(Mask);
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
else if (Idx >= 0)
Idx -= StartIdx[0];
}

setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}

// We can't use either concat vectors or extract subvectors so fall back to
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,18 @@ void LinkGraph::dump(raw_ostream &OS) {
return false;
});

for (auto &Sec : sections()) {
OS << "section " << Sec.getName() << ":\n\n";
std::vector<Section *> SortedSections;
for (auto &Sec : sections())
SortedSections.push_back(&Sec);
llvm::sort(SortedSections, [](const Section *LHS, const Section *RHS) {
return LHS->getName() < RHS->getName();
});

for (auto *Sec : SortedSections) {
OS << "section " << Sec->getName() << ":\n\n";

std::vector<Block *> SortedBlocks;
llvm::copy(Sec.blocks(), std::back_inserter(SortedBlocks));
llvm::copy(Sec->blocks(), std::back_inserter(SortedBlocks));
llvm::sort(SortedBlocks, [](const Block *LHS, const Block *RHS) {
return LHS->getAddress() < RHS->getAddress();
});
Expand Down
40 changes: 35 additions & 5 deletions llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -921,12 +921,42 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
SmallVector<ExecutorAddrRange> ELFNixPlatformSecs;
LLVM_DEBUG(dbgs() << "ELFNixPlatform::registerInitSections\n");

for (auto &Sec : G.sections()) {
if (isELFInitializerSection(Sec.getName())) {
jitlink::SectionRange R(Sec);
ELFNixPlatformSecs.push_back(R.getRange());
SmallVector<jitlink::Section *> OrderedInitSections;
for (auto &Sec : G.sections())
if (isELFInitializerSection(Sec.getName()))
OrderedInitSections.push_back(&Sec);

// FIXME: This handles priority order within the current graph, but we'll need
// to include priority information in the initializer allocation
// actions in order to respect the ordering across multiple graphs.
// Sort the initializer sections into run order:
//   1. .init_array.<N> sections, by ascending numeric priority N;
//   2. the remaining .init_array* sections, by name;
//   3. every other initializer section, by name.
llvm::sort(OrderedInitSections, [](const jitlink::Section *LHS,
                                   const jitlink::Section *RHS) {
  const bool LHSIsInitArray = LHS->getName().starts_with(".init_array");
  const bool RHSIsInitArray = RHS->getName().starts_with(".init_array");

  // .init_array[.N] comes before any non-.init_array[.N] section. This must
  // be decided symmetrically: if only the "LHS is .init_array" direction is
  // handled, a pair such as (".ctors", ".init_array") falls through to the
  // name comparison and both comp(a, b) and comp(b, a) return true, which
  // violates the strict weak ordering the sort requires.
  if (LHSIsInitArray != RHSIsInitArray)
    return LHSIsInitArray;

  if (LHSIsInitArray) {
    StringRef LHSPrioStr(LHS->getName());
    StringRef RHSPrioStr(RHS->getName());
    // A section has a priority only if its name is ".init_array." followed
    // by a decimal integer (getAsInteger returns false on success).
    uint64_t LHSPriority = 0;
    bool LHSHasPriority = LHSPrioStr.consume_front(".init_array.") &&
                          !LHSPrioStr.getAsInteger(10, LHSPriority);
    uint64_t RHSPriority = 0;
    bool RHSHasPriority = RHSPrioStr.consume_front(".init_array.") &&
                          !RHSPrioStr.getAsInteger(10, RHSPriority);
    if (LHSHasPriority)
      return RHSHasPriority ? LHSPriority < RHSPriority : true;
    if (RHSHasPriority)
      return false;
    // Neither section carries a parsable priority: fall through to the
    // name comparison below.
  }
  return LHS->getName() < RHS->getName();
});

for (auto &Sec : OrderedInitSections)
ELFNixPlatformSecs.push_back(jitlink::SectionRange(*Sec).getRange());

// Dump the scraped inits.
LLVM_DEBUG({
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/IR/ConstantFold.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -902,11 +902,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
Constant *ExtractIdx = ConstantInt::get(Ty, i);
Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);

// If any element of a divisor vector is zero, the whole op is poison.
if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
return PoisonValue::get(VTy);

Constant *Res = ConstantExpr::isDesirableBinOp(Opcode)
? ConstantExpr::get(Opcode, LHS, RHS)
: ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Object/COFFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,8 @@ StringRef COFFObjectFile::getFileFormatName() const {
return "COFF-ARM64EC";
case COFF::IMAGE_FILE_MACHINE_ARM64X:
return "COFF-ARM64X";
case COFF::IMAGE_FILE_MACHINE_R4000:
return "COFF-MIPS";
default:
return "COFF-<unknown arch>";
}
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/ObjectYAML/COFFYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,25 @@ void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration(
ECase(IMAGE_REL_AMD64_SSPAN32);
}

// YAML scalar-enumeration traits for the MIPS COFF relocation types
// (COFF::RelocationTypesMips). Each IMAGE_REL_MIPS_* enumerator is registered
// through ECase; that macro is defined outside this view and presumably maps
// the enumerator to its spelled-out name (confirm against its definition).
// MappingTraits<COFFYAML::Relocation>::mapping selects this enumeration when
// the header machine is COFF::IMAGE_FILE_MACHINE_R4000.
void ScalarEnumerationTraits<COFF::RelocationTypesMips>::enumeration(
IO &IO, COFF::RelocationTypesMips &Value) {
ECase(IMAGE_REL_MIPS_ABSOLUTE);
ECase(IMAGE_REL_MIPS_REFHALF);
ECase(IMAGE_REL_MIPS_REFWORD);
ECase(IMAGE_REL_MIPS_JMPADDR);
ECase(IMAGE_REL_MIPS_REFHI);
ECase(IMAGE_REL_MIPS_REFLO);
ECase(IMAGE_REL_MIPS_GPREL);
ECase(IMAGE_REL_MIPS_LITERAL);
ECase(IMAGE_REL_MIPS_SECTION);
ECase(IMAGE_REL_MIPS_SECREL);
ECase(IMAGE_REL_MIPS_SECRELLO);
ECase(IMAGE_REL_MIPS_SECRELHI);
ECase(IMAGE_REL_MIPS_JMPADDR16);
ECase(IMAGE_REL_MIPS_REFWORDNB);
ECase(IMAGE_REL_MIPS_PAIR);
}

void ScalarEnumerationTraits<COFF::RelocationTypesARM>::enumeration(
IO &IO, COFF::RelocationTypesARM &Value) {
ECase(IMAGE_REL_ARM_ABSOLUTE);
Expand Down Expand Up @@ -427,6 +446,10 @@ void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
MappingNormalization<NType<COFF::RelocationTypeAMD64>, uint16_t> NT(
IO, Rel.Type);
IO.mapRequired("Type", NT->Type);
} else if (H.Machine == COFF::IMAGE_FILE_MACHINE_R4000) {
MappingNormalization<NType<COFF::RelocationTypesMips>, uint16_t> NT(
IO, Rel.Type);
IO.mapRequired("Type", NT->Type);
} else if (H.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) {
MappingNormalization<NType<COFF::RelocationTypesARM>, uint16_t> NT(
IO, Rel.Type);
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,6 @@ def FeatureTHE : ExtensionWithMArch<"the", "THE", "FEAT_THE",
// Armv9.0 Architecture Extensions
//===----------------------------------------------------------------------===//

def FeatureUseFixedOverScalableIfEqualCost: SubtargetFeature<"use-fixed-over-scalable-if-equal-cost",
"UseFixedOverScalableIfEqualCost", "true",
"Prefer fixed width loop vectorization over scalable if the cost-model assigns equal costs">;

def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
"UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;

Expand Down Expand Up @@ -797,6 +793,10 @@ def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedO
def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
"true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;

def FeatureUseFixedOverScalableIfEqualCost: SubtargetFeature<"use-fixed-over-scalable-if-equal-cost",
"UseFixedOverScalableIfEqualCost", "true",
"Prefer fixed width loop vectorization over scalable if the cost-model assigns equal costs">;

//===----------------------------------------------------------------------===//
// Architectures.
//
Expand Down
61 changes: 42 additions & 19 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;

const unsigned Size = Ty.getSizeInBits();
if (Ty.isPointerVector())
return true;
if (Size <= 64)
return false;
// Address space 8 pointers get their own workaround.
Expand All @@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
if (!Ty.isVector())
return true;

if (Ty.isPointerVector())
return true;

unsigned EltSize = Ty.getScalarSizeInBits();
return EltSize != 32 && EltSize != 64;
}
Expand Down Expand Up @@ -5820,8 +5819,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
return Reg;
}

Register AMDGPULegalizerInfo::fixStoreSourceType(
MachineIRBuilder &B, Register VData, bool IsFormat) const {
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
Register VData, LLT MemTy,
bool IsFormat) const {
MachineRegisterInfo *MRI = B.getMRI();
LLT Ty = MRI->getType(VData);

Expand All @@ -5831,6 +5831,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
if (hasBufferRsrcWorkaround(Ty))
return castBufferRsrcToV4I32(VData, B);

if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
Ty = getBitcastRegisterType(Ty);
VData = B.buildBitcast(Ty, VData).getReg(0);
}
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
Expand All @@ -5848,22 +5852,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
}

bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
LegalizerHelper &Helper,
bool IsTyped,
bool IsFormat) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();

Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const LLT S32 = LLT::scalar(32);

VData = fixStoreSourceType(B, VData, IsFormat);
castBufferRsrcArgToV4I32(MI, B, 2);
Register RSrc = MI.getOperand(2).getReg();

MachineMemOperand *MMO = *MI.memoperands_begin();
const int MemSize = MMO->getSize().getValue();
LLT MemTy = MMO->getMemoryType();

VData = fixStoreSourceType(B, VData, MemTy, IsFormat);

castBufferRsrcArgToV4I32(MI, B, 2);
Register RSrc = MI.getOperand(2).getReg();

unsigned ImmOffset;

Expand Down Expand Up @@ -5956,10 +5964,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
}

bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
LegalizerHelper &Helper,
bool IsFormat,
bool IsTyped) const {
MachineIRBuilder &B = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *B.getMRI();
GISelChangeObserver &Observer = Helper.Observer;

// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const LLT MemTy = MMO->getMemoryType();
Expand Down Expand Up @@ -6008,9 +6019,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
// logic doesn't have to handle that case.
if (hasBufferRsrcWorkaround(Ty)) {
Observer.changingInstr(MI);
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
B.setInsertPt(B.getMBB(), MI);
}
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
Ty = getBitcastRegisterType(Ty);
Observer.changingInstr(MI);
Helper.bitcastDst(MI, Ty, 0);
Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
B.setInsertPt(B.getMBB(), MI);
}

LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const bool Unpacked = ST.hasUnpackedD16VMem();
Expand Down Expand Up @@ -7390,17 +7413,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
return legalizeBufferStore(MI, MRI, B, false, false);
return legalizeBufferStore(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
return legalizeBufferStore(MI, MRI, B, false, true);
return legalizeBufferStore(MI, Helper, false, true);
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
return legalizeBufferStore(MI, MRI, B, true, true);
return legalizeBufferStore(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
Expand All @@ -7409,17 +7432,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_atomic_buffer_load:
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
return legalizeBufferLoad(MI, MRI, B, false, false);
return legalizeBufferLoad(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
return legalizeBufferLoad(MI, MRI, B, true, false);
return legalizeBufferLoad(MI, Helper, true, false);
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
return legalizeBufferLoad(MI, MRI, B, true, true);
return legalizeBufferLoad(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
Expand Down
12 changes: 5 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {

Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg, bool ImageStore = false) const;
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
bool IsFormat) const;

bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsTyped,
bool IsFormat) const;
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat,
bool IsTyped) const;
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
bool IsTyped, bool IsFormat) const;
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
bool IsFormat, bool IsTyped) const;
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
Intrinsic::ID IID) const;

Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,8 +740,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

PB.registerPipelineStartEPCallback(
[](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
if (EnableHipStdPar)
PM.addPass(HipStdParAcceleratorCodeSelectionPass());
});
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2773,9 +2773,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
}

} else if (Src0.isReg() && !Src1.isReg()) {
// src0 should always be able to support any operand type, so no need to
// check operand legality.
CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
} else if (!Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ class RegisterTypes<list<ValueType> reg_types> {

def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;

Expand Down
62 changes: 62 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,13 @@ VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
return VectorInfo;
}

/// Returns \p Value unchanged when it already has type \p VT; otherwise wraps
/// it in an ISD::BITCAST node producing \p VT.
///
/// The type check uses SDValue::getValueType(), i.e. the type of the result
/// that \p Value actually refers to, rather than result 0 of its defining
/// node. The two differ when \p Value is a non-first result of a multi-result
/// node, in which case checking result 0 could skip a required bitcast or
/// insert a redundant one.
static SDValue MaybeBitcast(SelectionDAG &DAG, SDLoc DL, EVT VT,
                            SDValue Value) {
  if (Value.getValueType() == VT)
    return Value;
  return DAG.getNode(ISD::BITCAST, DL, VT, Value);
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
const NVPTXSubtarget &STI)
Expand Down Expand Up @@ -551,6 +558,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);

// Custom conversions to/from v2i8.
setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);

// Only logical ops can be done on v4i8 directly, others must be done
// elementwise.
setOperationAction(
Expand Down Expand Up @@ -2309,6 +2320,30 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}

/// Custom lowering for an ISD::BITCAST whose source operand has type v2i8.
///
/// Handles bitcasting from v2i8 without hitting the default promotion
/// strategy, which goes through stack memory. The two i8 elements are
/// extracted, zero-extended to i16 and packed as (Elt0 | (Elt1 << 8)); the
/// resulting i16 is then bitcast to the destination type unless that type is
/// already i16.
///
/// \returns \p Op unchanged when the source type is not v2i8, otherwise the
/// packed (and, if needed, bitcast) replacement value.
SDValue NVPTXTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  // Ask the operand SDValue for its own type: SDValue::getValueType() honours
  // the operand's result number, whereas querying result 0 of the defining
  // node is wrong when the operand is a later result of a multi-result node.
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() != MVT::v2i8)
    return Op;

  // Pack vector elements into i16 and bitcast to final type
  SDLoc DL(Op);
  SDValue Vec0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Src,
                             DAG.getIntPtrConstant(0, DL));
  SDValue Vec1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Src,
                             DAG.getIntPtrConstant(1, DL));
  SDValue Extend0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i16, Vec0);
  SDValue Extend1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i16, Vec1);
  SDValue Const8 = DAG.getConstant(8, DL, MVT::i16);
  // Element 0 occupies bits [7:0], element 1 occupies bits [15:8].
  SDValue AsInt = DAG.getNode(
      ISD::OR, DL, MVT::i16,
      {Extend0, DAG.getNode(ISD::SHL, DL, MVT::i16, {Extend1, Const8})});
  EVT ToVT = Op.getValueType();
  return MaybeBitcast(DAG, DL, ToVT, AsInt);
}

// We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it
// would get lowered as two constant loads and vector-packing move.
// Instead we want just a constant move:
Expand Down Expand Up @@ -2817,6 +2852,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Op;
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return Op;
case ISD::EXTRACT_VECTOR_ELT:
Expand Down Expand Up @@ -6127,6 +6164,28 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}

/// Custom result replacement for an ISD::BITCAST that produces v2i8.
///
/// Avoids the default promotion strategy (which goes through stack memory):
/// the source is viewed as an i16, element 0 is the truncation of that i16 to
/// i8 and element 1 is the truncation of the i16 shifted right by 8. The
/// rebuilt v2i8 BUILD_VECTOR is appended to \p Results. Nothing is appended
/// when the result type of \p Node is not v2i8.
static void ReplaceBITCAST(SDNode *Node, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &Results) {
  SDValue Cast(Node, 0);
  if (Cast->getValueType(0) != MVT::v2i8)
    return;

  SDLoc Loc(Node);

  // Reinterpret the source as one 16-bit integer (no-op if it already is).
  SDValue Packed = MaybeBitcast(DAG, Loc, MVT::i16, Cast->getOperand(0));

  // Low byte -> element 0.
  SDValue LoByte = DAG.getNode(ISD::TRUNCATE, Loc, MVT::i8, Packed);

  // High byte -> element 1.
  SDValue ShiftAmt = DAG.getConstant(8, Loc, MVT::i16);
  SDValue HiBits = DAG.getNode(ISD::SRL, Loc, MVT::i16, {Packed, ShiftAmt});
  SDValue HiByte = DAG.getNode(ISD::TRUNCATE, Loc, MVT::i8, HiBits);

  SDValue Rebuilt =
      DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i8, {LoByte, HiByte});
  Results.push_back(Rebuilt);
}

/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
Expand Down Expand Up @@ -6412,6 +6471,9 @@ void NVPTXTargetLowering::ReplaceNodeResults(
switch (N->getOpcode()) {
default:
report_fatal_error("Unhandled custom legalization");
case ISD::BITCAST:
ReplaceBITCAST(N, DAG, Results);
return;
case ISD::LOAD:
ReplaceLoadVector(N, DAG, Results);
return;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,8 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;

SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def SiFive7VS : ProcResource<1>; // Store sequencer
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// avaliable. This resource is meant to be used for 1 cycle by all vector
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources below. Each of them will only accept a single
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ FunctionPass *createX86IndirectThunksPass();
FunctionPass *createX86ReturnThunksPass();

/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to eachother)
/// have distinctive <LineNumber, Discriminator> (with respect to each other)
FunctionPass *createX86DiscriminateMemOpsPass();

/// This pass applies profiling information to insert cache prefetches.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *visitSRem(BinaryOperator &I);
Instruction *visitFRem(BinaryOperator &I);
bool simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I);
Instruction *commonIDivRemTransforms(BinaryOperator &I);
Instruction *commonIRemTransforms(BinaryOperator &I);
Instruction *commonIDivTransforms(BinaryOperator &I);
Instruction *visitUDiv(BinaryOperator &I);
Expand Down
71 changes: 39 additions & 32 deletions llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,29 +1158,39 @@ static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
return nullptr;
}

/// This function implements the transforms common to both integer division
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
/// Common integer divide transforms
Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;

/// Common integer divide/remainder transforms
Instruction *InstCombinerImpl::commonIDivRemTransforms(BinaryOperator &I) {
assert(I.isIntDivRem() && "Unexpected instruction");
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
bool IsSigned = I.getOpcode() == Instruction::SDiv;

// If any element of a constant divisor fixed width vector is zero or undef
// the behavior is undefined and we can fold the whole op to poison.
auto *Op1C = dyn_cast<Constant>(Op1);
Type *Ty = I.getType();
auto *VTy = dyn_cast<FixedVectorType>(Ty);
if (Op1C && VTy) {
unsigned NumElts = VTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = Op1C->getAggregateElement(i);
if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
return replaceInstUsesWith(I, PoisonValue::get(Ty));
}
}

if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;

// The RHS is known non-zero.
if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
return replaceOperand(I, 1, V);

// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
// Handle cases involving: div/rem X, (select Cond, Y, Z)
if (simplifyDivRemOfSelectWithZeroOp(I))
return &I;

// If the divisor is a select-of-constants, try to constant fold all div ops:
// C / (select Cond, TrueC, FalseC) --> select Cond, (C / TrueC), (C / FalseC)
// C div/rem (select Cond, TrueC, FalseC) --> select Cond, (C div/rem TrueC),
// (C div/rem FalseC)
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
if (match(Op0, m_ImmConstant()) &&
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
Expand All @@ -1189,6 +1199,21 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
return R;
}

return nullptr;
}

/// This function implements the transforms common to both integer division
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
/// Common integer divide transforms
Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
if (Instruction *Res = commonIDivRemTransforms(I))
return Res;

Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
bool IsSigned = I.getOpcode() == Instruction::SDiv;
Type *Ty = I.getType();

const APInt *C2;
if (match(Op1, m_APInt(C2))) {
Value *X;
Expand Down Expand Up @@ -2138,29 +2163,11 @@ static Instruction *simplifyIRemMulShl(BinaryOperator &I,
/// remainder instructions.
/// Common integer remainder transforms
Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
if (Instruction *Res = commonIDivRemTransforms(I))
return Res;

Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

// The RHS is known non-zero.
if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
return replaceOperand(I, 1, V);

// Handle cases involving: rem X, (select Cond, Y, Z)
if (simplifyDivRemOfSelectWithZeroOp(I))
return &I;

// If the divisor is a select-of-constants, try to constant fold all rem ops:
// C % (select Cond, TrueC, FalseC) --> select Cond, (C % TrueC), (C % FalseC)
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
if (match(Op0, m_ImmConstant()) &&
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
/*FoldWithMultiUse*/ true))
return R;
}

if (isa<Constant>(Op1)) {
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1056,7 +1056,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
// Strictness of the comparison is irrelevant.
X = Cmp0;
Y = Cmp1;
if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
if (match(FVal, m_c_Add(m_NotForbidPoison(m_Specific(X)), m_Specific(Y)))) {
// (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
// (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
BinaryOperator *BO = cast<BinaryOperator>(FVal);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2490,7 +2490,7 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));

assert(CurLoop->isLoopInvariant(BaseX) &&
"Expected BaseX to be avaliable in the preheader!");
"Expected BaseX to be available in the preheader!");

if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
// FIXME: support right-shift?
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,8 +1415,8 @@ class LoopVectorizationCostModel {
/// \param TailFoldNonPowOf2 true if tail folding with non-power-of-2
/// safe distance can be enabled.
/// \param UserIC User specific interleave count.
void setTailFoldingStyles(bool IsScalableVF, bool TailFoldPowOf2, bool TailFoldNonPowOf2,
unsigned UserIC) {
void setTailFoldingStyles(bool IsScalableVF, bool TailFoldPowOf2,
bool TailFoldNonPowOf2, unsigned UserIC) {
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
if (!Legal->canFoldTailByMasking()) {
ChosenTailFoldingStyle =
Expand Down Expand Up @@ -1460,7 +1460,7 @@ class LoopVectorizationCostModel {
// DataWithoutLaneMask to try to vectorize the loop with folded tail
// in a generic way.
ChosenTailFoldingStyle =
std::make_pair(TailFoldingStyle::DataWithoutLaneMask,
std::make_pair(TailFoldingStyle::DataWithoutLaneMask,
TailFoldingStyle::DataWithoutLaneMask);
LLVM_DEBUG(
dbgs()
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,9 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
if (NewCost >= OldCost)

// If costs are equal, still fold as we reduce instruction count.
if (NewCost > OldCost)
return false;

Value *Shuf0 = Builder.CreateShuffleVector(Op00, Op01, NewMask0);
Expand Down
Loading