8 changes: 6 additions & 2 deletions clang/lib/Driver/ToolChains/Arch/Mips.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
bool IsN64 = ABIName == "64";
bool IsPIC = false;
bool NonPIC = false;
bool HasNaN2008Opt = false;

Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
Expand Down Expand Up @@ -285,9 +286,10 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
StringRef Val = StringRef(A->getValue());
if (Val == "2008") {
if (mips::getIEEE754Standard(CPUName) & mips::Std2008)
if (mips::getIEEE754Standard(CPUName) & mips::Std2008) {
Features.push_back("+nan2008");
else {
HasNaN2008Opt = true;
} else {
Features.push_back("-nan2008");
D.Diag(diag::warn_target_unsupported_nan2008) << CPUName;
}
Expand Down Expand Up @@ -323,6 +325,8 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
} else if (HasNaN2008Opt) {
Features.push_back("+abs2008");
}

AddTargetFeature(Args, Features, options::OPT_msingle_float,
Expand Down
17 changes: 9 additions & 8 deletions clang/lib/Sema/SemaConcept.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -771,10 +771,9 @@ namespace {
};
} // namespace

static const Expr *
SubstituteConstraintExpression(Sema &S,
const Sema::TemplateCompareNewDeclInfo &DeclInfo,
const Expr *ConstrExpr) {
static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
Sema &S, const Sema::TemplateCompareNewDeclInfo &DeclInfo,
const Expr *ConstrExpr) {
MultiLevelTemplateArgumentList MLTAL = S.getTemplateInstantiationArgs(
DeclInfo.getDecl(), DeclInfo.getLexicalDeclContext(), /*Final=*/false,
/*Innermost=*/nullptr,
Expand All @@ -797,8 +796,8 @@ SubstituteConstraintExpression(Sema &S,
std::optional<Sema::CXXThisScopeRAII> ThisScope;
if (auto *RD = dyn_cast<CXXRecordDecl>(DeclInfo.getDeclContext()))
ThisScope.emplace(S, const_cast<CXXRecordDecl *>(RD), Qualifiers());
ExprResult SubstConstr =
S.SubstConstraintExpr(const_cast<clang::Expr *>(ConstrExpr), MLTAL);
ExprResult SubstConstr = S.SubstConstraintExprWithoutSatisfaction(
const_cast<clang::Expr *>(ConstrExpr), MLTAL);
if (SFINAE.hasErrorOccurred() || !SubstConstr.isUsable())
return nullptr;
return SubstConstr.get();
Expand All @@ -814,12 +813,14 @@ bool Sema::AreConstraintExpressionsEqual(const NamedDecl *Old,
if (Old && !New.isInvalid() && !New.ContainsDecl(Old) &&
Old->getLexicalDeclContext() != New.getLexicalDeclContext()) {
if (const Expr *SubstConstr =
SubstituteConstraintExpression(*this, Old, OldConstr))
SubstituteConstraintExpressionWithoutSatisfaction(*this, Old,
OldConstr))
OldConstr = SubstConstr;
else
return false;
if (const Expr *SubstConstr =
SubstituteConstraintExpression(*this, New, NewConstr))
SubstituteConstraintExpressionWithoutSatisfaction(*this, New,
NewConstr))
NewConstr = SubstConstr;
else
return false;
Expand Down
89 changes: 46 additions & 43 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9900,15 +9900,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Match up the template parameter lists with the scope specifier, then
// determine whether we have a template or a template specialization.
bool Invalid = false;
TemplateIdAnnotation *TemplateId =
D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId
? D.getName().TemplateId
: nullptr;
TemplateParameterList *TemplateParams =
MatchTemplateParametersToScopeSpecifier(
D.getDeclSpec().getBeginLoc(), D.getIdentifierLoc(),
D.getCXXScopeSpec(), TemplateId, TemplateParamLists, isFriend,
isMemberSpecialization, Invalid);
D.getCXXScopeSpec(),
D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId
? D.getName().TemplateId
: nullptr,
TemplateParamLists, isFriend, isMemberSpecialization,
Invalid);
if (TemplateParams) {
// Check that we can declare a template here.
if (CheckTemplateDeclScope(S, TemplateParams))
Expand All @@ -9921,11 +9921,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (Name.getNameKind() == DeclarationName::CXXDestructorName) {
Diag(NewFD->getLocation(), diag::err_destructor_template);
NewFD->setInvalidDecl();
// Function template with explicit template arguments.
} else if (TemplateId) {
Diag(D.getIdentifierLoc(), diag::err_function_template_partial_spec)
<< SourceRange(TemplateId->LAngleLoc, TemplateId->RAngleLoc);
NewFD->setInvalidDecl();
}

// If we're adding a template to a dependent context, we may need to
Expand Down Expand Up @@ -9978,11 +9973,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
<< FixItHint::CreateRemoval(RemoveRange)
<< FixItHint::CreateInsertion(InsertLoc, "<>");
Invalid = true;

// Recover by faking up an empty template argument list.
HasExplicitTemplateArgs = true;
TemplateArgs.setLAngleLoc(InsertLoc);
TemplateArgs.setRAngleLoc(InsertLoc);
}
}
} else {
Expand All @@ -9996,33 +9986,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (TemplateParamLists.size() > 0)
// For source fidelity, store all the template param lists.
NewFD->setTemplateParameterListsInfo(Context, TemplateParamLists);

// "friend void foo<>(int);" is an implicit specialization decl.
if (isFriend && TemplateId)
isFunctionTemplateSpecialization = true;
}

// If this is a function template specialization and the unqualified-id of
// the declarator-id is a template-id, convert the template argument list
// into our AST format and check for unexpanded packs.
if (isFunctionTemplateSpecialization && TemplateId) {
HasExplicitTemplateArgs = true;

TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc);
TemplateArgs.setRAngleLoc(TemplateId->RAngleLoc);
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
translateTemplateArguments(TemplateArgsPtr, TemplateArgs);

// FIXME: Should we check for unexpanded packs if this was an (invalid)
// declaration of a function template partial specialization? Should we
// consider the unexpanded pack context to be a partial specialization?
for (const TemplateArgumentLoc &ArgLoc : TemplateArgs.arguments()) {
if (DiagnoseUnexpandedParameterPack(
ArgLoc, isFriend ? UPPC_FriendDeclaration
: UPPC_ExplicitSpecialization))
NewFD->setInvalidDecl();
}
}

if (Invalid) {
Expand Down Expand Up @@ -10475,6 +10438,46 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
diag::ext_operator_new_delete_declared_inline)
<< NewFD->getDeclName();

// If the declarator is a template-id, translate the parser's template
// argument list into our AST format.
if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) {
TemplateIdAnnotation *TemplateId = D.getName().TemplateId;
TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc);
TemplateArgs.setRAngleLoc(TemplateId->RAngleLoc);
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
translateTemplateArguments(TemplateArgsPtr,
TemplateArgs);

HasExplicitTemplateArgs = true;

if (NewFD->isInvalidDecl()) {
HasExplicitTemplateArgs = false;
} else if (FunctionTemplate) {
// Function template with explicit template arguments.
Diag(D.getIdentifierLoc(), diag::err_function_template_partial_spec)
<< SourceRange(TemplateId->LAngleLoc, TemplateId->RAngleLoc);

HasExplicitTemplateArgs = false;
} else if (isFriend) {
// "friend void foo<>(int);" is an implicit specialization decl.
isFunctionTemplateSpecialization = true;
} else {
assert(isFunctionTemplateSpecialization &&
"should have a 'template<>' for this decl");
}
} else if (isFriend && isFunctionTemplateSpecialization) {
// This combination is only possible in a recovery case; the user
// wrote something like:
// template <> friend void foo(int);
// which we're recovering from as if the user had written:
// friend void foo<>(int);
// Go ahead and fake up a template id.
HasExplicitTemplateArgs = true;
TemplateArgs.setLAngleLoc(D.getIdentifierLoc());
TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
}

// We do not add HD attributes to specializations here because
// they may have different constexpr-ness compared to their
// templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
Expand Down
22 changes: 20 additions & 2 deletions clang/lib/Sema/SemaTemplateInstantiate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1190,6 +1190,7 @@ namespace {
const MultiLevelTemplateArgumentList &TemplateArgs;
SourceLocation Loc;
DeclarationName Entity;
// Whether to evaluate the C++20 constraints or simply substitute into them.
bool EvaluateConstraints = true;

public:
Expand Down Expand Up @@ -2499,6 +2500,17 @@ TemplateInstantiator::TransformNestedRequirement(
Req->getConstraintExpr()->getBeginLoc(), Req,
Sema::InstantiatingTemplate::ConstraintsCheck{},
Req->getConstraintExpr()->getSourceRange());
if (!getEvaluateConstraints()) {
ExprResult TransConstraint = TransformExpr(Req->getConstraintExpr());
if (TransConstraint.isInvalid() || !TransConstraint.get())
return nullptr;
if (TransConstraint.get()->isInstantiationDependent())
return new (SemaRef.Context)
concepts::NestedRequirement(TransConstraint.get());
ConstraintSatisfaction Satisfaction;
return new (SemaRef.Context) concepts::NestedRequirement(
SemaRef.Context, TransConstraint.get(), Satisfaction);
}

ExprResult TransConstraint;
ConstraintSatisfaction Satisfaction;
Expand Down Expand Up @@ -4093,13 +4105,19 @@ Sema::SubstExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) {
// Substitutes TemplateArgs into the constraint expression E. At present this
// is a plain forwarder: the result is whatever SubstExpr returns for the same
// arguments.
ExprResult
Sema::SubstConstraintExpr(Expr *E,
const MultiLevelTemplateArgumentList &TemplateArgs) {
// FIXME: should call SubstExpr directly if this function is equivalent or
// should it be different?
return SubstExpr(E, TemplateArgs);
}

// Substitutes TemplateArgs into the constraint expression E using a
// TemplateInstantiator whose constraint evaluation has been switched off, so
// the expression is only transformed.
ExprResult Sema::SubstConstraintExprWithoutSatisfaction(
    Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) {
  // A null expression is handed straight back to the caller.
  if (E == nullptr)
    return E;

  TemplateInstantiator Substituter(*this, TemplateArgs, SourceLocation(),
                                   DeclarationName());
  // Substitute only; do not evaluate the constraints while transforming.
  Substituter.setEvaluateConstraints(false);
  return Substituter.TransformExpr(E);
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
private:
CallDescriptionMap<FnDescription> FnDescriptions = {
{{{"fopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
{{{"fdopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
{{{"freopen"}, 3},
{&StreamChecker::preFreopen, &StreamChecker::evalFreopen, 2}},
{{{"tmpfile"}, 0}, {nullptr, &StreamChecker::evalFopen, ArgNone}},
Expand Down
1 change: 1 addition & 0 deletions clang/test/Analysis/Inputs/system-header-simulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ FILE *funopen(const void *,
int (*)(void *));

FILE *fopen(const char *restrict path, const char *restrict mode);
FILE *fdopen(int fd, const char *mode);
FILE *tmpfile(void);
FILE *freopen(const char *restrict pathname, const char *restrict mode, FILE *restrict stream);
int fclose(FILE *fp);
Expand Down
13 changes: 11 additions & 2 deletions clang/test/Analysis/stream-error.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ void error_fopen(void) {
fclose(F);
}

// A stream successfully opened with fdopen is expected to start out with
// neither its end-of-file indicator nor its error indicator set.
void error_fdopen(int fd) {
FILE *F = fdopen(fd, "r");
if (!F)
return;
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}}
fclose(F);
}

void error_freopen(void) {
FILE *F = fopen("file", "r");
if (!F)
Expand Down Expand Up @@ -146,8 +155,8 @@ void error_fgets(void) {
fgets(Buf, sizeof(Buf), F); // expected-warning {{Stream might be already closed}}
}

void error_fputc(void) {
FILE *F = tmpfile();
void error_fputc(int fd) {
FILE *F = fdopen(fd, "w");
if (!F)
return;
int Ret = fputc('X', F);
Expand Down
7 changes: 6 additions & 1 deletion clang/test/Analysis/stream-non-posix-function.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,16 @@ typedef struct _FILE FILE;
// These functions are not standard C library functions.
FILE *tmpfile(const char *restrict path); // Real 'tmpfile' should have exactly 0 formal parameters.
FILE *fopen(const char *restrict path); // Real 'fopen' should have exactly 2 formal parameters.
FILE *fdopen(int fd); // Real 'fdopen' should have exactly 2 formal parameters.

// Calls the one-parameter `fopen` declared in this file; the result is never
// closed, yet no leak report is expected for it.
void test_fopen_non_posix(void) {
FILE *fp = fopen("file"); // no-leak: This isn't the standard POSIX `fopen`, we don't know the semantics of this call.
}

void test_tmpfile_non_posix(void) {
FILE *fp = tmpfile("file"); // // no-leak: This isn't the standard POSIX `tmpfile`, we don't know the semantics of this call.
FILE *fp = tmpfile("file"); // no-leak: This isn't the standard POSIX `tmpfile`, we don't know the semantics of this call.
}

// Calls the one-parameter `fdopen` declared in this file; the result is never
// closed, yet no leak report is expected for it.
void test_fdopen_non_posix(int fd) {
FILE *fp = fdopen(fd); // no-leak: This isn't the standard POSIX `fdopen`, we don't know the semantics of this call.
}
10 changes: 10 additions & 0 deletions clang/test/Analysis/stream-note.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ void check_note_freopen(void) {
// expected-warning@-1 {{Opened stream never closed. Potential resource leak}}
// expected-note@-2 {{Opened stream never closed. Potential resource leak}}

// Path notes for a stream opened with fdopen that is never closed: the open
// site is noted, the null check takes the false branch, and the function then
// ends without an fclose.
void check_note_fdopen(int fd) {
FILE *F = fdopen(fd, "r"); // expected-note {{Stream opened here}}
if (!F)
// expected-note@-1 {{'F' is non-null}}
// expected-note@-2 {{Taking false branch}}
return;
}
// expected-warning@-1 {{Opened stream never closed. Potential resource leak}}
// expected-note@-2 {{Opened stream never closed. Potential resource leak}}

void check_note_leak_2(int c) {
FILE *F1 = fopen("foo1.c", "r"); // expected-note {{Stream opened here}}
// stdargs-note@-1 {{'fopen' is successful}}
Expand Down
8 changes: 8 additions & 0 deletions clang/test/Analysis/stream-stdlibraryfunctionargs.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ void test_fopen(void) {
// stdfunc-warning{{should not be NULL}}
}

// The result of fdopen is not checked here: both outcomes of the NULL
// comparison are expected, and passing the unchecked pointer to fclose is
// expected to be reported under both the stream and stdfunc prefixes.
void test_fdopen(int fd) {
FILE *fp = fdopen(fd, "r");
clang_analyzer_eval(fp != NULL); // any-warning{{TRUE}} any-warning{{FALSE}}
fclose(fp); // \
// stream-warning{{Stream pointer might be NULL}} \
// stdfunc-warning{{should not be NULL}}
}

void test_tmpfile(void) {
FILE *fp = tmpfile();
clang_analyzer_eval(fp != NULL); // any-warning{{TRUE}} any-warning{{FALSE}}
Expand Down
7 changes: 7 additions & 0 deletions clang/test/Analysis/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ void f_open(void) {
fclose(p);
}

// Uses the result of fdopen without a NULL check; the fread call is expected
// to be flagged because the stream pointer might be NULL.
void f_dopen(int fd) {
FILE *F = fdopen(fd, "r");
char buf[1024];
fread(buf, 1, 1, F); // expected-warning {{Stream pointer might be NULL}}
fclose(F);
}

void f_seek(void) {
FILE *p = fopen("foo", "r");
if (!p)
Expand Down
12 changes: 0 additions & 12 deletions clang/test/CXX/temp/temp.decls/temp.variadic/p5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,11 +376,6 @@ namespace Specializations {
template<typename... Ts>
struct PrimaryClass<Ts>; // expected-error{{partial specialization contains unexpanded parameter pack 'Ts'}}

template<typename T, typename... Ts>
void PrimaryFunction();
template<typename T, typename... Ts>
void PrimaryFunction<Ts>(); // expected-error{{function template partial specialization is not allowed}}

#if __cplusplus >= 201402L
template<typename T, typename... Ts>
constexpr int PrimaryVar = 0;
Expand All @@ -397,13 +392,6 @@ namespace Specializations {
template<typename U>
struct InnerClass<U, Ts>; // expected-error{{partial specialization contains unexpanded parameter pack 'Ts'}}

template<typename... Us>
void InnerFunction();
template<>
void InnerFunction<Ts>(); // expected-error{{explicit specialization contains unexpanded parameter pack 'Ts'}}

friend void PrimaryFunction<Ts>(); // expected-error{{friend declaration contains unexpanded parameter pack 'Ts'}}

#if __cplusplus >= 201402L
template<typename... Us>
constexpr static int InnerVar = 0;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/avr/avr-inline-asm-constraints.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ void R() {
}

void G() {
// CHECK: call addrspace(0) void asm sideeffect "subi r30, $0", "G"(i16 50)
asm("subi r30, %0" :: "G"(50));
// CHECK: call addrspace(0) void asm sideeffect "subi r30, $0", "G"(i16 0)
asm("subi r30, %0" :: "G"(0));
}

void Q() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ const unsigned char val = 0;
// Every inline-asm statement below is expected to be rejected: the first two
// for an invalid input constraint string, the last for an operand value that
// is out of range for constraint 'G'.
int foo(void) {
__asm__ volatile("foo %0, 1" : : "fo" (val)); // expected-error {{invalid input constraint 'fo' in asm}}
__asm__ volatile("foo %0, 1" : : "Nd" (val)); // expected-error {{invalid input constraint 'Nd' in asm}}
__asm__ volatile("subi r30, %0" : : "G" (1)); // expected-error {{value '1' out of range for constraint 'G'}}
}
8 changes: 7 additions & 1 deletion clang/test/Driver/mips-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,13 @@
// RUN: %clang -target mips-linux-gnu -march=mips32r3 -### -c %s \
// RUN: -mnan=legacy -mnan=2008 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-NAN2008 %s
// CHECK-NAN2008: "-target-feature" "+nan2008"
// CHECK-NAN2008: "-target-feature" "+nan2008" "-target-feature" "+abs2008"
//
// -mnan=2008 -mabs=legacy
// RUN: %clang -target mips-linux-gnu -march=mips32r3 -### -c %s \
// RUN: -mabs=legacy -mnan=2008 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-ABSLEGACYNAN2008 %s
// CHECK-ABSLEGACYNAN2008: "-target-feature" "+nan2008" "-target-feature" "-abs2008"
//
// -mnan=legacy
// RUN: %clang -target mips-linux-gnu -march=mips32r3 -### -c %s \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaCXX/template-instantiation.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// RUN: %clang_cc1 -verify -fsyntax-only -Wno-ignored-attributes %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -verify -fsyntax-only %s
// expected-no-diagnostics

namespace GH76521 {

template <typename T>
void foo() {
auto l = []() __attribute__((pcs("aapcs-vfp"))) {};
auto l = []() __attribute__((preserve_most)) {};
}

void bar() {
Expand Down
32 changes: 32 additions & 0 deletions clang/test/SemaTemplate/concepts-out-of-line-def.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,35 @@ struct bar {
bar<int> x;
} // namespace GH61763


// Out-of-line member definitions whose requires-expressions contain nested
// requirements: a definition whose nested requirement differs from the
// in-class declaration is expected to be diagnosed as not matching any
// declaration, while one that repeats the declared requirement is accepted.
namespace GH74314 {
template <class T, class U> constexpr bool is_same_v = __is_same(T, U);
template <class T, class U> constexpr bool is_not_same_v = !__is_same(T, U);

template <class Result>
concept something_interesting = requires {
true;
requires is_same_v<int, Result>;
};

template <class T>
struct X {
void foo() requires requires { requires is_not_same_v<T, int>; };
void bar(decltype(requires { requires is_not_same_v<T, int>; }));
};

template <class T>
void X<T>::foo() requires requires { requires something_interesting<T>; } {}
// expected-error@-1{{definition of 'foo' does not match any declaration}}
// expected-note@*{{}}

template <class T>
void X<T>::foo() requires requires { requires is_not_same_v<T, int>; } {} // ok

template <class T>
void X<T>::bar(decltype(requires { requires something_interesting<T>; })) {}
// expected-error@-1{{definition of 'bar' does not match any declaration}}

template <class T>
void X<T>::bar(decltype(requires { requires is_not_same_v<T, int>; })) {}
} // namespace GH74314
33 changes: 33 additions & 0 deletions clang/unittests/AST/ASTImporterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9342,6 +9342,39 @@ TEST_P(ASTImporterOptionSpecificTestBase, ImportConflictTypeAliasTemplate) {
EXPECT_FALSE(ImportedCallable);
}

// Matches a class template specialization that was instantiated from a class
// template partial specialization whose getInstantiatedFromMember() is
// non-null. Specializations instantiated from anything else do not match.
AST_MATCHER(ClassTemplateSpecializationDecl, hasInstantiatedFromMember) {
  if (auto InstantiatedFrom = Node.getInstantiatedFrom()) {
    // getInstantiatedFrom() is a pointer union that may hold the primary
    // template rather than a partial specialization. Use a checked cast so
    // that case yields "no match" instead of tripping the union's accessor
    // assertion.
    if (auto *FromPartialSpecialization =
            dyn_cast<ClassTemplatePartialSpecializationDecl *>(
                InstantiatedFrom))
      return FromPartialSpecialization->getInstantiatedFromMember() != nullptr;
  }
  return false;
}

// Checks that importing a class template partial specialization which has a
// non-null getInstantiatedFromMember() yields an imported declaration that
// also has one.
TEST_P(ASTImporterOptionSpecificTestBase, ImportInstantiatedFromMember) {
  const char *Code =
      R"(
template <typename> struct B {
template <typename, bool = false> union D;
template <typename T> union D<T> {};
D<int> d;
};
B<int> b;
)";
  Decl *FromTU = getTuDecl(Code, Lang_CXX11);
  auto *FromD = FirstDeclMatcher<ClassTemplateSpecializationDecl>().match(
      FromTU, classTemplateSpecializationDecl(hasName("D"),
                                              hasInstantiatedFromMember()));
  // Stop with a test failure, rather than a null dereference, if nothing in
  // the translation unit matched.
  ASSERT_TRUE(FromD);
  auto *FromPartialSpecialization =
      cast<ClassTemplatePartialSpecializationDecl *>(
          FromD->getInstantiatedFrom());
  ASSERT_TRUE(FromPartialSpecialization->getInstantiatedFromMember());
  auto *ImportedPartialSpecialization =
      Import(FromPartialSpecialization, Lang_CXX11);
  // The import result is dereferenced below, so make sure it is non-null.
  ASSERT_TRUE(ImportedPartialSpecialization);
  EXPECT_TRUE(ImportedPartialSpecialization->getInstantiatedFromMember());
}

INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest,
DefaultTestValuesForRunOptions);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class DataflowAnalysisTest : public Test {
const std::optional<StateT> &MaybeState = BlockStates[Block->getBlockID()];
assert(MaybeState.has_value());
return *MaybeState;
};
}

std::unique_ptr<ASTUnit> AST;
std::unique_ptr<ControlFlowContext> CFCtx;
Expand Down
103 changes: 62 additions & 41 deletions clang/utils/TableGen/ClangASTNodesEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class ClangASTNodesEmitter {
typedef std::multimap<ASTNode, ASTNode> ChildMap;
typedef ChildMap::const_iterator ChildIterator;

std::set<ASTNode> PrioritizedClasses;
RecordKeeper &Records;
ASTNode Root;
const std::string &NodeClassName;
Expand Down Expand Up @@ -70,8 +71,16 @@ class ClangASTNodesEmitter {
std::pair<ASTNode, ASTNode> EmitNode(raw_ostream& OS, ASTNode Base);
public:
explicit ClangASTNodesEmitter(RecordKeeper &R, const std::string &N,
const std::string &S)
: Records(R), NodeClassName(N), BaseSuffix(S) {}
const std::string &S,
std::string_view PriorizeIfSubclassOf)
: Records(R), NodeClassName(N), BaseSuffix(S) {
auto vecPrioritized =
PriorizeIfSubclassOf.empty()
? std::vector<Record *>{}
: R.getAllDerivedDefinitions(PriorizeIfSubclassOf);
PrioritizedClasses =
std::set<ASTNode>(vecPrioritized.begin(), vecPrioritized.end());
}

// run - Output the .inc file contents
void run(raw_ostream &OS);
Expand All @@ -95,8 +104,23 @@ std::pair<ASTNode, ASTNode> ClangASTNodesEmitter::EmitNode(raw_ostream &OS,
if (!Base.isAbstract())
First = Last = Base;

auto comp = [this](ASTNode LHS, ASTNode RHS) {
auto LHSPrioritized = PrioritizedClasses.count(LHS) > 0;
auto RHSPrioritized = PrioritizedClasses.count(RHS) > 0;
if (LHSPrioritized && !RHSPrioritized)
return true;
if (!LHSPrioritized && RHSPrioritized)
return false;

return LHS.getName() > RHS.getName();
};
auto SortedChildren = std::set<ASTNode, decltype(comp)>(comp);

for (; i != e; ++i) {
ASTNode Child = i->second;
SortedChildren.insert(i->second);
}

for (const auto &Child : SortedChildren) {
bool Abstract = Child.isAbstract();
std::string NodeName = macroName(std::string(Child.getName()));

Expand Down Expand Up @@ -148,9 +172,7 @@ void ClangASTNodesEmitter::deriveChildTree() {
const std::vector<Record*> Stmts
= Records.getAllDerivedDefinitions(NodeClassName);

for (unsigned i = 0, e = Stmts.size(); i != e; ++i) {
Record *R = Stmts[i];

for (auto *R : Stmts) {
if (auto B = R->getValueAsOptionalDef(BaseFieldName))
Tree.insert(std::make_pair(B, R));
else if (Root)
Expand Down Expand Up @@ -182,9 +204,9 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) {
OS << "#endif\n\n";

OS << "#ifndef LAST_" << macroHierarchyName() << "_RANGE\n";
OS << "# define LAST_"
<< macroHierarchyName() << "_RANGE(Base, First, Last) "
<< macroHierarchyName() << "_RANGE(Base, First, Last)\n";
OS << "# define LAST_" << macroHierarchyName()
<< "_RANGE(Base, First, Last) " << macroHierarchyName()
<< "_RANGE(Base, First, Last)\n";
OS << "#endif\n\n";

EmitNode(OS, Root);
Expand All @@ -196,8 +218,20 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) {
}

void clang::EmitClangASTNodes(RecordKeeper &RK, raw_ostream &OS,
const std::string &N, const std::string &S) {
ClangASTNodesEmitter(RK, N, S).run(OS);
const std::string &N, const std::string &S,
std::string_view PriorizeIfSubclassOf) {
ClangASTNodesEmitter(RK, N, S, PriorizeIfSubclassOf).run(OS);
}

// Writes a "DECL_CONTEXT(<name>)" line for DeclContext unless its abstract
// bit is set, then does the same, depth first, for every record that Tree
// maps DeclContext to.
void printDeclContext(const std::multimap<Record *, Record *> &Tree,
                      Record *DeclContext, raw_ostream &OS) {
  const bool IsAbstract = DeclContext->getValueAsBit(AbstractFieldName);
  if (!IsAbstract)
    OS << "DECL_CONTEXT(" << DeclContext->getName() << ")\n";

  // All entries keyed by DeclContext are its direct children in the tree.
  const auto Children = Tree.equal_range(DeclContext);
  for (auto It = Children.first; It != Children.second; ++It)
    printDeclContext(Tree, It->second, OS);
}

// Emits an addendum to a .inc file to enumerate the clang declaration
Expand All @@ -210,38 +244,25 @@ void clang::EmitClangDeclContext(RecordKeeper &Records, raw_ostream &OS) {
OS << "#ifndef DECL_CONTEXT\n";
OS << "# define DECL_CONTEXT(DECL)\n";
OS << "#endif\n";

OS << "#ifndef DECL_CONTEXT_BASE\n";
OS << "# define DECL_CONTEXT_BASE(DECL) DECL_CONTEXT(DECL)\n";
OS << "#endif\n";

typedef std::set<Record*> RecordSet;
typedef std::vector<Record*> RecordVector;

RecordVector DeclContextsVector
= Records.getAllDerivedDefinitions(DeclContextNodeClassName);
RecordVector Decls = Records.getAllDerivedDefinitions(DeclNodeClassName);
RecordSet DeclContexts (DeclContextsVector.begin(), DeclContextsVector.end());

for (RecordVector::iterator i = Decls.begin(), e = Decls.end(); i != e; ++i) {
Record *R = *i;

if (Record *B = R->getValueAsOptionalDef(BaseFieldName)) {
if (DeclContexts.find(B) != DeclContexts.end()) {
OS << "DECL_CONTEXT_BASE(" << B->getName() << ")\n";
DeclContexts.erase(B);
}
}

std::vector<Record *> DeclContextsVector =
Records.getAllDerivedDefinitions(DeclContextNodeClassName);
std::vector<Record *> Decls =
Records.getAllDerivedDefinitions(DeclNodeClassName);

std::multimap<Record *, Record *> Tree;

const std::vector<Record *> Stmts =
Records.getAllDerivedDefinitions(DeclNodeClassName);

for (auto *R : Stmts) {
if (auto *B = R->getValueAsOptionalDef(BaseFieldName))
Tree.insert(std::make_pair(B, R));
}

// To keep identical order, RecordVector may be used
// instead of RecordSet.
for (RecordVector::iterator
i = DeclContextsVector.begin(), e = DeclContextsVector.end();
i != e; ++i)
if (DeclContexts.find(*i) != DeclContexts.end())
OS << "DECL_CONTEXT(" << (*i)->getName() << ")\n";
for (auto *DeclContext : DeclContextsVector) {
printDeclContext(Tree, DeclContext, OS);
}

OS << "#undef DECL_CONTEXT\n";
OS << "#undef DECL_CONTEXT_BASE\n";
}
3 changes: 2 additions & 1 deletion clang/utils/TableGen/TableGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,8 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
EmitClangASTNodes(Records, OS, CommentNodeClassName, "");
break;
case GenClangDeclNodes:
EmitClangASTNodes(Records, OS, DeclNodeClassName, "Decl");
EmitClangASTNodes(Records, OS, DeclNodeClassName, "Decl",
DeclContextNodeClassName);
EmitClangDeclContext(Records, OS);
break;
case GenClangStmtNodes:
Expand Down
10 changes: 9 additions & 1 deletion clang/utils/TableGen/TableGenBackends.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,16 @@ class RecordKeeper;
namespace clang {

void EmitClangDeclContext(llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
/**
@param PriorizeIfSubclassOf These classes should be prioritized in the output.
This is useful to force enum generation/jump tables/lookup tables to be more
compact in both size and surrounding code in hot functions. An example use is
in Decl for classes that inherit from DeclContext, for functions like
castFromDeclContext.
*/
void EmitClangASTNodes(llvm::RecordKeeper &RK, llvm::raw_ostream &OS,
const std::string &N, const std::string &S);
const std::string &N, const std::string &S,
std::string_view PriorizeIfSubclassOf = "");
void EmitClangBasicReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangBasicWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
Expand Down
204 changes: 96 additions & 108 deletions libc/src/__support/FPUtil/FPBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ enum class FPType {

namespace internal {

// The type of encoding for supported floating point types.
enum class FPEncoding {
IEEE754,
X86_ExtendedPrecision,
};

// Defines the layout (sign, exponent, significand) of a floating point type in
// memory. It also defines its associated StorageType, i.e., the unsigned
// integer type used to manipulate its representation.
Expand All @@ -49,47 +43,41 @@ template <> struct FPLayout<FPType::IEEE754_Binary16> {
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 5;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};

template <> struct FPLayout<FPType::IEEE754_Binary32> {
using StorageType = uint32_t;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};

template <> struct FPLayout<FPType::IEEE754_Binary64> {
using StorageType = uint64_t;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 11;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};

template <> struct FPLayout<FPType::IEEE754_Binary128> {
using StorageType = UInt128;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 112;
LIBC_INLINE_VAR static constexpr auto ENCODING = FPEncoding::IEEE754;
};

template <> struct FPLayout<FPType::X86_Binary80> {
using StorageType = UInt128;
LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
LIBC_INLINE_VAR static constexpr auto ENCODING =
FPEncoding::X86_ExtendedPrecision;
};

} // namespace internal

// FPBaseMasksAndShifts derives useful constants from the FPLayout.
// FPRepBase derives useful constants from the FPLayout.
template <FPType fp_type>
struct FPBaseMasksAndShifts : public internal::FPLayout<fp_type> {
struct FPRepBase : public internal::FPLayout<fp_type> {
private:
using UP = internal::FPLayout<fp_type>;

Expand Down Expand Up @@ -149,95 +137,67 @@ struct FPBaseMasksAndShifts : public internal::FPLayout<fp_type> {
return StorageType(1) << position;
}

public:
// Merge bits from 'a' and 'b' values according to 'mask'.
// Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when
// corresponding bits are ones.
LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b,
StorageType mask) {
// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
return a ^ ((a ^ b) & mask);
}

protected:
// The number of bits after the decimal dot when the number is in normal form.
LIBC_INLINE_VAR static constexpr int FRACTION_LEN =
UP::ENCODING == internal::FPEncoding::X86_ExtendedPrecision ? SIG_LEN - 1
: SIG_LEN;
fp_type == FPType::X86_Binary80 ? SIG_LEN - 1 : SIG_LEN;
LIBC_INLINE_VAR static constexpr uint32_t MANTISSA_PRECISION =
FRACTION_LEN + 1;
LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK =
mask_trailing_ones<StorageType, FRACTION_LEN>();

protected:
// If a number x is a NAN, then it is a quiet NAN if:
// QUIET_NAN_MASK & bits(x) != 0
LIBC_INLINE_VAR static constexpr StorageType QUIET_NAN_MASK =
UP::ENCODING == internal::FPEncoding::X86_ExtendedPrecision
fp_type == FPType::X86_Binary80
? bit_at(SIG_LEN - 1) | bit_at(SIG_LEN - 2) // 0b1100...
: bit_at(SIG_LEN - 1); // 0b1000...

// If a number x is a NAN, then it is a signalling NAN if:
// SIGNALING_NAN_MASK & bits(x) != 0
LIBC_INLINE_VAR static constexpr StorageType SIGNALING_NAN_MASK =
UP::ENCODING == internal::FPEncoding::X86_ExtendedPrecision
fp_type == FPType::X86_Binary80
? bit_at(SIG_LEN - 1) | bit_at(SIG_LEN - 3) // 0b1010...
: bit_at(SIG_LEN - 2); // 0b0100...
};

namespace internal {

// This is a temporary class to unify common methods and properties between
// FPBits and FPBits<long double>.
template <FPType fp_type> struct FPRep : private FPBaseMasksAndShifts<fp_type> {
using UP = FPBaseMasksAndShifts<fp_type>;
using typename UP::StorageType;
using UP::TOTAL_LEN;

protected:
using UP::EXP_SIG_MASK;
using UP::QUIET_NAN_MASK;
// The floating point number representation as an unsigned integer.
StorageType bits = 0;

public:
using UP::EXP_BIAS;
using UP::EXP_LEN;
using UP::EXP_MASK;
using UP::EXP_MASK_SHIFT;
using UP::FP_MASK;
using UP::FRACTION_LEN;
using UP::FRACTION_MASK;
using UP::MANTISSA_PRECISION;
using UP::SIGN_MASK;
using UP::STORAGE_LEN;

// Reinterpreting bits as an integer value and interpreting the bits of an
// integer value as a floating point value is used in tests. So, a convenient
// type is provided for such reinterpretations.
StorageType bits;

LIBC_INLINE constexpr FPRep() : bits(0) {}
LIBC_INLINE explicit constexpr FPRep(StorageType bits) : bits(bits) {}

LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
mantVal &= FRACTION_MASK;
bits &= ~FRACTION_MASK;
bits |= mantVal;
}

LIBC_INLINE constexpr StorageType get_mantissa() const {
return bits & FRACTION_MASK;
LIBC_INLINE constexpr bool get_sign() const {
return (bits & SIGN_MASK) != 0;
}

// Makes the sign reported by get_sign() equal to 'signVal'. Only the bits
// covered by SIGN_MASK are ever modified.
LIBC_INLINE constexpr void set_sign(bool signVal) {
  // Nothing to do when the sign already has the requested value.
  if (get_sign() == signVal)
    return;
  // Otherwise toggle the sign bit.
  bits ^= SIGN_MASK;
}

LIBC_INLINE constexpr bool get_sign() const {
return (bits & SIGN_MASK) != 0;
LIBC_INLINE constexpr StorageType get_mantissa() const {
return bits & FRACTION_MASK;
}

LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) {
// clear exponent bits
bits &= ~EXP_MASK;
// set exponent bits
bits |= (biased << EXP_MASK_SHIFT) & EXP_MASK;
LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
bits = merge(bits, mantVal, FRACTION_MASK);
}

// Returns the content of the exponent field: the bits selected by EXP_MASK,
// shifted down by EXP_MASK_SHIFT and converted to uint16_t.
LIBC_INLINE constexpr uint16_t get_biased_exponent() const {
  const StorageType exponent_field = bits & EXP_MASK;
  return uint16_t(exponent_field >> EXP_MASK_SHIFT);
}

// Overwrites the exponent field with 'biased'. The value is first moved into
// position; merge() then replaces only the bits selected by EXP_MASK, so any
// part of the shifted value outside of that mask is ignored.
LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) {
  const StorageType positioned = biased << EXP_MASK_SHIFT;
  bits = merge(bits, positioned, EXP_MASK);
}

// Returns the exponent with the bias removed, i.e. the biased exponent minus
// EXP_BIAS, as a signed int.
LIBC_INLINE constexpr int get_exponent() const {
  const int biased = int(get_biased_exponent());
  return biased - EXP_BIAS;
}
Expand Down Expand Up @@ -266,6 +226,23 @@ template <FPType fp_type> struct FPRep : private FPBaseMasksAndShifts<fp_type> {
}
};

namespace internal {

// Manipulates the representation of a floating point number defined by its
// FPType. This layer is architecture agnostic and does not handle C++ floating
// point types directly ('float', 'double' and 'long double'). Use the FPBits
// below if needed.
//
// TODO: Specialize this class for FPType::X86_Binary80 and remove ad-hoc logic
// from FPRepBase.
template <FPType fp_type> struct FPRep : public FPRepBase<fp_type> {
// Shorthand for the (dependent) base class.
using UP = FPRepBase<fp_type>;
// The using-declarations below re-declare names inherited from UP in this
// struct's default (public) section, so they can be named through FPRep
// without extra qualification.
// NOTE(review): FPRepBase appears to declare the three constants in a
// protected section, in which case these declarations also widen their
// access; confirm against the full definition of FPRepBase.
using typename UP::StorageType;
using UP::FRACTION_LEN;
using UP::FRACTION_MASK;
using UP::MANTISSA_PRECISION;
};

} // namespace internal

// Returns the FPType corresponding to C++ type T on the host.
Expand Down Expand Up @@ -311,14 +288,16 @@ template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
static_assert(cpp::is_floating_point_v<T>,
"FPBits instantiated with invalid type.");
using UP = internal::FPRep<get_fp_type<T>()>;
using StorageType = typename UP::StorageType;
using UP::bits;

private:
using UP::EXP_SIG_MASK;
using UP::QUIET_NAN_MASK;
using UP::SIG_LEN;
using UP::SIG_MASK;

public:
using StorageType = typename UP::StorageType;
using UP::bits;
using UP::EXP_BIAS;
using UP::EXP_LEN;
using UP::EXP_MASK;
Expand All @@ -327,46 +306,47 @@ template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
using UP::FRACTION_MASK;
using UP::SIGN_MASK;
using UP::TOTAL_LEN;
using UP::UP;

using UP::get_biased_exponent;
using UP::is_zero;

// The function return mantissa with the implicit bit set iff the current
// value is a valid normal number.
LIBC_INLINE constexpr StorageType get_explicit_mantissa() {
return ((get_biased_exponent() > 0 && !is_inf_or_nan())
? (FRACTION_MASK + 1)
: 0) |
(FRACTION_MASK & bits);
}

// Constants.
static constexpr int MAX_BIASED_EXPONENT = (1 << EXP_LEN) - 1;
static constexpr StorageType MIN_SUBNORMAL = StorageType(1);
static constexpr StorageType MAX_SUBNORMAL = FRACTION_MASK;
static constexpr StorageType MIN_NORMAL = (StorageType(1) << FRACTION_LEN);
static constexpr StorageType MAX_NORMAL =
((StorageType(MAX_BIASED_EXPONENT) - 1) << FRACTION_LEN) | MAX_SUBNORMAL;

// We don't want accidental type promotions/conversions, so we require exact
// type match.
template <typename XType, cpp::enable_if_t<cpp::is_same_v<T, XType>, int> = 0>
LIBC_INLINE constexpr explicit FPBits(XType x)
: UP(cpp::bit_cast<StorageType>(x)) {}
(StorageType(MAX_BIASED_EXPONENT - 1) << SIG_LEN) | SIG_MASK;

template <typename XType,
cpp::enable_if_t<cpp::is_same_v<XType, StorageType>, int> = 0>
LIBC_INLINE constexpr explicit FPBits(XType x) : UP(x) {}
// Constructors.
LIBC_INLINE constexpr FPBits() = default;

LIBC_INLINE constexpr FPBits() : UP() {}

LIBC_INLINE constexpr void set_val(T value) {
bits = cpp::bit_cast<StorageType>(value);
// Builds an FPBits either from a floating point value of type T (its bits are
// reinterpreted with bit_cast) or from a raw StorageType value (stored as
// is). Any other argument type is rejected at compile time.
template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) {
  using ArgType = cpp::remove_cv_t<XType>;
  if constexpr (cpp::is_same_v<ArgType, StorageType>) {
    bits = x;
  } else if constexpr (cpp::is_same_v<ArgType, T>) {
    bits = cpp::bit_cast<StorageType>(x);
  } else {
    // Only exact type matches are accepted, so that no accidental type
    // promotion/conversion can happen at the call site.
    static_assert(cpp::always_false<XType>);
  }
}

// Floating-point conversions.
LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(bits); }

LIBC_INLINE constexpr explicit operator T() const { return get_val(); }

// Returns the fraction bits of the current value, with the implicit leading
// bit (FRACTION_MASK + 1) OR-ed in when the biased exponent is non-zero and
// the value is neither infinity nor NaN.
LIBC_INLINE constexpr StorageType get_explicit_mantissa() {
  const bool has_implicit_bit = get_biased_exponent() > 0 && !is_inf_or_nan();
  const StorageType implicit_bit = has_implicit_bit ? (FRACTION_MASK + 1) : 0;
  return implicit_bit | (FRACTION_MASK & bits);
}

// True when the bits selected by EXP_SIG_MASK are exactly EXP_MASK; bits
// outside of EXP_SIG_MASK do not take part in the comparison.
LIBC_INLINE constexpr bool is_inf() const {
  const StorageType exp_and_sig = bits & EXP_SIG_MASK;
  return exp_and_sig == EXP_MASK;
}
Expand All @@ -387,14 +367,22 @@ template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
return FPBits(bits & EXP_SIG_MASK);
}

// Methods below this are used by tests.

LIBC_INLINE static constexpr T zero(bool sign = false) {
return FPBits(sign ? SIGN_MASK : StorageType(0)).get_val();
StorageType rep = (sign ? SIGN_MASK : StorageType(0)) // sign
| 0 // exponent
| 0; // mantissa
return FPBits(rep).get_val();
}

LIBC_INLINE static constexpr T neg_zero() { return zero(true); }

LIBC_INLINE static constexpr T inf(bool sign = false) {
return FPBits((sign ? SIGN_MASK : StorageType(0)) | EXP_MASK).get_val();
StorageType rep = (sign ? SIGN_MASK : StorageType(0)) // sign
| EXP_MASK // exponent
| 0; // mantissa
return FPBits(rep).get_val();
}

LIBC_INLINE static constexpr T neg_inf() { return inf(true); }
Expand All @@ -416,15 +404,24 @@ template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
}

LIBC_INLINE static constexpr T build_nan(StorageType v) {
FPBits<T> bits(inf());
bits.set_mantissa(v);
return T(bits);
StorageType rep = 0 // sign
| EXP_MASK // exponent
| (v & FRACTION_MASK); // mantissa
return FPBits(rep).get_val();
}

// Same as build_nan(), with the QUIET_NAN_MASK bits forced on in the payload
// 'v' before it is handed over.
LIBC_INLINE static constexpr T build_quiet_nan(StorageType v) {
  const StorageType quiet_payload = QUIET_NAN_MASK | v;
  return build_nan(quiet_payload);
}

// Assembles an FPBits from its three components. Each component is confined
// to its own field before being combined: 'biased_exp' is shifted into place
// and masked with EXP_MASK, 'mantissa' is masked with FRACTION_MASK.
LIBC_INLINE static constexpr FPBits<T>
create_value(bool sign, StorageType biased_exp, StorageType mantissa) {
  const StorageType sign_bits = sign ? SIGN_MASK : StorageType(0);
  const StorageType exponent_bits = (biased_exp << EXP_MASK_SHIFT) & EXP_MASK;
  const StorageType mantissa_bits = mantissa & FRACTION_MASK;
  const StorageType rep = sign_bits | exponent_bits | mantissa_bits;
  return FPBits(rep);
}

// Converts an integer number and an unbiased exponent to the corresponding
// floating point value of type T:
// Result = number * 2^(ep+1 - exponent_bias)
Expand Down Expand Up @@ -452,15 +449,6 @@ template <typename T> struct FPBits : public internal::FPRep<get_fp_type<T>()> {
}
return result;
}

LIBC_INLINE static constexpr FPBits<T>
create_value(bool sign, StorageType biased_exp, StorageType mantissa) {
FPBits<T> result;
result.set_sign(sign);
result.set_biased_exponent(biased_exp);
result.set_mantissa(mantissa);
return result;
}
};

} // namespace fputil
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/fpbits_str.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ template <typename T> LIBC_INLINE cpp::string str(fputil::FPBits<T> x) {

cpp::string s;

const details::ZeroPaddedHexFmt<StorageType> bits(x.bits);
const details::ZeroPaddedHexFmt<StorageType> bits(x.uintval());
s += bits.view();

s += " = (S: ";
Expand Down
140 changes: 67 additions & 73 deletions libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,70 +30,69 @@ template <>
struct FPBits<long double> : public internal::FPRep<FPType::X86_Binary80> {
using UP = internal::FPRep<FPType::X86_Binary80>;
using StorageType = typename UP::StorageType;
using UP::bits;

private:
using UP::bits;
using UP::EXP_SIG_MASK;
using UP::QUIET_NAN_MASK;

public:
using UP::EXP_BIAS;
using UP::EXP_LEN;
using UP::EXP_MASK;
using UP::EXP_MASK_SHIFT;
using UP::FP_MASK;
using UP::FRACTION_LEN;
using UP::FRACTION_MASK;
using UP::SIGN_MASK;
using UP::TOTAL_LEN;

static constexpr int MAX_BIASED_EXPONENT = 0x7FFF;
// Constants.
static constexpr int MAX_BIASED_EXPONENT = (1 << EXP_LEN) - 1;
// The x86 80 bit float represents the leading digit of the mantissa
// explicitly. This is the mask for that bit.
static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1)
<< FRACTION_LEN;
// The X80 significand is made of an explicit bit and the fractional part.
static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0,
"the explicit bit and the fractional part should not overlap");
static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK,
"the explicit bit and the fractional part should cover the "
"whole significand");
static constexpr StorageType MIN_SUBNORMAL = StorageType(1);
// Subnormal numbers include the implicit bit in x86 long double formats.
static constexpr StorageType MAX_SUBNORMAL =
(StorageType(1) << FRACTION_LEN) - 1;
static constexpr StorageType MIN_NORMAL = (StorageType(3) << FRACTION_LEN);
static constexpr StorageType MAX_SUBNORMAL = FRACTION_MASK;
static constexpr StorageType MIN_NORMAL =
(StorageType(1) << SIG_LEN) | EXPLICIT_BIT_MASK;
static constexpr StorageType MAX_NORMAL =
(StorageType(MAX_BIASED_EXPONENT - 1) << (FRACTION_LEN + 1)) |
(StorageType(1) << FRACTION_LEN) | MAX_SUBNORMAL;

LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
// The x86 80 bit float represents the leading digit of the mantissa
// explicitly. This is the mask for that bit.
constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1) << FRACTION_LEN;
return bits & (FRACTION_MASK | EXPLICIT_BIT_MASK);
(StorageType(MAX_BIASED_EXPONENT - 1) << SIG_LEN) | SIG_MASK;

// Constructors.
LIBC_INLINE constexpr FPBits() = default;

// Builds an FPBits either from a long double (its bits are reinterpreted with
// bit_cast) or from a raw StorageType value (stored as is). Any other
// argument type is rejected at compile time.
template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) {
  using ArgType = cpp::remove_cv_t<XType>;
  if constexpr (cpp::is_same_v<ArgType, StorageType>) {
    bits = x;
  } else if constexpr (cpp::is_same_v<ArgType, long double>) {
    bits = cpp::bit_cast<StorageType>(x);
  } else {
    // Only exact type matches are accepted, so that no accidental type
    // promotion/conversion can happen at the call site.
    static_assert(cpp::always_false<XType>);
  }
}

LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) {
bits &= ~(StorageType(1) << FRACTION_LEN);
bits |= (StorageType(implicitVal) << FRACTION_LEN);
// Floating-point conversions.
LIBC_INLINE constexpr long double get_val() const {
return cpp::bit_cast<long double>(bits);
}

LIBC_INLINE constexpr bool get_implicit_bit() const {
return bool((bits & (StorageType(1) << FRACTION_LEN)) >> FRACTION_LEN);
LIBC_INLINE constexpr operator long double() const {
return cpp::bit_cast<long double>(bits);
}

LIBC_INLINE constexpr FPBits() : UP() {}

template <typename XType,
cpp::enable_if_t<cpp::is_same_v<long double, XType>, int> = 0>
LIBC_INLINE constexpr explicit FPBits(XType x)
: UP(cpp::bit_cast<StorageType>(x)) {
// bits starts uninitialized, and setting it to a long double only
// overwrites the first 80 bits. This clears those upper bits.
bits = bits & ((StorageType(1) << 80) - 1);
LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
return bits & SIG_MASK;
}

template <typename XType,
cpp::enable_if_t<cpp::is_same_v<XType, StorageType>, int> = 0>
LIBC_INLINE constexpr explicit FPBits(XType x) : UP(x) {}

LIBC_INLINE constexpr operator long double() {
return cpp::bit_cast<long double>(bits);
LIBC_INLINE constexpr bool get_implicit_bit() const {
return bits & EXPLICIT_BIT_MASK;
}

LIBC_INLINE constexpr long double get_val() const {
return cpp::bit_cast<long double>(bits);
// Makes the bit reported by get_implicit_bit() equal to 'implicitVal'. Only
// the bit covered by EXPLICIT_BIT_MASK is ever modified.
LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) {
  // Nothing to do when the bit already has the requested value.
  if (get_implicit_bit() == implicitVal)
    return;
  // Otherwise toggle it.
  bits ^= EXPLICIT_BIT_MASK;
}

LIBC_INLINE constexpr bool is_inf() const {
Expand All @@ -117,34 +116,26 @@ struct FPBits<long double> : public internal::FPRep<FPType::X86_Binary80> {

// Methods below this are used by tests.

LIBC_INLINE static constexpr long double zero() { return 0.0l; }
// Returns zero as a long double; the sign bit is set when 'sign' is true.
LIBC_INLINE static constexpr long double zero(bool sign = false) {
  // The exponent, the explicit bit and the mantissa are all zero, so the sign
  // is the only field that can contribute bits to the representation.
  const StorageType rep = sign ? SIGN_MASK : StorageType(0);
  return FPBits(rep).get_val();
}

LIBC_INLINE static constexpr long double neg_zero() { return -0.0l; }
LIBC_INLINE static constexpr long double neg_zero() { return zero(true); }

LIBC_INLINE static constexpr long double inf(bool sign = false) {
FPBits<long double> bits(0.0l);
bits.set_biased_exponent(MAX_BIASED_EXPONENT);
bits.set_implicit_bit(1);
if (sign) {
bits.set_sign(true);
}
return bits.get_val();
StorageType rep = (sign ? SIGN_MASK : StorageType(0)) // sign
| EXP_MASK // exponent
| EXPLICIT_BIT_MASK // explicit bit
| 0; // mantissa
return FPBits(rep).get_val();
}

LIBC_INLINE static constexpr long double neg_inf() { return inf(true); }

LIBC_INLINE static constexpr long double build_nan(StorageType v) {
FPBits<long double> bits(0.0l);
bits.set_biased_exponent(MAX_BIASED_EXPONENT);
bits.set_implicit_bit(1);
bits.set_mantissa(v);
return bits;
}

LIBC_INLINE static constexpr long double build_quiet_nan(StorageType v) {
return build_nan(QUIET_NAN_MASK | v);
}

LIBC_INLINE static constexpr long double min_normal() {
return FPBits(MIN_NORMAL).get_val();
}
Expand All @@ -161,13 +152,16 @@ struct FPBits<long double> : public internal::FPRep<FPType::X86_Binary80> {
return FPBits(MAX_SUBNORMAL).get_val();
}

LIBC_INLINE static constexpr FPBits<long double>
create_value(bool sign, StorageType biased_exp, StorageType mantissa) {
FPBits<long double> result;
result.set_sign(sign);
result.set_biased_exponent(biased_exp);
result.set_mantissa(mantissa);
return result;
// Builds a NaN with a cleared sign bit: all exponent bits and the explicit
// bit are set, and the fraction is taken from 'v' (bits of 'v' outside of
// FRACTION_MASK are discarded).
LIBC_INLINE static constexpr long double build_nan(StorageType v) {
  const StorageType payload = v & FRACTION_MASK;
  const StorageType rep = EXP_MASK | EXPLICIT_BIT_MASK | payload;
  return FPBits(rep).get_val();
}

// Same as build_nan(), with the QUIET_NAN_MASK bits forced on in the payload
// 'v' before it is handed over.
LIBC_INLINE static constexpr long double build_quiet_nan(StorageType v) {
  const StorageType quiet_payload = QUIET_NAN_MASK | v;
  return build_nan(quiet_payload);
}
};

Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) {
return x;
}
// Normalize denormal inputs.
xbits.set_val(x * 0x1.0p52);
xbits = FPBits_t(x * 0x1.0p52);
x_e -= 52;
x_u = xbits.uintval();
}
Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/log10.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ LLVM_LIBC_FUNCTION(double, log10, (double x)) {
return x;
}
// Normalize denormal inputs.
xbits.set_val(x * 0x1.0p52);
xbits = FPBits_t(x * 0x1.0p52);
x_e -= 52;
x_u = xbits.uintval();
}
Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/log10f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
return x;
}
// Normalize denormal inputs.
xbits.set_val(xbits.get_val() * 0x1.0p23f);
xbits = FPBits(xbits.get_val() * 0x1.0p23f);
m -= 23;
x_u = xbits.uintval();
}
Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/log2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ LLVM_LIBC_FUNCTION(double, log2, (double x)) {
return x;
}
// Normalize denormal inputs.
xbits.set_val(x * 0x1.0p52);
xbits = FPBits_t(x * 0x1.0p52);
x_e -= 52;
x_u = xbits.uintval();
}
Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/log2f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
return x;
}
// Normalize denormal inputs.
xbits.set_val(xbits.get_val() * 0x1.0p23f);
xbits = FPBits(xbits.get_val() * 0x1.0p23f);
m -= 23;
}

Expand Down
2 changes: 1 addition & 1 deletion libc/src/math/generic/logf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
return static_cast<float>(FPBits::neg_inf());
}
// Normalize denormal inputs.
xbits.set_val(xbits.get_val() * 0x1.0p23f);
xbits = FPBits(xbits.get_val() * 0x1.0p23f);
m -= 23;
x_u = xbits.uintval();
}
Expand Down
12 changes: 6 additions & 6 deletions libc/src/signal/linux/signal_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,12 @@ namespace LIBC_NAMESPACE {
// handler taking siginfo_t * argument, one can set sa_handler to sa_sigaction
// if SA_SIGINFO is set in sa_flags.
struct KernelSigaction {
using HandlerType = void(int);
using SiginfoHandlerType = void(int, siginfo_t *, void *);

LIBC_INLINE KernelSigaction &operator=(const struct sigaction &sa) {
sa_flags = sa.sa_flags;
sa_restorer = sa.sa_restorer;
sa_mask = sa.sa_mask;
if (sa_flags & SA_SIGINFO) {
sa_handler = reinterpret_cast<HandlerType *>(sa.sa_sigaction);
sa_sigaction = sa.sa_sigaction;
} else {
sa_handler = sa.sa_handler;
}
Expand All @@ -48,13 +45,16 @@ struct KernelSigaction {
sa.sa_mask = sa_mask;
sa.sa_restorer = sa_restorer;
if (sa_flags & SA_SIGINFO)
sa.sa_sigaction = reinterpret_cast<SiginfoHandlerType *>(sa_handler);
sa.sa_sigaction = sa_sigaction;
else
sa.sa_handler = sa_handler;
return sa;
}

HandlerType *sa_handler;
union {
void (*sa_handler)(int);
void (*sa_sigaction)(int, siginfo_t *, void *);
};
unsigned long sa_flags;
void (*sa_restorer)(void);
// Our public definition of sigset_t matches that of the kernel's definition.
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/stdlib/strtold_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class LlvmLibcStrToLDTest : public LIBC_NAMESPACE::testing::Test {

EXPECT_EQ(str_end - inputString, expectedStrLen);

EXPECT_EQ(actual_fp.bits, expected_fp.bits);
EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign());
EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent());
EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa());
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/time/difftime_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ TEST(LlvmLibcDifftime, SmokeTest) {
actual_fp = LIBC_NAMESPACE::fputil::FPBits<long double>(
static_cast<long double>(result));

EXPECT_EQ(actual_fp.bits, expected_fp.bits);
EXPECT_EQ(actual_fp.uintval(), expected_fp.uintval());
EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign());
EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent());
EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa());
Expand Down
8 changes: 4 additions & 4 deletions libcxx/include/__format/format_arg_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,15 @@ _LIBCPP_HIDE_FROM_ABI void __store_basic_format_arg(basic_format_arg<_Context>*
([&] { *__data++ = __format::__create_format_arg<_Context>(__args); }(), ...);
}

template <class _Context, size_t N>
template <class _Context, size_t _Np>
struct __packed_format_arg_store {
__basic_format_arg_value<_Context> __values_[N];
__basic_format_arg_value<_Context> __values_[_Np];
uint64_t __types_ = 0;
};

template <class _Context, size_t N>
template <class _Context, size_t _Np>
struct __unpacked_format_arg_store {
basic_format_arg<_Context> __args_[N];
basic_format_arg<_Context> __args_[_Np];
};

} // namespace __format
Expand Down
3 changes: 3 additions & 0 deletions libcxx/test/libcxx/system_reserved_names.gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@
#define E SYSTEM_RESERVED_NAME
#define Ep SYSTEM_RESERVED_NAME
#define Es SYSTEM_RESERVED_NAME
#define N SYSTEM_RESERVED_NAME
#define Np SYSTEM_RESERVED_NAME
#define Ns SYSTEM_RESERVED_NAME
#define R SYSTEM_RESERVED_NAME
#define Rp SYSTEM_RESERVED_NAME
#define Rs SYSTEM_RESERVED_NAME
Expand Down
12 changes: 5 additions & 7 deletions lldb/source/Breakpoint/BreakpointResolverAddress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,11 @@ BreakpointResolverAddress::SerializeToStructuredData() {
new StructuredData::Dictionary());
SectionSP section_sp = m_addr.GetSection();
if (section_sp) {
ModuleSP module_sp = section_sp->GetModule();
ConstString module_name;
if (module_sp)
module_name.SetCString(module_name.GetCString());

options_dict_sp->AddStringItem(GetKey(OptionNames::ModuleName),
module_name.GetCString());
if (ModuleSP module_sp = section_sp->GetModule()) {
const FileSpec &module_fspec = module_sp->GetFileSpec();
options_dict_sp->AddStringItem(GetKey(OptionNames::ModuleName),
module_fspec.GetPath().c_str());
}
options_dict_sp->AddIntegerItem(GetKey(OptionNames::AddressOffset),
m_addr.GetOffset());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
using namespace lldb;
using namespace lldb_private;

using GetThreadDescriptionFunctionPtr = HRESULT
WINAPI (*)(HANDLE hThread, PWSTR *ppszThreadDescription);
using GetThreadDescriptionFunctionPtr =
HRESULT(WINAPI *)(HANDLE hThread, PWSTR *ppszThreadDescription);

TargetThreadWindows::TargetThreadWindows(ProcessWindows &process,
const HostThread &thread)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,42 @@ def test_scripted_extra_args(self):
self.setup_targets_and_cleanup()
self.do_check_extra_args()

def test_resolver_serialization(self):
    """Check that a serialized address breakpoint names the executable's path."""
    self.build()
    self.setup_targets_and_cleanup()

    expected_path = self.getBuildArtifact("a.out")

    # Walk from the executable module down to the start address of 'main',
    # validating every intermediate object on the way.
    module = self.orig_target.module["a.out"]
    self.assertTrue(
        module.IsValid(), "Failed to find the executable module in target"
    )
    matches = module.FindFunctions("main")
    self.assertTrue(matches.GetSize() == 1, "Unable to find function 'main'")
    context = matches.GetContextAtIndex(0)
    self.assertTrue(
        context.IsValid(), "SBSymbolContext representing function 'main' is invalid"
    )
    function = context.GetFunction()
    self.assertTrue(
        function.IsValid(), "SBFunction representing 'main' is invalid"
    )
    start_address = function.GetStartAddress()

    # Create the breakpoint by address and serialize it to JSON.
    breakpoint = self.orig_target.BreakpointCreateBySBAddress(start_address)
    self.assertTrue(
        breakpoint.IsValid(), "Could not place breakpoint on 'main' by address"
    )
    json_stream = lldb.SBStream()
    structured = breakpoint.SerializeToStructuredData()
    structured.GetAsJSON(json_stream)
    decoded = json.loads(json_stream.GetData())

    # The resolver options must mention the path of the built executable.
    resolver_options = decoded["Breakpoint"]["BKPTResolver"]["Options"]
    self.assertIn(
        expected_path,
        resolver_options["ModuleName"],
    )

def test_structured_data_serialization(self):
target = self.dbg.GetDummyTarget()
self.assertTrue(target.IsValid(), VALID_TARGET)
Expand Down
4 changes: 2 additions & 2 deletions lldb/unittests/Thread/ThreadTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ using namespace lldb;
namespace {

#ifdef _WIN32
using SetThreadDescriptionFunctionPtr = HRESULT
WINAPI (*)(HANDLE hThread, PCWSTR lpThreadDescription);
using SetThreadDescriptionFunctionPtr =
HRESULT(WINAPI *)(HANDLE hThread, PCWSTR lpThreadDescription);

static SetThreadDescriptionFunctionPtr SetThreadName;
#endif
Expand Down
3 changes: 3 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4114,6 +4114,9 @@ Code object V5 metadata is the same as
buffer that conforms to the requirements of the malloc/free
device library V1 version implementation.

"hidden_dynamic_lds_size"
Size of the dynamically allocated LDS memory is passed in the kernarg.

"hidden_private_base"
The high 32 bits of the flat addressing private aperture base.
Only used by GFX8 to allow conversion between private segment
Expand Down
4 changes: 2 additions & 2 deletions llvm/docs/CMake.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1118,10 +1118,10 @@ And then changing ``<project dir>/<pass name>/CMakeLists.txt`` to
When you are done developing your pass, you may wish to integrate it
into the LLVM source tree. You can achieve it in two easy steps:

#. Copying ``<pass name>`` folder into ``<LLVM root>/lib/Transform`` directory.
#. Copying ``<pass name>`` folder into ``<LLVM root>/lib/Transforms`` directory.

#. Adding ``add_subdirectory(<pass name>)`` line into
``<LLVM root>/lib/Transform/CMakeLists.txt``.
``<LLVM root>/lib/Transforms/CMakeLists.txt``.

Compiler/Platform-specific topics
=================================
Expand Down
84 changes: 84 additions & 0 deletions llvm/docs/Extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,90 @@ Example:
.uleb128 .LBB_END0_1-.LBB0_1 # BB_1 size
.byte y # BB_1 metadata

PGO Analysis Map
""""""""""""""""

PGO-related analysis data can be emitted after each function within the
``SHT_LLVM_BB_ADDR_MAP`` section through the optional ``pgo-analysis-map`` flag.
The currently supported analyses are Function Entry Count, Basic Block
Frequencies, and Branch Probabilities.

Each analysis is enabled or disabled via a bit in the feature byte. Currently
those bits are:

#. Function Entry Count - Number of times the function was called as taken
from a PGO profile. This will always be zero if PGO was not used or the
function was not encountered in the profile.

#. Basic Block Frequencies - Encoded as raw block frequency value taken from
MBFI analysis. This value is an integer that encodes the relative frequency
compared to the entry block. More information can be found in
'llvm/Support/BlockFrequency.h'.

#. Branch Probabilities - Encoded as raw numerator for branch probability
taken from MBPI analysis. This value is the numerator for a fixed point ratio
defined in 'llvm/Support/BranchProbability.h'. It indicates the probability
that the block is followed by a given successor block during execution.

This extra data requires version 2 or above. This is necessary since successors
of basic blocks won't know their index but will know their BB ID.

Example of BBAddrMap with PGO data:

.. code-block:: gas

.section ".llvm_bb_addr_map","",@llvm_bb_addr_map
.byte 2 # version number
.byte 7 # feature byte - PGO analyses enabled mask
.quad .Lfunc_begin0 # address of the function
.uleb128 4 # number of basic blocks
# BB record for BB_0
.uleb128 0 # BB_0 BB ID
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # BB_0 offset relative to function entry (always zero)
.uleb128 .LBB_END0_0-.Lfunc_begin0 # BB_0 size
.byte 0x18 # BB_0 metadata (multiple successors)
# BB record for BB_1
.uleb128 1 # BB_1 BB ID
.uleb128 .LBB0_1-.LBB_END0_0 # BB_1 offset relative to the end of last block (BB_0).
.uleb128 .LBB_END0_1-.LBB0_1 # BB_1 size
.byte 0x0 # BB_1 metadata (two successors)
# BB record for BB_2
.uleb128 2 # BB_2 BB ID
.uleb128 .LBB0_2-.LBB_END0_1 # BB_2 offset relative to the end of last block (BB_1).
.uleb128 .LBB_END0_2-.LBB0_2 # BB_2 size
.byte 0x0 # BB_2 metadata (one successor)
# BB record for BB_3
.uleb128 3 # BB_3 BB ID
.uleb128 .LBB0_3-.LBB_END0_2 # BB_3 offset relative to the end of last block (BB_2).
.uleb128 .LBB_END0_3-.LBB0_3 # BB_3 size
.byte 0x0 # BB_3 metadata (zero successors)
# PGO Analysis Map
.uleb128 1000 # function entry count (only when enabled)
# PGO data record for BB_0
.uleb128 1000 # BB_0 basic block frequency (only when enabled)
.uleb128 3 # BB_0 successors count (only enabled with branch probabilities)
.uleb128 1 # BB_0 successor 1 BB ID (only enabled with branch probabilities)
.uleb128 0x22222222 # BB_0 successor 1 branch probability (only enabled with branch probabilities)
.uleb128 2 # BB_0 successor 2 BB ID (only enabled with branch probabilities)
.uleb128 0x33333333 # BB_0 successor 2 branch probability (only enabled with branch probabilities)
.uleb128 3 # BB_0 successor 3 BB ID (only enabled with branch probabilities)
.uleb128 0xaaaaaaaa # BB_0 successor 3 branch probability (only enabled with branch probabilities)
# PGO data record for BB_1
.uleb128 133 # BB_1 basic block frequency (only when enabled)
.uleb128 2 # BB_1 successors count (only enabled with branch probabilities)
.uleb128 2 # BB_1 successor 1 BB ID (only enabled with branch probabilities)
.uleb128 0x11111111 # BB_1 successor 1 branch probability (only enabled with branch probabilities)
.uleb128 3 # BB_1 successor 2 BB ID (only enabled with branch probabilities)
.uleb128 0x11111111 # BB_1 successor 2 branch probability (only enabled with branch probabilities)
# PGO data record for BB_2
.uleb128 18 # BB_2 basic block frequency (only when enabled)
.uleb128 1 # BB_2 successors count (only enabled with branch probabilities)
.uleb128 3 # BB_2 successor 1 BB ID (only enabled with branch probabilities)
.uleb128 0xffffffff # BB_2 successor 1 branch probability (only enabled with branch probabilities)
# PGO data record for BB_3
.uleb128 1000 # BB_3 basic block frequency (only when enabled)
.uleb128 0 # BB_3 successors count (only enabled with branch probabilities)

``SHT_LLVM_OFFLOADING`` Section (offloading data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This section stores the binary data used to perform offloading device linking
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (TargetType->isScalableTy())
return TTI::TCC_Basic;
int64_t ElementSize =
DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
GTI.getSequentialElementStride(DL).getFixedValue();
if (ConstIdx) {
BaseOffset +=
ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,11 @@ struct TypePairAndMemDesc {
}
};

/// Logical negation of a legality predicate: the returned predicate holds for
/// a query exactly when \p P does not.
template <typename Predicate> Predicate predNot(Predicate P) {
  return [P](const LegalityQuery &Q) { return !P(Q); };
}

/// True iff P0 and P1 are true.
template<typename Predicate>
Predicate all(Predicate P0, Predicate P1) {
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/RuntimeLibcalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ namespace RTLIB {
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);

/// Return the outline atomics value for the given atomic ordering, access
/// size and set of libcalls for a given atomic, or UNKNOWN_LIBCALL if there
/// is none.
Libcall getOutlineAtomicHelper(const Libcall (&LC)[5][4],
AtomicOrdering Order, uint64_t MemSize);

/// Return the outline atomics value for the given opcode, atomic ordering
/// and type, or UNKNOWN_LIBCALL if there is none.
Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
Expand Down
57 changes: 54 additions & 3 deletions llvm/include/llvm/IR/GetElementPtrTypeIterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
Expand All @@ -30,7 +31,39 @@ template <typename ItTy = User::const_op_iterator>
class generic_gep_type_iterator {

ItTy OpIt;
PointerUnion<StructType *, Type *> CurTy;
// We use two different mechanisms to store the type a GEP index applies to.
// In some cases, we need to know the outer aggregate type the index is
// applied within, e.g. a struct. In such cases, we store the aggregate type
// in the iterator, and derive the element type on the fly.
//
// However, this is not always possible, because for the outermost index there
// is no containing type. In such cases, or if the containing type is not
// relevant, e.g. for arrays, the element type is stored as Type* in CurTy.
//
// If CurTy contains a Type* value, this does not imply anything about the
// type itself, because it is the element type and not the outer type.
// In particular, Type* can be a struct type.
//
// Consider this example:
//
// %my.struct = type { i32, [ 4 x float ] }
// [...]
// %gep = getelementptr %my.struct, ptr %ptr, i32 10, i32 1, i32 3
//
// Iterating over the indices of this GEP, CurTy will contain the following
// values:
// * i32 10: The outer index always operates on the GEP value type.
// CurTy contains a Type* pointing at `%my.struct`.
// * i32 1: This index is within a struct.
// CurTy contains a StructType* pointing at `%my.struct`.
// * i32 3: This index is within an array. We reuse the "flat" indexing
// for arrays which is also used in the top level GEP index.
// CurTy contains a Type* pointing at `float`.
//
// Vectors are handled separately because the layout of vectors is different
// for overaligned elements: Vectors are always bit-packed, whereas arrays
// respect ABI alignment of the elements.
PointerUnion<StructType *, VectorType *, Type *> CurTy;

generic_gep_type_iterator() = default;

Expand Down Expand Up @@ -69,6 +102,8 @@ class generic_gep_type_iterator {
// Returns the type selected by the current index: the stored element type
// when CurTy holds a plain Type*, the vector's element type when it holds a
// VectorType*, and otherwise the struct field picked by the current operand.
Type *getIndexedType() const {
  if (Type *ElemTy = dyn_cast_if_present<Type *>(CurTy))
    return ElemTy;
  if (VectorType *VecTy = dyn_cast_if_present<VectorType *>(CurTy))
    return VecTy->getElementType();
  // Only the struct alternative is left; the field is chosen by the operand.
  StructType *STy = cast<StructType *>(CurTy);
  return STy->getTypeAtIndex(getOperand());
}

Expand All @@ -79,7 +114,7 @@ class generic_gep_type_iterator {
if (auto *ATy = dyn_cast<ArrayType>(Ty))
CurTy = ATy->getElementType();
else if (auto *VTy = dyn_cast<VectorType>(Ty))
CurTy = VTy->getElementType();
CurTy = VTy;
else
CurTy = dyn_cast<StructType>(Ty);
++OpIt;
Expand Down Expand Up @@ -108,7 +143,23 @@ class generic_gep_type_iterator {
// that.

bool isStruct() const { return isa<StructType *>(CurTy); }
bool isSequential() const { return isa<Type *>(CurTy); }
bool isVector() const { return isa<VectorType *>(CurTy); }
bool isSequential() const { return !isStruct(); }

// For sequential GEP indices (every index except those into structs), the
// byte offset is the index value multiplied by an element stride. This
// function returns that stride. It depends on both the element type and the
// containing aggregate type, because vectors always tightly bit-pack their
// elements.
TypeSize getSequentialElementStride(const DataLayout &DL) const {
  assert(isSequential());
  Type *ElemTy = getIndexedType();
  // Arrays and the outermost index step by the element's alloc size.
  if (!isVector())
    return DL.getTypeAllocSize(ElemTy);
  // Vector elements step by their store size; this requires the element's
  // type size to equal its store size.
  assert(DL.typeSizeEqualsStoreSize(ElemTy) && "Not byte-addressable");
  return DL.getTypeStoreSize(ElemTy);
}

// Returns the struct type the current index applies within. This uses cast<>
// with no fallback, so it is only meaningful when isStruct() is true.
StructType *getStructType() const { return cast<StructType *>(CurTy); }

Expand Down
7 changes: 4 additions & 3 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
// All dimension-aware intrinsics are derived from this class.
class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
list<IntrinsicProperty> props,
list<SDNodeProperty> sdnodeprops> : DefaultAttrsIntrinsic<
list<SDNodeProperty> sdnodeprops> : Intrinsic<
P_.RetTypes, // vdata(VGPR) -- for load/atomic-with-return
!listconcat(
!foreach(arg, P_.DataArgs, arg.Type), // vdata(VGPR) -- for store/atomic
Expand All @@ -851,11 +851,12 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
// gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
// TODO-GFX12: Update all other cachepolicy descriptions.

!listconcat(props,
!listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn],
!if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
!if(P_.IsSample, [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>>], []),
[ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>>,
ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>]),
ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>],
!if(P_.IsAtomic, [], [IntrNoSync])),


"", sdnodeprops>,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/BasicAliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;

// Don't attempt to analyze GEPs if the scalable index is not zero.
TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
if (AllocTypeSize.isScalable()) {
Decomposed.Base = V;
return Decomposed;
Expand All @@ -650,7 +650,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;
}

TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
if (AllocTypeSize.isScalable()) {
Decomposed.Base = V;
return Decomposed;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/InlineCost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1429,7 +1429,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
continue;
}

APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
return true;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
// Convert to correct type.
if (Op->getType() != IntIdxTy)
Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
TypeSize TSize = GTI.getSequentialElementStride(DL);
if (TSize != TypeSize::getFixed(1)) {
Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
if (IntIdxTy->isVectorTy())
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2703,7 +2703,10 @@ static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {

// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
TypeSize ElemSize = GEPTI.isStruct()
? DL.getTypeAllocSize(GEPTI.getIndexedType())
: GEPTI.getSequentialElementStride(DL);
if (ElemSize != GEPAllocSize)
break;
--LastOperand;
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
KnownBits IndexBits(IndexBitWidth);
computeKnownBits(Index, IndexBits, Depth + 1, Q);
TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
KnownBits ScalingFactor(IndexBitWidth);
// Multiply by current sizeof type.
Expand Down Expand Up @@ -2128,7 +2128,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
}

// If we have a zero-sized type, the index doesn't matter. Keep looping.
if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero())
if (GTI.getSequentialElementStride(Q.DL).isZero())
continue;

// Fast path the constant operand case both for efficiency and so we don't
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
.Case("hidden_multigrid_sync_arg", true)
.Case("hidden_dynamic_lds_size", true)
.Case("hidden_private_base", true)
.Case("hidden_shared_base", true)
.Case("hidden_queue_ptr", true)
Expand Down
80 changes: 77 additions & 3 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Expand Down Expand Up @@ -140,6 +141,26 @@ static cl::opt<std::string> BasicBlockProfileDump(
"performed with -basic-block-sections=labels. Enabling this "
"flag during in-process ThinLTO is not supported."));

// This is a replication of fields of object::PGOAnalysisMap::Features. It
// should match the order of the fields so that
// `object::PGOAnalysisMap::Features::decode(PgoAnalysisMapFeatures.getBits())`
// succeeds.
// NOTE(review): the enumerator order presumably determines the bit each
// feature occupies in getBits(); confirm against cl::bits before reordering.
enum class PGOMapFeaturesEnum {
// Selected by "func-entry-count".
FuncEntryCount,
// Selected by "bb-freq".
BBFreq,
// Selected by "br-prob".
BrProb,
};
// Hidden, comma-separated `-pgo-analysis-map=` option; each listed value
// enables one of the features above.
static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
"pgo-analysis-map", cl::Hidden, cl::CommaSeparated,
cl::values(clEnumValN(PGOMapFeaturesEnum::FuncEntryCount,
"func-entry-count", "Function Entry Count"),
clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq",
"Basic Block Frequency"),
clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob",
"Branch Probability")),
cl::desc("Enable extended information within the BBAddrMap that is "
"extracted from PGO related analysis."));

const char DWARFGroupName[] = "dwarf";
const char DWARFGroupDescription[] = "DWARF Emission";
const char DbgTimerName[] = "emit";
Expand Down Expand Up @@ -428,6 +449,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
AU.addRequired<MachineBranchProbabilityInfo>();
}

bool AsmPrinter::doInitialization(Module &M) {
Expand Down Expand Up @@ -1379,7 +1401,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
OutStreamer->emitInt8(BBAddrMapVersion);
OutStreamer->AddComment("feature");
OutStreamer->emitInt8(0);
auto FeaturesBits = static_cast<uint8_t>(PgoAnalysisMapFeatures.getBits());
OutStreamer->emitInt8(FeaturesBits);
OutStreamer->AddComment("function address");
OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
OutStreamer->AddComment("number of basic blocks");
Expand Down Expand Up @@ -1409,6 +1432,51 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
PrevMBBEndSymbol = MBB.getEndSymbol();
}

if (FeaturesBits != 0) {
assert(BBAddrMapVersion >= 2 &&
"PGOAnalysisMap only supports version 2 or later");

auto FeatEnable =
cantFail(object::PGOAnalysisMap::Features::decode(FeaturesBits));

if (FeatEnable.FuncEntryCount) {
OutStreamer->AddComment("function entry count");
auto MaybeEntryCount = MF.getFunction().getEntryCount();
OutStreamer->emitULEB128IntValue(
MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
}
const MachineBlockFrequencyInfo *MBFI =
FeatEnable.BBFreq
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
const MachineBranchProbabilityInfo *MBPI =
FeatEnable.BrProb ? &getAnalysis<MachineBranchProbabilityInfo>()
: nullptr;

if (FeatEnable.BBFreq || FeatEnable.BrProb) {
for (const MachineBasicBlock &MBB : MF) {
if (FeatEnable.BBFreq) {
OutStreamer->AddComment("basic block frequency");
OutStreamer->emitULEB128IntValue(
MBFI->getBlockFreq(&MBB).getFrequency());
}
if (FeatEnable.BrProb) {
unsigned SuccCount = MBB.succ_size();
OutStreamer->AddComment("basic block successor count");
OutStreamer->emitULEB128IntValue(SuccCount);
for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
OutStreamer->AddComment("successor BB ID");
OutStreamer->emitULEB128IntValue(SuccMBB->getBBID()->BaseID);
OutStreamer->AddComment("successor branch probability");
OutStreamer->emitULEB128IntValue(
MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator());
}
}
}
}
}

OutStreamer->popSection();
}

Expand Down Expand Up @@ -1934,8 +2002,14 @@ void AsmPrinter::emitFunctionBody() {

// Emit section containing BB address offsets and their metadata, when
// BB labels are requested for this function. Skip empty functions.
if (MF->hasBBLabels() && HasAnyRealCode)
emitBBAddrMapSection(*MF);
if (HasAnyRealCode) {
if (MF->hasBBLabels())
emitBBAddrMapSection(*MF);
else if (PgoAnalysisMapFeatures.getBits() != 0)
MF->getContext().reportWarning(
SMLoc(), "pgo-analysis-map is enabled for function " + MF->getName() +
" but it does not have labels");
}

// Emit sections containing instruction and function PCs.
emitPCSections(*MF);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4776,7 +4776,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
TypeSize TS = GTI.getSequentialElementStride(DL);
if (TS.isNonZero()) {
// The optimisations below currently only work for fixed offsets.
if (TS.isScalable())
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1545,7 +1545,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
continue;
} else {
uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
uint64_t ElementSize = GTI.getSequentialElementStride(*DL);

// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
Expand Down
165 changes: 165 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
Expand Down Expand Up @@ -532,6 +533,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(REM_F);
case TargetOpcode::G_FPOW:
RTLIBCASE(POW_F);
case TargetOpcode::G_FPOWI:
RTLIBCASE(POWI_F);
case TargetOpcode::G_FMA:
RTLIBCASE(FMA_F);
case TargetOpcode::G_FSIN:
Expand Down Expand Up @@ -794,6 +797,132 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}

/// Pick the outline-atomics runtime libcall for the atomic instruction \p MI,
/// based on its opcode, the merged ordering of its memory operand and the
/// size in bytes of the accessed memory type. Returns RTLIB::UNKNOWN_LIBCALL
/// for vector memory types and for opcodes not handled here.
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  auto &MemOp = cast<GMemOperation>(MI);
  auto &MMO = MemOp.getMMO();
  auto Order = MMO.getMergedOrdering();
  LLT MemTy = MMO.getMemoryType();
  uint64_t SizeInBytes = MemTy.getSizeInBytes();
  if (MemTy.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

  // Every table below is laid out as [access size 1/2/4/8/16][ordering
  // RELAX/ACQ/REL/ACQ_REL]; the helper does the actual indexing.
  auto Pick = [&](const RTLIB::Libcall (&Table)[5][4]) {
    return getOutlineAtomicHelper(Table, Order, SizeInBytes);
  };

#define LCALLS(A, B)                                                           \
  { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall Table[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return Pick(Table);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall Table[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return Pick(Table);
  }
  // ADD and SUB share the LDADD family.
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    const RTLIB::Libcall Table[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return Pick(Table);
  }
  // AND is served by the LDCLR family.
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall Table[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return Pick(Table);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall Table[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return Pick(Table);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall Table[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return Pick(Table);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}

/// Replace the atomic cmpxchg / atomicrmw instruction \p MI with a call to the
/// outline-atomics libcall chosen by getOutlineAtomicLibcall().
///
/// Returns LegalizerHelper::Legalized once the call has been lowered, and
/// LegalizerHelper::UnableToLegalize when the target lowering has no name for
/// the libcall or when call lowering fails.
static LegalizerHelper::LegalizeResult
createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

// Return type, result registers and call arguments are filled in per opcode.
Type *RetTy;
SmallVector<Register> RetRegs;
SmallVector<CallLowering::ArgInfo, 3> Args;
unsigned Opc = MI.getOpcode();
switch (Opc) {
case TargetOpcode::G_ATOMIC_CMPXCHG:
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register Success;
LLT SuccessLLT;
// Read the operands using the plain cmpxchg layout (Ret, Mem, Cmp, New).
auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
MI.getFirst4RegLLTs();
RetRegs.push_back(Ret);
RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
// The WITH_SUCCESS form carries an extra Success register between Ret and
// Mem, so re-read all operands with the 5-register layout. The call then
// returns a {value, success} struct.
std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
NewLLT) = MI.getFirst5RegLLTs();
RetRegs.push_back(Success);
RetTy = StructType::get(
Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
}
// Arguments are passed as (expected, new, pointer).
Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
break;
}
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
case TargetOpcode::G_ATOMICRMW_SUB:
case TargetOpcode::G_ATOMICRMW_AND:
case TargetOpcode::G_ATOMICRMW_OR:
case TargetOpcode::G_ATOMICRMW_XOR: {
auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
RetRegs.push_back(Ret);
RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
// AND is routed to the LDCLR libcall family, so pass the bitwise complement
// of the operand (Val ^ -1); presumably LDCLR clears the bits set in its
// argument -- confirm against the helper's definition.
if (Opc == TargetOpcode::G_ATOMICRMW_AND)
Val =
MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
.getReg(0);
// SUB is routed to the LDADD libcall family, so pass the negated operand
// (0 - Val).
else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
Val =
MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
.getReg(0);
// Arguments are passed as (value, pointer).
Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
break;
}
default:
llvm_unreachable("unsupported opcode");
}

auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
const char *Name = TLI.getLibcallName(RTLibcall);

// Unsupported libcall on the target.
// NOTE(review): for AND/SUB the G_CONSTANT and G_XOR/G_SUB above have already
// been built when we bail out here; confirm that leaving them behind is fine.
if (!Name) {
LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
<< MIRBuilder.getTII().getName(Opc) << "\n");
return LegalizerHelper::UnableToLegalize;
}

// Describe the call: calling convention and callee symbol come from the
// target lowering, results and arguments from what was collected above.
CallLowering::CallLoweringInfo Info;
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);

std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
return LegalizerHelper::UnableToLegalize;

return LegalizerHelper::Legalized;
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
Type *FromType) {
auto ToMVT = MVT::getVT(ToType);
Expand Down Expand Up @@ -1014,6 +1143,27 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
case TargetOpcode::G_FPOWI: {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
Type *ITy = IntegerType::get(
Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
return UnableToLegalize;
}
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
std::initializer_list<CallLowering::ArgInfo> Args = {
{MI.getOperand(1).getReg(), HLTy, 0},
{MI.getOperand(2).getReg(), ITy, 1}};
LegalizeResult Status =
createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
Args, LocObserver, &MI);
if (Status != Legalized)
return Status;
break;
}
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
Expand Down Expand Up @@ -1058,6 +1208,19 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
case TargetOpcode::G_ATOMICRMW_SUB:
case TargetOpcode::G_ATOMICRMW_AND:
case TargetOpcode::G_ATOMICRMW_OR:
case TargetOpcode::G_ATOMICRMW_XOR:
case TargetOpcode::G_ATOMIC_CMPXCHG:
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
auto Status = createAtomicLibcall(MIRBuilder, MI);
if (Status != Legalized)
return Status;
break;
}
case TargetOpcode::G_BZERO:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
Expand Down Expand Up @@ -4557,6 +4720,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
case G_SHUFFLE_VECTOR:
return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
case G_FPOWI:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
default:
return UnableToLegalize;
}
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,15 +560,13 @@ bool FastISel::selectGetElementPtr(const User *I) {
}
}
} else {
Type *Ty = GTI.getIndexedType();

// If this is a constant subscript, handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
// N = N + Offset
uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
TotalOffs += GTI.getSequentialElementStride(DL) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
Expand All @@ -585,7 +583,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
}

// N = N + Idx * ElementSize;
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
uint64_t ElementSize = GTI.getSequentialElementStride(DL);
Register IdxN = getRegForGEPIndex(Idx);
if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
Expand Down
19 changes: 14 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,18 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
return true;

// Match GlobalAddresses
if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
if (auto *A = dyn_cast<GlobalAddressSDNode>(Base)) {
if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
if (A->getGlobal() == B->getGlobal()) {
Off += B->getOffset() - A->getOffset();
return true;
}

return false;
}

// Match Constants
if (auto *A = dyn_cast<ConstantPoolSDNode>(Base))
if (auto *A = dyn_cast<ConstantPoolSDNode>(Base)) {
if (auto *B = dyn_cast<ConstantPoolSDNode>(Other.Base)) {
bool IsMatch =
A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry();
Expand All @@ -62,7 +65,8 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
}
}

const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
return false;
}

// Match FrameIndexes.
if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
Expand All @@ -73,6 +77,7 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
// Non-equal FrameIndexes - If both frame indices are fixed
// we know their relative offsets and can compare them. Otherwise
// we must be conservative.
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
if (MFI.isFixedObjectIndex(A->getIndex()) &&
MFI.isFixedObjectIndex(B->getIndex())) {
Off += MFI.getObjectOffset(B->getIndex()) -
Expand All @@ -81,6 +86,7 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
}
}
}

return false;
}

Expand All @@ -91,10 +97,13 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
const SelectionDAG &DAG, bool &IsAlias) {

BaseIndexOffset BasePtr0 = match(Op0, DAG);
BaseIndexOffset BasePtr1 = match(Op1, DAG);
if (!BasePtr0.getBase().getNode())
return false;

if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
BaseIndexOffset BasePtr1 = match(Op1, DAG);
if (!BasePtr1.getBase().getNode())
return false;

int64_t PtrDiff;
if (NumBytes0 && NumBytes1 &&
BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4114,7 +4114,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
TypeSize ElementSize =
DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
GTI.getSequentialElementStride(DAG.getDataLayout());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
Expand Down
22 changes: 11 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1645,11 +1645,11 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
Depth + 1))
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
Depth + 1))
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
Expand All @@ -1675,11 +1675,11 @@ bool TargetLowering::SimplifyDemandedBits(
Known = Known.intersectWith(Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
Depth + 1))
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
Expand Down Expand Up @@ -3353,8 +3353,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// Try to transform the select condition based on the current demanded
// elements.
APInt UndefSel, UndefZero;
if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
APInt UndefSel, ZeroSel;
if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
Depth + 1))
return true;

Expand All @@ -3377,7 +3377,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// select value element.
APInt DemandedSel = DemandedElts & ~KnownZero;
if (DemandedSel != DemandedElts)
if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
Depth + 1))
return true;

Expand Down
40 changes: 25 additions & 15 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,27 +520,28 @@ RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
FREXP_PPCF128);
}

RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
MVT VT) {
RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4],
AtomicOrdering Order,
uint64_t MemSize) {
unsigned ModeN, ModelN;
switch (VT.SimpleTy) {
case MVT::i8:
switch (MemSize) {
case 1:
ModeN = 0;
break;
case MVT::i16:
case 2:
ModeN = 1;
break;
case MVT::i32:
case 4:
ModeN = 2;
break;
case MVT::i64:
case 8:
ModeN = 3;
break;
case MVT::i128:
case 16:
ModeN = 4;
break;
default:
return UNKNOWN_LIBCALL;
return RTLIB::UNKNOWN_LIBCALL;
}

switch (Order) {
Expand All @@ -561,34 +562,43 @@ RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
return UNKNOWN_LIBCALL;
}

return LC[ModeN][ModelN];
}

RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
MVT VT) {
if (!VT.isScalarInteger())
return UNKNOWN_LIBCALL;
uint64_t MemSize = VT.getScalarSizeInBits() / 8;

#define LCALLS(A, B) \
{ A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
#define LCALL5(A) \
LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
switch (Opc) {
case ISD::ATOMIC_CMP_SWAP: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
return LC[ModeN][ModelN];
return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_SWAP: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
return LC[ModeN][ModelN];
return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_ADD: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
return LC[ModeN][ModelN];
return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_OR: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
return LC[ModeN][ModelN];
return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_CLR: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
return LC[ModeN][ModelN];
return getOutlineAtomicHelper(LC, Order, MemSize);
}
case ISD::ATOMIC_LOAD_XOR: {
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
return LC[ModeN][ModelN];
return getOutlineAtomicHelper(LC, Order, MemSize);
}
default:
return UNKNOWN_LIBCALL;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1074,7 +1074,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
assert(BitWidth == 64 && "Invalid index type for getelementptr");
Idx = (int64_t)IdxGV.IntVal.getZExtValue();
}
Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx;
Total += I.getSequentialElementStride(getDataLayout()) * Idx;
}
}

Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/IR/DataLayout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,9 +936,8 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
// Add in the offset, as calculated by the structure layout info...
Result += Layout->getElementOffset(FieldNo);
} else {
// Get the array index and the size of each array element.
if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType());
if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
Result += ArrayIdx * GTI.getSequentialElementStride(*this);
}
}

Expand Down
Loading