68 changes: 34 additions & 34 deletions clang/test/SemaTemplate/deduction-guide.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ using BT = B<char, 'x'>;
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 0 T
// CHECK: |-NonTypeTemplateParmDecl {{.*}} 'T' depth 0 index 1 V
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 2 U
// CHECK: |-NonTypeTemplateParmDecl {{.*}} 'type-parameter-0-2' depth 0 index 3 W
// CHECK: |-NonTypeTemplateParmDecl {{.*}} 'U' depth 0 index 3 W
// CHECK: |-CXXDeductionGuideDecl {{.*}} 'auto (X<W, V>) -> B<T, V>'
// CHECK: | `-ParmVarDecl {{.*}} 'X<W, V>'
// CHECK: `-CXXDeductionGuideDecl {{.*}} 'auto (X<nullptr, 'x'>) -> B<char, 'x'>'
Expand All @@ -81,7 +81,7 @@ using BT = B<char, 'x'>;
// CHECK: |-InjectedClassNameType {{.*}} 'B<T, V>' dependent
// CHECK: `-TemplateSpecializationType {{.*}} 'X<W, V>' dependent
// CHECK: |-TemplateArgument expr
// CHECK: | `-DeclRefExpr {{.*}} 'type-parameter-0-2' NonTypeTemplateParm {{.*}} 'W' 'type-parameter-0-2'
// CHECK: | `-DeclRefExpr {{.*}} 'U' NonTypeTemplateParm {{.*}} 'W' 'U'
// CHECK: `-TemplateArgument expr
// CHECK: `-DeclRefExpr {{.*}} 'T' NonTypeTemplateParm {{.*}} 'V' 'T'

Expand All @@ -99,13 +99,13 @@ using CT = C<int>;
// CHECK: | |-TemplateTypeParmDecl {{.*}} typename depth 1 index 0 X
// CHECK: | `-NonTypeTemplateParmDecl {{.*}} 'X' depth 1 index 1
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 2 U
// CHECK: |-NonTypeTemplateParmDecl {{.*}} 'type-parameter-0-2' depth 0 index 3 V
// CHECK: |-NonTypeTemplateParmDecl {{.*}} 'U' depth 0 index 3 V
// CHECK: | `-TemplateArgument {{.*}} expr
// CHECK: | `-IntegerLiteral {{.*}} 'int' 0
// CHECK: |-CXXDeductionGuideDecl {{.*}} 'auto (A, Y<template-parameter-0-1>, type-parameter-0-2) -> C<A>'
// CHECK: |-CXXDeductionGuideDecl {{.*}} 'auto (A, Y<T>, U) -> C<A>'
// CHECK: | |-ParmVarDecl {{.*}} 'A'
// CHECK: | |-ParmVarDecl {{.*}} 'Y<template-parameter-0-1>'
// CHECK: | `-ParmVarDecl {{.*}} 'type-parameter-0-2'
// CHECK: | |-ParmVarDecl {{.*}} 'Y<T>'
// CHECK: | `-ParmVarDecl {{.*}} 'U'
// CHECK: `-CXXDeductionGuideDecl {{.*}} 'auto (int, Y<B>, int) -> C<int>'
// CHECK: |-TemplateArgument type 'int'
// CHECK: |-TemplateArgument template 'B'
Expand All @@ -114,20 +114,20 @@ using CT = C<int>;
// CHECK: |-ParmVarDecl {{.*}} 'int'
// CHECK: |-ParmVarDecl {{.*}} 'Y<B>'
// CHECK: `-ParmVarDecl {{.*}} 'int'
// CHECK: FunctionProtoType {{.*}} 'auto (A, Y<template-parameter-0-1>, type-parameter-0-2) -> C<A>' dependent trailing_return cdecl
// CHECK: FunctionProtoType {{.*}} 'auto (A, Y<T>, U) -> C<A>' dependent trailing_return cdecl
// CHECK: |-InjectedClassNameType {{.*}} 'C<A>' dependent
// CHECK: |-TemplateTypeParmType {{.*}} 'A' dependent depth 0 index 0
// CHECK: | `-TemplateTypeParm {{.*}} 'A'
// CHECK: |-ElaboratedType {{.*}} 'Y<template-parameter-0-1>' sugar dependent
// CHECK: | `-TemplateSpecializationType {{.*}} 'Y<template-parameter-0-1>' dependent
// CHECK: |-ElaboratedType {{.*}} 'Y<T>' sugar dependent
// CHECK: | `-TemplateSpecializationType {{.*}} 'Y<T>' dependent
// CHECK: | `-TemplateArgument template
// CHECK: `-TemplateTypeParmType {{.*}} 'type-parameter-0-2' dependent depth 0 index 2
// CHECK: `-TemplateTypeParmType {{.*}} 'U' dependent depth 0 index 2

template<typename ...T> struct D { // expected-note {{candidate}} \
// expected-note {{implicit deduction guide declared as 'template <typename ...T> D(D<T...>) -> D<T...>'}}
template<typename... U> using B = int(int (*...p)(T, U));
template<typename U1, typename U2> D(B<U1, U2>*); // expected-note {{candidate}} \
// expected-note {{implicit deduction guide declared as 'template <typename ...T, typename U1, typename U2> D(B<type-parameter-0-1, type-parameter-0-2> *) -> D<T...>'}}
// expected-note {{implicit deduction guide declared as 'template <typename ...T, typename U1, typename U2> D(B<U1, U2> *) -> D<T...>'}}
};
int f(int(int, int), int(int, int));
// FIXME: We can't deduce this because we can't deduce through a
Expand All @@ -141,14 +141,14 @@ using DT = D<int, int>;
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 0 ... T
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 1 U1
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 2 U2
// CHECK: `-CXXDeductionGuideDecl {{.*}} 'auto (B<type-parameter-0-1, type-parameter-0-2> *) -> D<T...>'
// CHECK: `-ParmVarDecl {{.*}} 'B<type-parameter-0-1, type-parameter-0-2> *'
// CHECK: FunctionProtoType {{.*}} 'auto (B<type-parameter-0-1, type-parameter-0-2> *) -> D<T...>' dependent trailing_return
// CHECK: `-CXXDeductionGuideDecl {{.*}} 'auto (B<U1, U2> *) -> D<T...>'
// CHECK: `-ParmVarDecl {{.*}} 'B<U1, U2> *'
// CHECK: FunctionProtoType {{.*}} 'auto (B<U1, U2> *) -> D<T...>' dependent trailing_return
// CHECK: |-InjectedClassNameType {{.*}} 'D<T...>' dependent
// CHECK: `-PointerType {{.*}} 'B<type-parameter-0-1, type-parameter-0-2> *' dependent
// CHECK: `-TemplateSpecializationType {{.*}} 'B<type-parameter-0-1, type-parameter-0-2>' sugar dependent alias
// CHECK: |-TemplateArgument type 'type-parameter-0-1'
// CHECK: |-TemplateArgument type 'type-parameter-0-2'
// CHECK: `-PointerType {{.*}} 'B<U1, U2> *' dependent
// CHECK: `-TemplateSpecializationType {{.*}} 'B<U1, U2>' sugar dependent alias
// CHECK: |-TemplateArgument type 'U1'
// CHECK: |-TemplateArgument type 'U2'
// CHECK: `-FunctionProtoType {{.*}} 'int (int (*)(T, U)...)' dependent cdecl
// CHECK: |-BuiltinType {{.*}} 'int'
// CHECK: `-PackExpansionType {{.*}} 'int (*)(T, U)...' dependent expansions 2
Expand Down Expand Up @@ -232,17 +232,17 @@ F s(0);
// CHECK: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 1 U
// CHECK: |-ParenExpr {{.*}} 'bool'
// CHECK: | `-CXXBoolLiteralExpr {{.*}} 'bool' false
// CHECK: |-CXXDeductionGuideDecl {{.*}} implicit <deduction guide for F> 'auto (type-parameter-0-1) -> F<>'
// CHECK: | `-ParmVarDecl {{.*}} 'type-parameter-0-1'
// CHECK: |-CXXDeductionGuideDecl {{.*}} implicit <deduction guide for F> 'auto (U) -> F<>'
// CHECK: | `-ParmVarDecl {{.*}} 'U'
// CHECK: `-CXXDeductionGuideDecl {{.*}} implicit <deduction guide for F> 'auto (int) -> F<>'
// CHECK: |-TemplateArgument integral ''x''
// CHECK: |-TemplateArgument type 'int'
// CHECK: | `-BuiltinType {{.*}} 'int'
// CHECK: `-ParmVarDecl {{.*}} 'int'
// CHECK: FunctionProtoType {{.*}} 'auto (type-parameter-0-1) -> F<>' dependent trailing_return cdecl
// CHECK: FunctionProtoType {{.*}} 'auto (U) -> F<>' dependent trailing_return cdecl
// CHECK: |-InjectedClassNameType {{.*}} 'F<>' dependent
// CHECK: | `-CXXRecord {{.*}} 'F'
// CHECK: `-TemplateTypeParmType {{.*}} 'type-parameter-0-1' dependent depth 0 index 1
// CHECK: `-TemplateTypeParmType {{.*}} 'U' dependent depth 0 index 1

template<typename T>
struct G { T t; };
Expand All @@ -259,7 +259,7 @@ AG ag = {1};
// Verify that the aggregate deduction guide for alias templates is built.
// CHECK-LABEL: Dumping <deduction guide for AG>
// CHECK: FunctionTemplateDecl
// CHECK: |-CXXDeductionGuideDecl {{.*}} 'auto (type-parameter-0-0) -> G<type-parameter-0-0>'
// CHECK: |-CXXDeductionGuideDecl {{.*}} 'auto (X) -> G<X>'
// CHECK: `-CXXDeductionGuideDecl {{.*}} 'auto (int) -> G<int>' implicit_instantiation
// CHECK: |-TemplateArgument type 'int'
// CHECK: | `-BuiltinType {{.*}} 'int'
Expand All @@ -281,24 +281,24 @@ struct Foo {
template <typename U>
using AFoo = Foo<G<U>>;
// Verify that the require-clause from the Foo deduction guide is transformed.
// The D occurrence should be rewritten to G<type-parameter-0-0>.
// The D occurrence should be rewritten to G<U>.
//
// CHECK-LABEL: Dumping <deduction guide for AFoo>
// CHECK: FunctionTemplateDecl {{.*}} implicit <deduction guide for AFoo>
// CHECK-NEXT: |-TemplateTypeParmDecl {{.*}} typename depth 0 index 0 U
// CHECK-NEXT: |-BinaryOperator {{.*}} '&&'
// CHECK-NEXT: | |-ParenExpr {{.*}} 'bool'
// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'bool' '=='
// CHECK-NEXT: | | |-UnaryExprOrTypeTraitExpr {{.*}} 'G<type-parameter-0-0>'
// CHECK-NEXT: | | |-UnaryExprOrTypeTraitExpr {{.*}} 'G<U>'
// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}}
// CHECK-NEXT: | | `-IntegerLiteral {{.*}}
// CHECK-NEXT: | `-TypeTraitExpr {{.*}} 'bool' __is_deducible
// CHECK-NEXT: | |-DeducedTemplateSpecializationType {{.*}} 'AFoo' dependent
// CHECK-NEXT: | | `-name: 'AFoo'
// CHECK-NEXT: | | `-TypeAliasTemplateDecl {{.+}} AFoo
// CHECK-NEXT: | `-TemplateSpecializationType {{.*}} 'Foo<G<type-parameter-0-0>>' dependent
// CHECK: |-CXXDeductionGuideDecl {{.*}} implicit <deduction guide for AFoo> 'auto (G<type-parameter-0-0>) -> Foo<G<type-parameter-0-0>>'
// CHECK-NEXT: | `-ParmVarDecl {{.*}} 'G<type-parameter-0-0>'
// CHECK-NEXT: | `-TemplateSpecializationType {{.*}} 'Foo<G<U>>' dependent
// CHECK: |-CXXDeductionGuideDecl {{.*}} implicit <deduction guide for AFoo> 'auto (G<U>) -> Foo<G<U>>'
// CHECK-NEXT: | `-ParmVarDecl {{.*}} 'G<U>'
// CHECK-NEXT: `-CXXDeductionGuideDecl {{.*}} implicit used <deduction guide for AFoo> 'auto (G<int>) -> Foo<G<int>>' implicit_instantiation
// CHECK-NEXT: |-TemplateArgument type 'int'
// CHECK-NEXT: | `-BuiltinType {{.*}} 'int'
Expand All @@ -321,20 +321,20 @@ namespace TTP {
// CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} class depth 0 index 0 T{{$}}
// CHECK-NEXT: |-TemplateTemplateParmDecl {{.+}} depth 0 index 1 TT{{$}}
// CHECK-NEXT: | `-TemplateTypeParmDecl {{.+}} class depth 1 index 0{{$}}
// CHECK-NEXT: |-CXXDeductionGuideDecl {{.+}} 'auto (template-parameter-0-1<T>) -> B<T>'{{$}}
// CHECK-NEXT: | `-ParmVarDecl {{.+}} 'template-parameter-0-1<T>'{{$}}
// CHECK-NEXT: |-CXXDeductionGuideDecl {{.+}} 'auto (TT<T>) -> B<T>'{{$}}
// CHECK-NEXT: | `-ParmVarDecl {{.+}} 'TT<T>'{{$}}
// CHECK-NEXT: `-CXXDeductionGuideDecl {{.+}} 'auto (A<int>) -> TTP::B<int>'
// CHECK-NEXT: |-TemplateArgument type 'int'
// CHECK-NEXT: | `-BuiltinType {{.+}} 'int'{{$}}
// CHECK-NEXT: |-TemplateArgument template 'TTP::A'{{$}}
// CHECK-NEXT: | `-ClassTemplateDecl {{.+}} A{{$}}
// CHECK-NEXT: `-ParmVarDecl {{.+}} 'A<int>':'TTP::A<int>'{{$}}
// CHECK-NEXT: FunctionProtoType {{.+}} 'auto (template-parameter-0-1<T>) -> B<T>' dependent trailing_return cdecl{{$}}
// CHECK-NEXT: FunctionProtoType {{.+}} 'auto (TT<T>) -> B<T>' dependent trailing_return cdecl{{$}}
// CHECK-NEXT: |-InjectedClassNameType {{.+}} 'B<T>' dependent{{$}}
// CHECK-NEXT: | `-CXXRecord {{.+}} 'B'{{$}}
// CHECK-NEXT: `-ElaboratedType {{.+}} 'template-parameter-0-1<T>' sugar dependent{{$}}
// CHECK-NEXT: `-TemplateSpecializationType {{.+}} 'template-parameter-0-1<T>' dependent{{$}}
// CHECK-NEXT: |-name: 'template-parameter-0-1' qualified
// CHECK-NEXT: `-ElaboratedType {{.+}} 'TT<T>' sugar dependent{{$}}
// CHECK-NEXT: `-TemplateSpecializationType {{.+}} 'TT<T>' dependent{{$}}
// CHECK-NEXT: |-name: 'TT':'template-parameter-0-1' qualified
// CHECK-NEXT: | `-TemplateTemplateParmDecl {{.+}} depth 0 index 1
// CHECK-NEXT: `-TemplateArgument type 'T':'type-parameter-0-0'{{$}}
// CHECK-NEXT: `-TemplateTypeParmType {{.+}} 'T' dependent depth 0 index 0{{$}}
Expand Down
13 changes: 6 additions & 7 deletions clang/tools/driver/cc1as_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,9 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
MCOptions.MCNoWarn = Opts.NoWarn;
MCOptions.MCFatalWarnings = Opts.FatalWarnings;
MCOptions.MCNoTypeCheck = Opts.NoTypeCheck;
MCOptions.ShowMCInst = Opts.ShowInst;
MCOptions.AsmVerbose = true;
MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
MCOptions.ABIName = Opts.TargetABI;

// FIXME: There is a bit of code duplication with addPassesToEmitFile.
Expand All @@ -545,10 +548,8 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));

auto FOut = std::make_unique<formatted_raw_ostream>(*Out);
Str.reset(TheTarget->createAsmStreamer(
Ctx, std::move(FOut), /*asmverbose*/ true,
/*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB),
Opts.ShowInst));
Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), IP,
std::move(CE), std::move(MAB)));
} else if (Opts.OutputType == AssemblerInvocation::FT_Null) {
Str.reset(createNullStreamer(Ctx));
} else {
Expand All @@ -571,9 +572,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,

Triple T(Opts.Triple);
Str.reset(TheTarget->createMCObjectStreamer(
T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI,
Opts.RelaxAll, Opts.IncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI));
Str.get()->initSections(Opts.NoExecStack, *STI);
}

Expand Down
20 changes: 12 additions & 8 deletions clang/unittests/AST/Interp/toAPValue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ TEST(ToAPValue, Pointers) {
auto AST = tooling::buildASTFromCodeWithArgs(
Code, {"-fexperimental-new-constant-interpreter"});

auto &ASTCtx = AST->getASTContext();
auto &Ctx = AST->getASTContext().getInterpContext();
Program &Prog = Ctx.getProgram();

Expand All @@ -47,7 +48,7 @@ TEST(ToAPValue, Pointers) {
const Pointer &GP = getGlobalPtr("b");
const Pointer &P = GP.deref<Pointer>();
ASSERT_TRUE(P.isLive());
APValue A = P.toAPValue();
APValue A = P.toAPValue(ASTCtx);
ASSERT_TRUE(A.isLValue());
ASSERT_TRUE(A.hasLValuePath());
const auto &Path = A.getLValuePath();
Expand All @@ -62,7 +63,7 @@ TEST(ToAPValue, Pointers) {
const Pointer &GP = getGlobalPtr("p");
const Pointer &P = GP.deref<Pointer>();
ASSERT_TRUE(P.isIntegralPointer());
APValue A = P.toAPValue();
APValue A = P.toAPValue(ASTCtx);
ASSERT_TRUE(A.isLValue());
ASSERT_TRUE(A.getLValueBase().isNull());
APSInt I;
Expand All @@ -77,7 +78,7 @@ TEST(ToAPValue, Pointers) {
const Pointer &GP = getGlobalPtr("nullp");
const Pointer &P = GP.deref<Pointer>();
ASSERT_TRUE(P.isIntegralPointer());
APValue A = P.toAPValue();
APValue A = P.toAPValue(ASTCtx);
ASSERT_TRUE(A.isLValue());
ASSERT_TRUE(A.getLValueBase().isNull());
ASSERT_TRUE(A.isNullPointer());
Expand All @@ -96,6 +97,7 @@ TEST(ToAPValue, FunctionPointers) {
auto AST = tooling::buildASTFromCodeWithArgs(
Code, {"-fexperimental-new-constant-interpreter"});

auto &ASTCtx = AST->getASTContext();
auto &Ctx = AST->getASTContext().getInterpContext();
Program &Prog = Ctx.getProgram();

Expand All @@ -117,7 +119,7 @@ TEST(ToAPValue, FunctionPointers) {
const Pointer &GP = getGlobalPtr("func");
const FunctionPointer &FP = GP.deref<FunctionPointer>();
ASSERT_FALSE(FP.isZero());
APValue A = FP.toAPValue();
APValue A = FP.toAPValue(ASTCtx);
ASSERT_TRUE(A.hasValue());
ASSERT_TRUE(A.isLValue());
ASSERT_TRUE(A.hasLValuePath());
Expand All @@ -132,7 +134,7 @@ TEST(ToAPValue, FunctionPointers) {
ASSERT_NE(D, nullptr);
const Pointer &GP = getGlobalPtr("nullp");
const auto &P = GP.deref<FunctionPointer>();
APValue A = P.toAPValue();
APValue A = P.toAPValue(ASTCtx);
ASSERT_TRUE(A.isLValue());
ASSERT_TRUE(A.getLValueBase().isNull());
ASSERT_TRUE(A.isNullPointer());
Expand All @@ -151,6 +153,7 @@ TEST(ToAPValue, FunctionPointersC) {
auto AST = tooling::buildASTFromCodeWithArgs(
Code, {"-x", "c", "-fexperimental-new-constant-interpreter"});

auto &ASTCtx = AST->getASTContext();
auto &Ctx = AST->getASTContext().getInterpContext();
Program &Prog = Ctx.getProgram();

Expand All @@ -174,7 +177,7 @@ TEST(ToAPValue, FunctionPointersC) {
ASSERT_TRUE(GP.isLive());
const FunctionPointer &FP = GP.deref<FunctionPointer>();
ASSERT_FALSE(FP.isZero());
APValue A = FP.toAPValue();
APValue A = FP.toAPValue(ASTCtx);
ASSERT_TRUE(A.hasValue());
ASSERT_TRUE(A.isLValue());
const auto &Path = A.getLValuePath();
Expand All @@ -197,6 +200,7 @@ TEST(ToAPValue, MemberPointers) {
auto AST = tooling::buildASTFromCodeWithArgs(
Code, {"-fexperimental-new-constant-interpreter"});

auto &ASTCtx = AST->getASTContext();
auto &Ctx = AST->getASTContext().getInterpContext();
Program &Prog = Ctx.getProgram();

Expand All @@ -218,7 +222,7 @@ TEST(ToAPValue, MemberPointers) {
const Pointer &GP = getGlobalPtr("pm");
ASSERT_TRUE(GP.isLive());
const MemberPointer &FP = GP.deref<MemberPointer>();
APValue A = FP.toAPValue();
APValue A = FP.toAPValue(ASTCtx);
ASSERT_EQ(A.getMemberPointerDecl(), getDecl("m"));
ASSERT_EQ(A.getKind(), APValue::MemberPointer);
}
Expand All @@ -228,7 +232,7 @@ TEST(ToAPValue, MemberPointers) {
ASSERT_TRUE(GP.isLive());
const MemberPointer &NP = GP.deref<MemberPointer>();
ASSERT_TRUE(NP.isZero());
APValue A = NP.toAPValue();
APValue A = NP.toAPValue(ASTCtx);
ASSERT_EQ(A.getKind(), APValue::MemberPointer);
}
}
28 changes: 28 additions & 0 deletions clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,4 +473,32 @@ TEST_F(EnvironmentTest, Stmt) {
Env.getResultObjectLocation(*Init);
}

// Crash regression test: build an Environment for the call operator of a
// lambda that captures `this` inside a default member initializer.
TEST_F(EnvironmentTest, LambdaCapturingThisInFieldInitializer) {
  using namespace ast_matchers;
  std::string Code = R"cc(
struct S {
int f{[this]() { return 1; }()};
};
)cc";

  auto AST =
      tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++11"});
  auto &ASTCtx = AST->getASTContext();

  // The snippet itself must parse without errors.
  ASSERT_EQ(ASTCtx.getDiagnostics().getClient()->getNumErrors(), 0U);

  // Locate the lambda's `operator()`.
  auto CallOpMatcher =
      cxxMethodDecl(hasName("operator()"), ofClass(cxxRecordDecl(isLambda())))
          .bind("method");
  auto *CallOp =
      selectFirst<CXXMethodDecl>("method", match(CallOpMatcher, ASTCtx));

  Environment Env(DAContext, *CallOp);
  // Initialization used to crash for this input.
  Env.initialize();
  // The pointee of the captured `this` must have a storage location.
  ASSERT_NE(nullptr, Env.getThisPointeeStorageLocation());
}

} // namespace
1 change: 1 addition & 0 deletions clang/unittests/Format/FormatTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8742,6 +8742,7 @@ TEST_F(FormatTest, FunctionAnnotations) {
" << abc;");
verifyFormat("MACRO(abc)::function() // wrap\n"
" << abc;");
verifyFormat("FOO(bar)();", getLLVMStyleWithColumns(0));
}

TEST_F(FormatTest, BreaksDesireably) {
Expand Down
38 changes: 38 additions & 0 deletions clang/unittests/Format/TokenAnnotatorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,26 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference);

Tokens = annotate("#define FOO bar(a * b)");
ASSERT_EQ(Tokens.size(), 10u) << Tokens;
EXPECT_TOKEN(Tokens[6], tok::star, TT_BinaryOperator);

Tokens = annotate("#define FOO foo.bar(a & b)");
ASSERT_EQ(Tokens.size(), 12u) << Tokens;
EXPECT_TOKEN(Tokens[8], tok::amp, TT_BinaryOperator);

Tokens = annotate("#define FOO foo::bar(a && b)");
ASSERT_EQ(Tokens.size(), 12u) << Tokens;
EXPECT_TOKEN(Tokens[8], tok::ampamp, TT_BinaryOperator);

Tokens = annotate("#define FOO foo bar(a *b)");
ASSERT_EQ(Tokens.size(), 11u) << Tokens;
EXPECT_TOKEN(Tokens[7], tok::star, TT_PointerOrReference);

Tokens = annotate("#define FOO void foo::bar(a &b)");
ASSERT_EQ(Tokens.size(), 13u) << Tokens;
EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference);

Tokens = annotate("void f() {\n"
" while (p < a && *p == 'a')\n"
" p++;\n"
Expand Down Expand Up @@ -1910,6 +1930,10 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionAnnotations) {
"A(T) noexcept;");
ASSERT_EQ(Tokens.size(), 12u) << Tokens;
EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);

Tokens = annotate("FOO(bar)();");
ASSERT_EQ(Tokens.size(), 8u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::r_paren, TT_Unknown);
}

TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) {
Expand Down Expand Up @@ -2105,6 +2129,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) {
ASSERT_EQ(Tokens.size(), 21u) << Tokens;
EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown);

auto Style = getLLVMStyle();
Style.StatementAttributeLikeMacros.push_back("emit");
Tokens = annotate("emit foo()->bar;", Style);
ASSERT_EQ(Tokens.size(), 8u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_StatementAttributeLikeMacro);
EXPECT_TOKEN(Tokens[4], tok::arrow, TT_Unknown);

// Mixed
Tokens = annotate("auto f() -> int { auto a = b()->c; }");
ASSERT_EQ(Tokens.size(), 18u) << Tokens;
Expand Down Expand Up @@ -2930,6 +2961,13 @@ TEST_F(TokenAnnotatorTest, StartOfName) {
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::at, TT_ObjCDecl);
EXPECT_TOKEN(Tokens[2], tok::identifier, TT_StartOfName);

auto Style = getLLVMStyle();
Style.StatementAttributeLikeMacros.push_back("emit");
Tokens = annotate("emit foo = 0;", Style);
ASSERT_EQ(Tokens.size(), 6u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_StatementAttributeLikeMacro);
EXPECT_TOKEN(Tokens[1], tok::identifier, TT_Unknown);
}

TEST_F(TokenAnnotatorTest, BraceKind) {
Expand Down
14 changes: 7 additions & 7 deletions clang/unittests/Support/TimeProfilerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,18 @@ bool compileFromString(StringRef Code, StringRef Standard, StringRef File,
}

std::string GetMetadata(json::Object *Event) {
std::string Metadata;
llvm::raw_string_ostream OS(Metadata);
std::string M;
llvm::raw_string_ostream OS(M);
if (json::Object *Args = Event->getObject("args")) {
if (auto Detail = Args->getString("detail"))
OS << Detail;
// Use only filename to not include os-specific path separators.
if (auto File = Args->getString("file"))
OS << ", " << llvm::sys::path::filename(*File);
OS << (M.empty() ? "" : ", ") << llvm::sys::path::filename(*File);
if (auto Line = Args->getInteger("line"))
OS << ":" << *Line;
}
return Metadata;
return M;
}

// Returns pretty-printed trace graph.
Expand Down Expand Up @@ -209,7 +209,7 @@ constexpr int slow_init_list[] = {1, 1, 2, 3, 5, 8, 13, 21}; // 25th line
ASSERT_TRUE(compileFromString(Code, "-std=c++20", "test.cc"));
std::string Json = teardownProfiler();
ASSERT_EQ(R"(
Frontend
Frontend (test.cc)
| ParseDeclarationOrFunctionDefinition (test.cc:2:1)
| ParseDeclarationOrFunctionDefinition (test.cc:6:1)
| | ParseFunctionDefinition (slow_func)
Expand Down Expand Up @@ -266,7 +266,7 @@ TEST(TimeProfilerTest, TemplateInstantiations) {
/*Headers=*/{{"a.h", A_H}, {"b.h", B_H}}));
std::string Json = teardownProfiler();
ASSERT_EQ(R"(
Frontend
Frontend (test.cc)
| ParseFunctionDefinition (fooB)
| ParseFunctionDefinition (fooMTA)
| ParseFunctionDefinition (fooA)
Expand All @@ -291,7 +291,7 @@ struct {
ASSERT_TRUE(compileFromString(Code, "-std=c99", "test.c"));
std::string Json = teardownProfiler();
ASSERT_EQ(R"(
Frontend
Frontend (test.c)
| ParseDeclarationOrFunctionDefinition (test.c:2:1)
| | isIntegerConstantExpr (<test.c:3:18>)
| | EvaluateKnownConstIntCheckOverflow (<test.c:3:18>)
Expand Down
6 changes: 3 additions & 3 deletions clang/utils/TableGen/NeonEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
char typeCode = '\0';
bool printNumber = true;

if (CK == ClassB && TargetGuard == "")
if (CK == ClassB && TargetGuard == "neon")
return "";

if (T.isBFloat16())
Expand All @@ -976,7 +976,7 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
break;
}
}
if (CK == ClassB && TargetGuard == "") {
if (CK == ClassB && TargetGuard == "neon") {
typeCode = '\0';
}

Expand Down Expand Up @@ -1078,7 +1078,7 @@ std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
S += "_" + getInstTypeCode(InBaseType, LocalCK);
}

if (LocalCK == ClassB && TargetGuard == "")
if (LocalCK == ClassB && TargetGuard == "neon")
S += "_v";

// Insert a 'q' before the first '_' character so that it ends up before
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/asan/asan_interceptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ INTERCEPTOR(int, atexit, void (*func)()) {
extern "C" {
extern int _pthread_atfork(void (*prepare)(), void (*parent)(),
void (*child)());
};
}

INTERCEPTOR(int, pthread_atfork, void (*prepare)(), void (*parent)(),
void (*child)()) {
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ set(aarch64_SOURCES

if (COMPILER_RT_HAS_AARCH64_SME)
if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c aarch64/sme-abi-vg.c aarch64/sme-libc-routines.c)
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-abi-vg.c aarch64/sme-libc-routines.c)
message(STATUS "AArch64 SME ABI routines enabled")
set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
else()
Expand Down Expand Up @@ -739,6 +739,7 @@ endif()
set(powerpc64le_SOURCES ${powerpc64_SOURCES})

set(riscv_SOURCES
riscv/feature_bits.c
riscv/fp_mode.c
riscv/save.S
riscv/restore.S
Expand Down
344 changes: 344 additions & 0 deletions compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,344 @@
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Routines taken from libc/AOR_v20.02/string/aarch64

#include "../assembly.h"

#ifdef __aarch64__

#define L(l) .L ## l

//
// __arm_sc_memcpy / __arm_sc_memmove
//

/* Register names used by __arm_sc_memcpy / __arm_sc_memmove.
   dstin, src and count are the three incoming argument registers (x0-x2);
   dstin is never written by the routine. */
#define dstin x0
#define src x1
#define count x2
/* Working destination pointer and one-past-the-end pointers. */
#define dst x3
#define srcend1 x4
#define dstend1 x5
/* 16-byte data temporaries, each a low/high pair of 64-bit registers.
   The *_lw names are the 32-bit views of the corresponding low register. */
#define A_l x6
#define A_lw w6
#define A_h x7
#define B_l x8
#define B_lw w8
#define B_h x9
#define C_l x10
#define C_lw w10
#define C_h x11
#define D_l x12
#define D_h x13
#define E_l x14
#define E_h x15
#define F_l x16
#define F_h x17
/* G and H are not extra registers: they reuse count/dst and src/srcend1, so
   loading into them destroys those values. */
#define G_l count
#define G_h dst
#define H_l src
#define H_h srcend1
/* tmp1 shares x14 with E_l. */
#define tmp1 x14

/* __arm_sc_memcpy / __arm_sc_memmove: copy count bytes from src to dstin.
   dstin (x0) is never written, so it is still in x0 on return.

   This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point. It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.
   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies. The overhead of the overlap
   check is negligible since it is only required for large copies: the small
   and medium paths load all of their data before storing any of it.
   Large copies use a software pipelined loop processing 64 bytes per iteration.
   The destination pointer is 16-byte aligned to minimize unaligned accesses.
   The loop tail is handled by always copying 64 bytes from the end.
*/

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memcpy)
/* srcend1/dstend1 point one byte past the end of each buffer. */
add srcend1, src, count
add dstend1, dstin, count
cmp count, 128
b.hi L(copy_long)
cmp count, 32
b.hi L(copy32_128)

/* Small copies: 0..32 bytes. */
cmp count, 16
b.lo L(copy16)
/* 16..32 bytes: copy the first and the last 16 bytes; the two ranges may
   overlap each other. */
ldp A_l, A_h, [src]
ldp D_l, D_h, [srcend1, -16]
stp A_l, A_h, [dstin]
stp D_l, D_h, [dstend1, -16]
ret

/* Copy 8-15 bytes. */
L(copy16):
/* Bit 3 of count clear means count < 8 here. */
tbz count, 3, L(copy8)
ldr A_l, [src]
ldr A_h, [srcend1, -8]
str A_l, [dstin]
str A_h, [dstend1, -8]
ret

.p2align 3
/* Copy 4-7 bytes. */
L(copy8):
/* Bit 2 of count clear means count < 4 here. */
tbz count, 2, L(copy4)
ldr A_lw, [src]
ldr B_lw, [srcend1, -4]
str A_lw, [dstin]
str B_lw, [dstend1, -4]
ret

/* Copy 0..3 bytes using a branchless sequence. */
L(copy4):
cbz count, L(copy0)
/* Copy the first byte, the byte at index count/2 and the last byte. For
   count 1 or 2 some of these are the same byte. */
lsr tmp1, count, 1
ldrb A_lw, [src]
ldrb C_lw, [srcend1, -1]
ldrb B_lw, [src, tmp1]
strb A_lw, [dstin]
strb B_lw, [dstin, tmp1]
strb C_lw, [dstend1, -1]
L(copy0):
ret

.p2align 4
/* Medium copies: 33..128 bytes. */
L(copy32_128):
/* Load 32 bytes from each end before deciding how much more is needed. */
ldp A_l, A_h, [src]
ldp B_l, B_h, [src, 16]
ldp C_l, C_h, [srcend1, -32]
ldp D_l, D_h, [srcend1, -16]
cmp count, 64
b.hi L(copy128)
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstend1, -32]
stp D_l, D_h, [dstend1, -16]
ret

.p2align 4
/* Copy 65..128 bytes. */
L(copy128):
ldp E_l, E_h, [src, 32]
ldp F_l, F_h, [src, 48]
cmp count, 96
b.ls L(copy96)
/* More than 96 bytes: also copy bytes -64..-33 from the end. G and H alias
   count/dst and src/srcend1, so those are clobbered here; everything after
   this point addresses memory through dstin and dstend1 only. */
ldp G_l, G_h, [srcend1, -64]
ldp H_l, H_h, [srcend1, -48]
stp G_l, G_h, [dstend1, -64]
stp H_l, H_h, [dstend1, -48]
L(copy96):
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp E_l, E_h, [dstin, 32]
stp F_l, F_h, [dstin, 48]
stp C_l, C_h, [dstend1, -32]
stp D_l, D_h, [dstend1, -16]
ret

.p2align 4
/* Copy more than 128 bytes. */
L(copy_long):
/* Use backwards copy if there is an overlap. */
/* tmp1 = dstin - src. Zero means source and destination are the same buffer
   and there is nothing to do. An unsigned result below count means dstin
   lies inside [src, src + count), so the copy must run backwards. */
sub tmp1, dstin, src
cbz tmp1, L(copy0)
cmp tmp1, count
b.lo L(copy_long_backwards)

/* Copy 16 bytes and then align dst to 16-byte alignment. */

ldp D_l, D_h, [src]
and tmp1, dstin, 15
bic dst, dstin, 15
sub src, src, tmp1
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
ldp B_l, B_h, [src, 32]
ldp C_l, C_h, [src, 48]
ldp D_l, D_h, [src, 64]!
subs count, count, 128 + 16 /* Test and readjust count. */
b.ls L(copy64_from_end)
/* Each iteration stores the 64 bytes loaded by the previous one while
   loading the next 64 bytes. */
L(loop64):
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [src, 32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [src, 48]
stp D_l, D_h, [dst, 64]!
ldp D_l, D_h, [src, 64]!
subs count, count, 64
b.hi L(loop64)

/* Write the last iteration and copy 64 bytes from the end. */
L(copy64_from_end):
ldp E_l, E_h, [srcend1, -64]
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [srcend1, -48]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [srcend1, -32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [srcend1, -16]
stp D_l, D_h, [dst, 64]
stp E_l, E_h, [dstend1, -64]
stp A_l, A_h, [dstend1, -48]
stp B_l, B_h, [dstend1, -32]
stp C_l, C_h, [dstend1, -16]
ret

.p2align 4

/* Large backwards copy for overlapping copies.
Copy 16 bytes and then align dst to 16-byte alignment. */
L(copy_long_backwards):
ldp D_l, D_h, [srcend1, -16]
/* tmp1 = misalignment of the destination end; both end pointers and count
   are moved back by that amount. */
and tmp1, dstend1, 15
sub srcend1, srcend1, tmp1
sub count, count, tmp1
ldp A_l, A_h, [srcend1, -16]
stp D_l, D_h, [dstend1, -16]
ldp B_l, B_h, [srcend1, -32]
ldp C_l, C_h, [srcend1, -48]
ldp D_l, D_h, [srcend1, -64]!
sub dstend1, dstend1, tmp1
subs count, count, 128
b.ls L(copy64_from_start)

/* Mirror image of loop64, walking down from the end of the buffers. */
L(loop64_backwards):
stp A_l, A_h, [dstend1, -16]
ldp A_l, A_h, [srcend1, -16]
stp B_l, B_h, [dstend1, -32]
ldp B_l, B_h, [srcend1, -32]
stp C_l, C_h, [dstend1, -48]
ldp C_l, C_h, [srcend1, -48]
stp D_l, D_h, [dstend1, -64]!
ldp D_l, D_h, [srcend1, -64]!
subs count, count, 64
b.hi L(loop64_backwards)

/* Write the last iteration and copy 64 bytes from the start. */
L(copy64_from_start):
/* G aliases count/dst, which are not read again after this load. */
ldp G_l, G_h, [src, 48]
stp A_l, A_h, [dstend1, -16]
ldp A_l, A_h, [src, 32]
stp B_l, B_h, [dstend1, -32]
ldp B_l, B_h, [src, 16]
stp C_l, C_h, [dstend1, -48]
ldp C_l, C_h, [src]
stp D_l, D_h, [dstend1, -64]
stp G_l, G_h, [dstin, 48]
stp A_l, A_h, [dstin, 32]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstin]
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memcpy)

// __arm_sc_memmove is the same entry point as __arm_sc_memcpy.
DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)


//
// __arm_sc_memset
//

/* Register names used by __arm_sc_memset.
   dstin, val/valw and count are the three incoming argument registers
   (x0-x2); valw is the 32-bit view of val. dstin is never written. */
#define dstin x0
#define val x1
#define valw w1
#define count x2
/* Working destination pointer and one-past-the-end pointer. */
#define dst x3
#define dstend2 x4
/* Scratch register holding the value read from DCZID_EL0. */
#define zva_val x5

/* __arm_sc_memset: store the low byte of valw into count bytes starting at
   dstin. dstin (x0) is never written, so it is still in x0 on return.
   Sizes are split into 0..15, 16..96 and more than 96 bytes; every path
   finishes with stores addressed from the end of the buffer, so ranges may
   overlap instead of needing a byte-exact tail loop. */
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
/* Replicate the fill byte into all 16 bytes of v0/q0.
   NOTE(review): dup (and the mov from v0.D[0] below) operate on an Advanced
   SIMD vector register; confirm this is permitted in every mode a
   streaming-compatible caller can be in. */
dup v0.16B, valw
add dstend2, dstin, count

cmp count, 96
b.hi L(set_long)
cmp count, 16
b.hs L(set_medium)
/* val now holds the fill byte replicated eight times. */
mov val, v0.D[0]

/* Set 0..15 bytes. */
/* Bit 3 set: 8..15 bytes, two 8-byte stores from either end. */
tbz count, 3, 1f
str val, [dstin]
str val, [dstend2, -8]
ret
nop
/* Bit 2 set: 4..7 bytes, two 4-byte stores from either end. */
1: tbz count, 2, 2f
str valw, [dstin]
str valw, [dstend2, -4]
ret
/* 0..3 bytes: nothing for 0, one byte for 1, and for 2..3 the first byte
   plus the last two bytes. */
2: cbz count, 3f
strb valw, [dstin]
tbz count, 1, 3f
strh valw, [dstend2, -2]
3: ret

/* Set 16..96 bytes (count == 16 also branches here via b.hs). */
L(set_medium):
str q0, [dstin]
/* Bit 6 set means count >= 64. */
tbnz count, 6, L(set96)
str q0, [dstend2, -16]
/* Bit 5 set means 32..63 bytes: add a second 16 bytes at each end. */
tbz count, 5, 1f
str q0, [dstin, 16]
str q0, [dstend2, -32]
1: ret

.p2align 4
/* Set 64..96 bytes. Write 64 bytes from the start and
32 bytes from the end. */
L(set96):
str q0, [dstin, 16]
stp q0, q0, [dstin, 32]
stp q0, q0, [dstend2, -32]
ret

.p2align 4
L(set_long):
/* More than 96 bytes. Store the first 16 bytes unaligned and continue from
   dst, which is dstin rounded down to a 16-byte boundary. */
and valw, valw, 255
bic dst, dstin, 15
str q0, [dstin]
/* Take the DC ZVA path only when count >= 160 and the fill byte is zero;
   otherwise the ccmp leaves "ne" set and the plain store loop is used. */
cmp count, 160
ccmp valw, 0, 0, hs
b.ne L(no_zva)

#ifndef SKIP_ZVA_CHECK
/* Fall back to plain stores unless the low five bits of DCZID_EL0 are
   exactly 4. */
mrs zva_val, dczid_el0
and zva_val, zva_val, 31
cmp zva_val, 4 /* ZVA size is 64 bytes. */
b.ne L(no_zva)
#endif
/* Fill with ordinary stores, then round dst down to a 64-byte boundary for
   the DC ZVA loop. */
str q0, [dst, 16]
stp q0, q0, [dst, 32]
bic dst, dst, 63
sub count, dstend2, dst /* Count is now 64 too large. */
sub count, count, 128 /* Adjust count and bias for loop. */

.p2align 4
L(zva_loop):
add dst, dst, 64
dc zva, dst
subs count, count, 64
b.hi L(zva_loop)
/* Finish with 64 bytes stored relative to the end of the buffer. */
stp q0, q0, [dstend2, -64]
stp q0, q0, [dstend2, -32]
ret

L(no_zva):
sub count, dstend2, dst /* Count is 16 too large. */
sub dst, dst, 16 /* Dst is biased by -32. */
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
/* 64 bytes per iteration using two paired 16-byte stores. */
L(no_zva_loop):
stp q0, q0, [dst, 32]
stp q0, q0, [dst, 64]!
subs count, count, 64
b.hi L(no_zva_loop)
/* Finish with 64 bytes stored relative to the end of the buffer. */
stp q0, q0, [dstend2, -64]
stp q0, q0, [dstend2, -32]
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memset)

#endif // __aarch64__
75 changes: 0 additions & 75 deletions compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
Original file line number Diff line number Diff line change
@@ -1,80 +1,5 @@
#include <stddef.h>

// WARNING: When building the scalar versions of these functions you need to
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
// from recognising a loop idiom and planting calls to memcpy!

// Copies n bytes from src to dest one byte at a time, lowest address first,
// and returns dest. This must stay an explicit byte loop (see the build-flag
// warning above) so that it is not turned back into a call to memcpy.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  const unsigned char *in = (const unsigned char *)src;
  unsigned char *out = (unsigned char *)dest;
  size_t remaining = n;

  while (remaining != 0) {
    *out++ = *in++;
    --remaining;
  }

  return dest;
}

// memcpy entry point: copies n bytes from src to dest and returns dest.
//
// If dest and src overlap then behaviour is undefined, hence we can add the
// restrict keywords here. This also matches the definition of the libc memcpy
// according to the man page.
//
// The work is delegated to the forward byte-copy helper.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
size_t n) __arm_streaming_compatible {
return __arm_sc_memcpy_fwd(dest, src, n);
}

// Stores the value of c, converted to unsigned char, into each of the first
// n bytes at dest and returns dest. Kept as an explicit byte loop.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *out = (unsigned char *)dest;
  size_t remaining = n;

  while (remaining != 0) {
    *out++ = fill;
    --remaining;
  }

  return dest;
}

// Copies n bytes from src to dest one byte at a time, highest address first,
// and returns dest.
// TODO: Improve performance by copying larger chunks in reverse, or by
// using SVE.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  const unsigned char *in = (const unsigned char *)src;
  unsigned char *out = (unsigned char *)dest;

  for (size_t left = n; left != 0; --left)
    out[left - 1] = in[left - 1];

  return dest;
}

// memmove must behave as if the n bytes at src were first copied into a
// scratch array that overlaps neither src nor dest, and then copied from that
// scratch array into dest.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  const unsigned char *from = (const unsigned char *)src;
  unsigned char *to = (unsigned char *)dest;

  // Regions that lie clear of each other can be copied like a plain memcpy.
  const int apart = (from > (to + n)) || (to > (from + n));

  // Forward copy is also safe when src is at the higher address: every chunk
  // is read from src before the write to dest can reach it, so later reads of
  // src are never corrupted.
  if (apart || from > to)
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Otherwise dest is not below src, and a forward copy would overwrite src
  // bytes that have not been read yet. Copy in reverse, starting at the
  // highest address.
  return __arm_sc_memcpy_rev(dest, src, n);
}

const void *__arm_sc_memchr(const void *src, int c,
size_t n) __arm_streaming_compatible {
const unsigned char *srcp = (const unsigned char *)src;
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/cpu_model/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ enum ProcessorFeatures {
FEATURE_AVX512VP2INTERSECT,
// FIXME: The features below are missing some entries compared to gcc, because
// gcc has some features that do not map one-to-one onto llvm.
FEATURE_3DNOW,
// FEATURE_3DNOW,
// FEATURE_3DNOWP,
FEATURE_ADX = 40,
// FEATURE_ABM,
Expand Down
298 changes: 298 additions & 0 deletions compiler-rt/lib/builtins/riscv/feature_bits.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
//=== feature_bits.c - Update RISC-V Feature Bits Structure -*- C -*-=========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Number of 64-bit feature groups in __riscv_feature_bits.features.
#define RISCV_FEATURE_BITS_LENGTH 1
// Standard-extension feature bits. initRISCVFeature() sets `length` to
// RISCV_FEATURE_BITS_LENGTH and fills `features`; group i holds the
// <EXT>_BITMASK bits of every extension whose <EXT>_GROUPID is i.
// Declared with hidden visibility and `nocommon`.
struct {
unsigned length;
unsigned long long features[RISCV_FEATURE_BITS_LENGTH];
} __riscv_feature_bits __attribute__((visibility("hidden"), nocommon));

// Number of 64-bit feature groups in __riscv_vendor_feature_bits.features.
#define RISCV_VENDOR_FEATURE_BITS_LENGTH 1
// Vendor-extension feature bits. initRISCVFeature() currently stores the
// MVENDORID probe value in `vendorID` and sets `length` to 0; it does not
// write `features`.
// Declared with hidden visibility and `nocommon`.
struct {
unsigned vendorID;
unsigned length;
unsigned long long features[RISCV_VENDOR_FEATURE_BITS_LENGTH];
} __riscv_vendor_feature_bits __attribute__((visibility("hidden"), nocommon));

// NOTE: Should sync-up with RISCVFeatures.td
// TODO: Maybe generate a header from tablegen then include it.
#define A_GROUPID 0
#define A_BITMASK (1ULL << 0)
#define C_GROUPID 0
#define C_BITMASK (1ULL << 2)
#define D_GROUPID 0
#define D_BITMASK (1ULL << 3)
#define F_GROUPID 0
#define F_BITMASK (1ULL << 5)
#define I_GROUPID 0
#define I_BITMASK (1ULL << 8)
#define M_GROUPID 0
#define M_BITMASK (1ULL << 12)
#define V_GROUPID 0
#define V_BITMASK (1ULL << 21)
#define ZACAS_GROUPID 0
#define ZACAS_BITMASK (1ULL << 26)
#define ZBA_GROUPID 0
#define ZBA_BITMASK (1ULL << 27)
#define ZBB_GROUPID 0
#define ZBB_BITMASK (1ULL << 28)
#define ZBC_GROUPID 0
#define ZBC_BITMASK (1ULL << 29)
#define ZBKB_GROUPID 0
#define ZBKB_BITMASK (1ULL << 30)
#define ZBKC_GROUPID 0
#define ZBKC_BITMASK (1ULL << 31)
#define ZBKX_GROUPID 0
#define ZBKX_BITMASK (1ULL << 32)
#define ZBS_GROUPID 0
#define ZBS_BITMASK (1ULL << 33)
#define ZFA_GROUPID 0
#define ZFA_BITMASK (1ULL << 34)
#define ZFH_GROUPID 0
#define ZFH_BITMASK (1ULL << 35)
#define ZFHMIN_GROUPID 0
#define ZFHMIN_BITMASK (1ULL << 36)
#define ZICBOZ_GROUPID 0
#define ZICBOZ_BITMASK (1ULL << 37)
#define ZICOND_GROUPID 0
#define ZICOND_BITMASK (1ULL << 38)
#define ZIHINTNTL_GROUPID 0
#define ZIHINTNTL_BITMASK (1ULL << 39)
#define ZIHINTPAUSE_GROUPID 0
#define ZIHINTPAUSE_BITMASK (1ULL << 40)
#define ZKND_GROUPID 0
#define ZKND_BITMASK (1ULL << 41)
#define ZKNE_GROUPID 0
#define ZKNE_BITMASK (1ULL << 42)
#define ZKNH_GROUPID 0
#define ZKNH_BITMASK (1ULL << 43)
#define ZKSED_GROUPID 0
#define ZKSED_BITMASK (1ULL << 44)
#define ZKSH_GROUPID 0
#define ZKSH_BITMASK (1ULL << 45)
#define ZKT_GROUPID 0
#define ZKT_BITMASK (1ULL << 46)
#define ZTSO_GROUPID 0
#define ZTSO_BITMASK (1ULL << 47)
#define ZVBB_GROUPID 0
#define ZVBB_BITMASK (1ULL << 48)
#define ZVBC_GROUPID 0
#define ZVBC_BITMASK (1ULL << 49)
#define ZVFH_GROUPID 0
#define ZVFH_BITMASK (1ULL << 50)
#define ZVFHMIN_GROUPID 0
#define ZVFHMIN_BITMASK (1ULL << 51)
#define ZVKB_GROUPID 0
#define ZVKB_BITMASK (1ULL << 52)
#define ZVKG_GROUPID 0
#define ZVKG_BITMASK (1ULL << 53)
#define ZVKNED_GROUPID 0
#define ZVKNED_BITMASK (1ULL << 54)
#define ZVKNHA_GROUPID 0
#define ZVKNHA_BITMASK (1ULL << 55)
#define ZVKNHB_GROUPID 0
#define ZVKNHB_BITMASK (1ULL << 56)
#define ZVKSED_GROUPID 0
#define ZVKSED_BITMASK (1ULL << 57)
#define ZVKSH_GROUPID 0
#define ZVKSH_BITMASK (1ULL << 58)
#define ZVKT_GROUPID 0
#define ZVKT_BITMASK (1ULL << 59)

#if defined(__linux__)

// Issues a raw five-argument system call with the RISC-V `ecall` instruction.
//
// `number` is placed in register a7 and arg1..arg5 in a0..a4. The value left
// in a0 after the `ecall` is returned unchanged; no errno-style translation
// is done here.
static long syscall_impl_5_args(long number, long arg1, long arg2, long arg3,
long arg4, long arg5) {
// Explicit register variables pin each value to the register named in the
// asm label.
register long a7 __asm__("a7") = number;
register long a0 __asm__("a0") = arg1;
register long a1 __asm__("a1") = arg2;
register long a2 __asm__("a2") = arg3;
register long a3 __asm__("a3") = arg4;
register long a4 __asm__("a4") = arg5;
// a0 is both an input (first argument) and the output (result). The
// "memory" clobber tells the compiler that memory may be read or written
// by the call.
__asm__ __volatile__("ecall\n\t"
: "=r"(a0)
: "r"(a7), "r"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4)
: "memory");
return a0;
}

#define RISCV_HWPROBE_KEY_MVENDORID 0
#define RISCV_HWPROBE_KEY_MARCHID 1
#define RISCV_HWPROBE_KEY_MIMPID 2
#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
#define RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1ULL << 0)
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
#define RISCV_HWPROBE_IMA_FD (1ULL << 0)
#define RISCV_HWPROBE_IMA_C (1ULL << 1)
#define RISCV_HWPROBE_IMA_V (1ULL << 2)
#define RISCV_HWPROBE_EXT_ZBA (1ULL << 3)
#define RISCV_HWPROBE_EXT_ZBB (1ULL << 4)
#define RISCV_HWPROBE_EXT_ZBS (1ULL << 5)
#define RISCV_HWPROBE_EXT_ZICBOZ (1ULL << 6)
#define RISCV_HWPROBE_EXT_ZBC (1ULL << 7)
#define RISCV_HWPROBE_EXT_ZBKB (1ULL << 8)
#define RISCV_HWPROBE_EXT_ZBKC (1ULL << 9)
#define RISCV_HWPROBE_EXT_ZBKX (1ULL << 10)
#define RISCV_HWPROBE_EXT_ZKND (1ULL << 11)
#define RISCV_HWPROBE_EXT_ZKNE (1ULL << 12)
#define RISCV_HWPROBE_EXT_ZKNH (1ULL << 13)
#define RISCV_HWPROBE_EXT_ZKSED (1ULL << 14)
#define RISCV_HWPROBE_EXT_ZKSH (1ULL << 15)
#define RISCV_HWPROBE_EXT_ZKT (1ULL << 16)
#define RISCV_HWPROBE_EXT_ZVBB (1ULL << 17)
#define RISCV_HWPROBE_EXT_ZVBC (1ULL << 18)
#define RISCV_HWPROBE_EXT_ZVKB (1ULL << 19)
#define RISCV_HWPROBE_EXT_ZVKG (1ULL << 20)
#define RISCV_HWPROBE_EXT_ZVKNED (1ULL << 21)
#define RISCV_HWPROBE_EXT_ZVKNHA (1ULL << 22)
#define RISCV_HWPROBE_EXT_ZVKNHB (1ULL << 23)
#define RISCV_HWPROBE_EXT_ZVKSED (1ULL << 24)
#define RISCV_HWPROBE_EXT_ZVKSH (1ULL << 25)
#define RISCV_HWPROBE_EXT_ZVKT (1ULL << 26)
#define RISCV_HWPROBE_EXT_ZFH (1ULL << 27)
#define RISCV_HWPROBE_EXT_ZFHMIN (1ULL << 28)
#define RISCV_HWPROBE_EXT_ZIHINTNTL (1ULL << 29)
#define RISCV_HWPROBE_EXT_ZVFH (1ULL << 30)
#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31)
#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33)
#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
#define RISCV_HWPROBE_EXT_ZIHINTPAUSE (1ULL << 36)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1ULL << 0)
#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */

// One key/value pair exchanged with the riscv_hwprobe system call.
// `key` is one of the RISCV_HWPROBE_KEY_* constants above and `value`
// receives the answer for that key.
struct riscv_hwprobe {
long long key;
unsigned long long value;
};

// System call number used for riscv_hwprobe.
#define __NR_riscv_hwprobe 258
// Invokes the riscv_hwprobe system call on the `len` key/value pairs in
// `Hwprobes` and returns the raw syscall result (the caller treats any
// non-zero value as failure).
// NOTE(review): the three trailing zeros are presumably the CPU-set size,
// CPU-set pointer and flags arguments of the hwprobe interface - confirm
// against the kernel documentation.
static long initHwProbe(struct riscv_hwprobe *Hwprobes, int len) {
return syscall_impl_5_args(__NR_riscv_hwprobe, (long)Hwprobes, len, 0, 0, 0);
}

// Helper macros for translating hwprobe results into feature bits.
//
// They expand in a scope that provides two locals:
//   - `features`:     array of unsigned long long indexed by <EXT>_GROUPID
//   - `IMAEXT0Value`: value returned for RISCV_HWPROBE_KEY_IMA_EXT_0
//
// SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(X) sets feature X when
// RISCV_HWPROBE_EXT_X is present in IMAEXT0Value.
#define SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(EXTNAME)                    \
  SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_EXT_##EXTNAME, EXTNAME)

// Sets feature EXT when any bit of HWPROBE_BITMASK is present in
// IMAEXT0Value. The mask argument is parenthesized so that a compound
// expression passed as the mask cannot re-associate with the `&`.
#define SET_SINGLE_IMAEXT_RISCV_FEATURE(HWPROBE_BITMASK, EXT)                  \
  SET_SINGLE_RISCV_FEATURE(IMAEXT0Value & (HWPROBE_BITMASK), EXT)

// Sets feature EXT when COND is non-zero. Wrapped in do { } while (0) so the
// expansion is a single statement: it needs the trailing semicolon and is
// safe inside an unbraced if/else.
#define SET_SINGLE_RISCV_FEATURE(COND, EXT)                                    \
  do {                                                                         \
    if (COND) {                                                                \
      SET_RISCV_FEATURE(EXT);                                                  \
    }                                                                          \
  } while (0)

// Unconditionally ORs EXT's bitmask into its feature group.
#define SET_RISCV_FEATURE(EXT) (features[EXT##_GROUPID] |= EXT##_BITMASK)

// Translates the answers of a completed hwprobe call into
// __riscv_feature_bits and __riscv_vendor_feature_bits.
//
// The entries of `Hwprobes` are read by position, so the caller must pass
// them in this order:
//   [0] RISCV_HWPROBE_KEY_BASE_BEHAVIOR
//   [1] RISCV_HWPROBE_KEY_IMA_EXT_0
//   [2] RISCV_HWPROBE_KEY_MVENDORID
static void initRISCVFeature(struct riscv_hwprobe Hwprobes[]) {

// Note: If a hwprobe key is unknown to the kernel, its key field
// will be cleared to -1, and its value set to 0.
// This unsets all extension bitmask bits.

// Init vendor extension
__riscv_vendor_feature_bits.length = 0;
// The 64-bit probe value is implicitly narrowed to the `unsigned` vendorID
// field here.
__riscv_vendor_feature_bits.vendorID = Hwprobes[2].value;

// Init standard extension
// TODO: Maybe Extension implied generate from tablegen?
__riscv_feature_bits.length = RISCV_FEATURE_BITS_LENGTH;

// Bits are accumulated in a local array and published at the end. The
// SET_*_RISCV_FEATURE macros refer to this local by the name `features`.
unsigned long long features[RISCV_FEATURE_BITS_LENGTH];
int i;

for (i = 0; i < RISCV_FEATURE_BITS_LENGTH; i++)
features[i] = 0;

// Check RISCV_HWPROBE_KEY_BASE_BEHAVIOR
unsigned long long BaseValue = Hwprobes[0].value;
if (BaseValue & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) {
SET_RISCV_FEATURE(I);
SET_RISCV_FEATURE(M);
SET_RISCV_FEATURE(A);
}

// Check RISCV_HWPROBE_KEY_IMA_EXT_0
// The SET_*_IMAEXT_* macros refer to this local by the name `IMAEXT0Value`.
unsigned long long IMAEXT0Value = Hwprobes[1].value;
// The single IMA_FD probe bit sets both the F and the D feature.
if (IMAEXT0Value & RISCV_HWPROBE_IMA_FD) {
SET_RISCV_FEATURE(F);
SET_RISCV_FEATURE(D);
}

// Each line below sets one feature when its probe bit is present in
// IMAEXT0Value.
SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_IMA_C, C);
SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_IMA_V, V);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBA);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBB);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBS);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZICBOZ);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBC);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKB);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKC);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKX);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKND);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKNE);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKNH);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKSED);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKSH);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKT);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVBB);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVBC);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKB);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKG);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNED);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNHA);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNHB);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKSED);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKSH);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKT);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFH);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFHMIN);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZIHINTNTL);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZIHINTPAUSE);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVFH);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVFHMIN);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFA);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZTSO);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZACAS);
SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZICOND);

// Publish the accumulated bits to the global structure.
for (i = 0; i < RISCV_FEATURE_BITS_LENGTH; i++)
__riscv_feature_bits.features[i] = features[i];
}

#endif // defined(__linux__)

// Non-zero once __init_riscv_feature_bits() has completed; later calls then
// return immediately.
static int FeaturesBitCached = 0;

// Populates __riscv_feature_bits / __riscv_vendor_feature_bits once.
//
// On Linux the hwprobe system call is queried and its answers are decoded by
// initRISCVFeature(). If the system call reports failure (non-zero result)
// the function returns without marking the bits as cached, so a later call
// tries again. On other systems nothing is probed and the function only
// records that it has run.
//
// Declared with an explicit (void) parameter list so that this is a proper
// prototype in C rather than an unspecified-arguments declaration.
void __init_riscv_feature_bits(void) {

  if (FeaturesBitCached)
    return;

#if defined(__linux__)
  // The order of these entries must match the indices that
  // initRISCVFeature() reads: [0] base behavior, [1] IMA_EXT_0,
  // [2] MVENDORID.
  struct riscv_hwprobe Hwprobes[] = {
      {RISCV_HWPROBE_KEY_BASE_BEHAVIOR, 0},
      {RISCV_HWPROBE_KEY_IMA_EXT_0, 0},
      {RISCV_HWPROBE_KEY_MVENDORID, 0},
  };
  if (initHwProbe(Hwprobes, sizeof(Hwprobes) / sizeof(Hwprobes[0])))
    return;

  initRISCVFeature(Hwprobes);
#endif // defined(__linux__)

  FeaturesBitCached = 1;
}
2 changes: 1 addition & 1 deletion compiler-rt/lib/lsan/lsan_interceptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ INTERCEPTOR(int, atexit, void (*f)()) {
extern "C" {
extern int _pthread_atfork(void (*prepare)(), void (*parent)(),
void (*child)());
};
}

INTERCEPTOR(int, pthread_atfork, void (*prepare)(), void (*parent)(),
void (*child)()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6336,7 +6336,7 @@ INTERCEPTOR(void*, dlopen, const char *filename, int flag) {

const char *SelfFName = DladdrSelfFName();
VPrintf(1, "dlopen interceptor: DladdrSelfFName: %p %s\n",
(void *)SelfFName, SelfFName);
(const void *)SelfFName, SelfFName);

if (SelfFName && internal_strcmp(SelfFName, filename) == 0) {
// It's possible they copied the string from dladdr, so
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/sanitizer_common/sanitizer_procmaps_bsd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ void GetMemoryProfile(fill_profile_f cb, uptr *stats) {
struct kinfo_proc2 *InfoProc;
uptr Len = sizeof(*InfoProc);
uptr Size = Len;
const int Mib[] = {CTL_KERN, KERN_PROC2, KERN_PROC_PID, getpid(), Size, 1};
const int Mib[] = {CTL_KERN, KERN_PROC2, KERN_PROC_PID,
getpid(), (int)Size, 1};
InfoProc = (struct kinfo_proc2 *)MmapOrDie(Size, "GetMemoryProfile()");
CHECK_EQ(
internal_sysctl(Mib, ARRAY_SIZE(Mib), nullptr, (uptr *)InfoProc, &Len, 0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ void FormattedStackTracePrinter::RenderFrame(InternalScopedString *buffer,
break;
default:
Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p,
(void *)p);
(const void *)p);
Die();
}
}
Expand Down Expand Up @@ -323,7 +323,7 @@ void FormattedStackTracePrinter::RenderData(InternalScopedString *buffer,
break;
default:
Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p,
(void *)p);
(const void *)p);
Die();
}
}
Expand Down
54 changes: 27 additions & 27 deletions flang/lib/Lower/ConvertCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,26 +1227,32 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
return hlfir::Entity{copyIn.getCopiedIn()};
};

// Reboxes `var` into a fir.box whose element (or sequence) type is taken from
// the dummy argument type (dummyTypeWithActualRank) and returns the new
// entity.
auto genSetDynamicTypeToDummyType = [&](hlfir::Entity var) -> hlfir::Entity {
fir::BaseBoxType boxType = fir::BoxType::get(
hlfir::getFortranElementOrSequenceType(dummyTypeWithActualRank));
// Assumed-rank actuals use the dedicated rebox operation, with lower bounds
// set to ones.
if (actualIsAssumedRank)
return hlfir::Entity{builder.create<fir::ReboxAssumedRankOp>(
loc, boxType, var, fir::LowerBoundModifierAttribute::SetToOnes)};
// Use actual shape when creating descriptor with dummy type, the dummy
// shape may be unknown in case of sequence association.
mlir::Type actualTy =
hlfir::getFortranElementOrSequenceType(actual.getType());
boxType = boxType.getBoxTypeWithNewShape(actualTy);
// No explicit shape or slice operand is passed to the rebox.
return hlfir::Entity{builder.create<fir::ReboxOp>(loc, boxType, var,
/*shape=*/mlir::Value{},
/*slice=*/mlir::Value{})};
};

// Step 2: prepare the storage for the dummy arguments, ensuring that it
// matches the dummy requirements (e.g., must be contiguous or must be
// a temporary).
hlfir::Entity entity =
hlfir::derefPointersAndAllocatables(loc, builder, actual);
if (entity.isVariable()) {
if (mustSetDynamicTypeToDummyType) {
// Note: this is important to do this before any copy-in or copy so
// that the dummy is contiguous according to the dummy type.
mlir::Type boxType = fir::BoxType::get(
hlfir::getFortranElementOrSequenceType(dummyTypeWithActualRank));
if (actualIsAssumedRank) {
entity = hlfir::Entity{builder.create<fir::ReboxAssumedRankOp>(
loc, boxType, entity, fir::LowerBoundModifierAttribute::SetToOnes)};
} else {
entity = hlfir::Entity{builder.create<fir::ReboxOp>(
loc, boxType, entity, /*shape=*/mlir::Value{},
/*slice=*/mlir::Value{})};
}
}
// Set dynamic type if needed before any copy-in or copy so that the dummy
// is contiguous according to the dummy type.
if (mustSetDynamicTypeToDummyType)
entity = genSetDynamicTypeToDummyType(entity);
if (arg.hasValueAttribute() ||
// Constant expressions might be lowered as variables with
// 'parameter' attribute. Even though the constant expressions
Expand Down Expand Up @@ -1285,20 +1291,14 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
loc, builder, entity, storageType, "", byRefAttr);
entity = hlfir::Entity{associate.getBase()};
preparedDummy.pushExprAssociateCleanUp(associate);
// Rebox the actual argument to the dummy argument's type, and make sure
// that we pass a contiguous entity (i.e. make copy-in, if needed).
//
// TODO: this can probably be optimized by associating the expression with
// properly typed temporary, but this needs either a new operation or
// making the hlfir.associate more complex.
if (mustSetDynamicTypeToDummyType) {
// Rebox the actual argument to the dummy argument's type, and make
// sure that we pass a contiguous entity (i.e. make copy-in,
// if needed).
//
// TODO: this can probably be optimized by associating the expression
// with properly typed temporary, but this needs either a new operation
// or making the hlfir.associate more complex.
assert(!actualIsAssumedRank && "only variables are assumed-rank");
mlir::Type boxType = fir::BoxType::get(
hlfir::getFortranElementOrSequenceType(dummyTypeWithActualRank));
entity = hlfir::Entity{builder.create<fir::ReboxOp>(
loc, boxType, entity, /*shape=*/mlir::Value{},
/*slice=*/mlir::Value{})};
entity = genSetDynamicTypeToDummyType(entity);
entity = genCopyIn(entity, /*doCopyOut=*/false);
}
}
Expand Down
23 changes: 23 additions & 0 deletions flang/lib/Lower/DirectivesCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/openmp-directive-sets.h"
Expand Down Expand Up @@ -135,6 +136,22 @@ static inline void genOmpAtomicHintAndMemoryOrderClauses(
}
}

/// Emits a "not yet implemented" TODO at \p loc when an OpenMP atomic
/// construct operates on an element type that is not supported.
///
/// Nothing is checked when \p elementType is null, or when \p AtomicListT is
/// not Fortran::parser::OmpAtomicClauseList. After stripping reference types,
/// the type is rejected if it is complex or not trivial.
template <typename AtomicListT>
static void processOmpAtomicTODO(mlir::Type elementType, mlir::Location loc) {
  if (!elementType)
    return;
  if constexpr (std::is_same_v<AtomicListT,
                               Fortran::parser::OmpAtomicClauseList>) {
    // Based on assertion for supported element types in OMPIRBuilder.cpp
    // createAtomicRead
    const mlir::Type valueTy = fir::unwrapRefType(elementType);
    if (fir::isa_complex(valueTy) || !fir::isa_trivial(valueTy))
      TODO(loc, "Unsupported atomic type");
  }
}

/// Used to generate atomic.read operation which is created in existing
/// location set by builder.
template <typename AtomicListT>
Expand All @@ -147,6 +164,8 @@ static inline void genOmpAccAtomicCaptureStatement(
// Generate `atomic.read` operation for atomic assigment statements
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

processOmpAtomicTODO<AtomicListT>(elementType, loc);

if constexpr (std::is_same<AtomicListT,
Fortran::parser::OmpAtomicClauseList>()) {
// If no hint clause is specified, the effect is as if
Expand Down Expand Up @@ -183,6 +202,8 @@ static inline void genOmpAccAtomicWriteStatement(
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr);

processOmpAtomicTODO<AtomicListT>(varType, loc);

if constexpr (std::is_same<AtomicListT,
Fortran::parser::OmpAtomicClauseList>()) {
// If no hint clause is specified, the effect is as if
Expand Down Expand Up @@ -323,6 +344,8 @@ static inline void genOmpAccAtomicUpdateStatement(
currentLocation, lhsAddr);
}

processOmpAtomicTODO<AtomicListT>(varType, loc);

llvm::SmallVector<mlir::Type> varTys = {varType};
llvm::SmallVector<mlir::Location> locs = {currentLocation};
firOpBuilder.createBlock(&atomicUpdateOp->getRegion(0), {}, varTys, locs);
Expand Down
49 changes: 47 additions & 2 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Semantics/tools.h"

namespace Fortran {
Expand Down Expand Up @@ -127,8 +128,52 @@ void DataSharingProcessor::copyFirstPrivateSymbol(
void DataSharingProcessor::copyLastPrivateSymbol(
const semantics::Symbol *sym,
[[maybe_unused]] mlir::OpBuilder::InsertPoint *lastPrivIP) {
if (sym->test(semantics::Symbol::Flag::OmpLastPrivate))
converter.copyHostAssociateVar(*sym, lastPrivIP);
if (sym->test(semantics::Symbol::Flag::OmpLastPrivate)) {
bool allocatable = semantics::IsAllocatable(sym->GetUltimate());
if (!allocatable) {
converter.copyHostAssociateVar(*sym, lastPrivIP);
return;
}

// copyHostAssociateVar doesn't work properly if the privatised copy was
// reallocated (e.g. by assignment): it will only copy if the ultimate
// symbol was already allocated, and it only copies data so any reallocated
// lengths etc are lost

// 1) Fetch the original copy of the variable.
assert(sym->has<Fortran::semantics::HostAssocDetails>() &&
"No host-association found");
const Fortran::semantics::Symbol &hsym = sym->GetUltimate();
Fortran::lower::SymbolBox hsb = symTable->lookupOneLevelUpSymbol(hsym);
assert(hsb && "Host symbol box not found");

// 2) Fetch the copied one that will mask the original.
Fortran::lower::SymbolBox sb = symTable->shallowLookupSymbol(sym);
assert(sb && "Host-associated symbol box not found");
assert(hsb.getAddr() != sb.getAddr() &&
"Host and associated symbol boxes are the same");

// 3) Perform the assignment.
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.genLocation(sym->name());
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
if (lastPrivIP && lastPrivIP->isSet())
builder.restoreInsertionPoint(*lastPrivIP);
else
builder.setInsertionPointAfter(sb.getAddr().getDefiningOp());

hlfir::Entity dst{hsb.getAddr()};
hlfir::Entity src{sb.getAddr()};
builder.create<hlfir::AssignOp>(
loc, src, dst, /*isWholeAllocatableAssignment=*/allocatable,
/*keepLhsLengthInAllocatableAssignment=*/false,
/*temporary_lhs=*/false);

if (lastPrivIP && lastPrivIP->isSet() &&
sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) {
builder.restoreInsertionPoint(insPt);
}
}
}

void DataSharingProcessor::collectOmpObjectListSymbol(
Expand Down
23 changes: 21 additions & 2 deletions flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "mlir/IR/Iterators.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"

Expand Down Expand Up @@ -325,6 +325,25 @@ class DummyScopeOpConversion
}
};

/// Lightweight dead-code cleanup run at the end of this pass to drop
/// fir.shape/shift/slice operations and unused shape operands (fir.shape and
/// the like have no codegen).
///
/// mlir::RegionDCE is not used because it is expensive, requires running
/// mlir::eraseUnreachableBlocks, and does work that is not needed here such
/// as removing unused block arguments; fir.shape/shift/slice cannot be block
/// arguments.
///
/// The helper walks the IR under \p op naively backwards. The walk is not
/// guaranteed to visit blocks according to backward dominance, but that is
/// good enough since fir.shape/shift/slice have no usages anymore. Walking
/// backwards gets rid of most of the unused operands, and it is not a problem
/// to leave some behind in the weird cases.
static void simpleDCE(mlir::RewriterBase &rewriter, mlir::Operation *op) {
  auto eraseIfDead = [&rewriter](mlir::Operation *candidate) {
    if (!mlir::isOpTriviallyDead(candidate))
      return;
    rewriter.eraseOp(candidate);
  };
  op->walk<mlir::WalkOrder::PostOrder, mlir::ReverseIterator>(eraseIfDead);
}

class CodeGenRewrite : public fir::impl::CodeGenRewriteBase<CodeGenRewrite> {
public:
using CodeGenRewriteBase<CodeGenRewrite>::CodeGenRewriteBase;
Expand Down Expand Up @@ -356,7 +375,7 @@ class CodeGenRewrite : public fir::impl::CodeGenRewriteBase<CodeGenRewrite> {
}
// Erase any residual (fir.shape, fir.slice...).
mlir::IRRewriter rewriter(&context);
(void)mlir::runRegionDCE(rewriter, mod->getRegions());
simpleDCE(rewriter, mod.getOperation());
}
};

Expand Down
12 changes: 12 additions & 0 deletions flang/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,15 @@ else()
FortranRuntime.static_dbg FortranRuntime.dynamic_dbg)
endif()
set_target_properties(FortranRuntime PROPERTIES FOLDER "Flang/Runtime Libraries")

# If FortranRuntime is part of a Flang build (and not a separate build) then
# add dependency to make sure that Fortran runtime library is being built after
# we have the Flang compiler available. This also includes the MODULE files
# that compile when the 'flang-new' target is built.
#
# TODO: This is a workaround and should be updated when runtime build procedure
# is changed to a regular runtime build. See discussion in PR #95388.
if (TARGET flang-new AND TARGET module_files)
add_dependencies(FortranRuntime flang-new module_files)
endif()

7 changes: 3 additions & 4 deletions flang/runtime/derived.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ static RT_API_ATTRS void GetComponentExtents(SubscriptValue (&extents)[maxRank],
const typeInfo::Component &comp, const Descriptor &derivedInstance) {
const typeInfo::Value *bounds{comp.bounds()};
for (int dim{0}; dim < comp.rank(); ++dim) {
SubscriptValue lb{bounds[2 * dim].GetValue(&derivedInstance).value_or(0)};
SubscriptValue ub{
bounds[2 * dim + 1].GetValue(&derivedInstance).value_or(0)};
extents[dim] = ub >= lb ? ub - lb + 1 : 0;
auto lb{bounds[2 * dim].GetValue(&derivedInstance).value_or(0)};
auto ub{bounds[2 * dim + 1].GetValue(&derivedInstance).value_or(0)};
extents[dim] = ub >= lb ? static_cast<SubscriptValue>(ub - lb + 1) : 0;
}
}

Expand Down
14 changes: 14 additions & 0 deletions flang/test/Fir/declare-codegen.fir
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,17 @@ func.func @useless_shape_with_duplicate_extent_operand(%arg0: !fir.ref<!fir.arra

// DECL-LABEL: func.func @useless_shape_with_duplicate_extent_operand(
// DECL: fircg.ext_declare

// Test DCE does not crash because of unreachable code.
func.func @unreachable_code(%arg0: !fir.ref<!fir.char<1,10>>) {
%c10 = arith.constant 10 : index
%2 = fir.declare %arg0 typeparams %c10 {uniq_name = "live_code"} : (!fir.ref<!fir.char<1,10>>, index) -> (!fir.ref<!fir.char<1,10>>)
return
^bb2: // no predecessors
%3 = fir.declare %arg0 typeparams %c10 {uniq_name = "dead_code"} : (!fir.ref<!fir.char<1,10>>, index) -> (!fir.ref<!fir.char<1,10>>)
fir.unreachable
}
// NODECL-LABEL: func.func @unreachable_code(
// NODECL-NOT: uniq_name = "live_code"
// DECL-LABEL: func.func @unreachable_code(
// DECL: uniq_name = "live_code"
26 changes: 26 additions & 0 deletions flang/test/Lower/HLFIR/calls-poly-to-nonpoly.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
! Test passing polymorphic variable for non-polymorphic dummy arguments:
! RUN: bbc -emit-hlfir -o - -I nowhere %s | FileCheck %s

subroutine test_sequence_association(x)
type t
integer :: i
end type
interface
subroutine sequence_assoc(x, n)
import :: t
type(t) :: x(n)
end subroutine
end interface
class(t) :: x(:, :)
call sequence_assoc(x, 100)
end subroutine
! CHECK-LABEL: func.func @_QPtest_sequence_association(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.class<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>>
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
! CHECK: %[[REBOX:.*]] = fir.rebox %[[VAL_3]]#0 : (!fir.class<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>) -> !fir.box<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>
! CHECK: %[[VAL_5:.*]]:2 = hlfir.copy_in %[[REBOX]] to %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>>>) -> (!fir.box<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>, i1)
! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_5]]#0 : (!fir.box<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>) -> !fir.ref<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>
! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>) -> !fir.ref<!fir.array<?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>
! CHECK: fir.call @_QPsequence_assoc(%[[VAL_7]], %{{.*}})
! CHECK: hlfir.copy_out %[[VAL_1]], %[[VAL_5]]#1 to %[[REBOX]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>>>, i1, !fir.box<!fir.array<?x?x!fir.type<_QFtest_sequence_associationTt{i:i32}>>>) -> ()
8 changes: 8 additions & 0 deletions flang/test/Lower/OpenMP/Todo/atomic-character.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s

! CHECK: not yet implemented: Unsupported atomic type
! Atomic read between two default character variables. Lowering is expected to
! stop with the "not yet implemented: Unsupported atomic type" message.
subroutine character_atomic
character :: l, r
!$omp atomic read
l = r
end subroutine
8 changes: 8 additions & 0 deletions flang/test/Lower/OpenMP/Todo/atomic-complex.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s

! CHECK: not yet implemented: Unsupported atomic type
! Atomic read between two default complex variables. Lowering is expected to
! stop with the "not yet implemented: Unsupported atomic type" message.
subroutine complex_atomic
complex :: l, r
!$omp atomic read
l = r
end subroutine
28 changes: 12 additions & 16 deletions flang/test/Lower/OpenMP/atomic-read.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,18 @@
! This test checks the lowering of atomic read

!CHECK: func @_QQmain() attributes {fir.bindc_name = "ompatomic"} {
!CHECK: %[[A_C1:.*]] = arith.constant 1 : index
!CHECK: %[[A_REF:.*]] = fir.alloca !fir.char<1> {bindc_name = "a", uniq_name = "_QFEa"}
!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_REF]] typeparams %[[A_C1]] {uniq_name = "_QFEa"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
!CHECK: %[[B_C1:.*]] = arith.constant 1 : index
!CHECK: %[[B_REF:.*]] = fir.alloca !fir.char<1> {bindc_name = "b", uniq_name = "_QFEb"}
!CHECK: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_REF]] typeparams %[[B_C1]] {uniq_name = "_QFEb"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
!CHECK: %[[A_REF:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFEa"}
!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_REF]] {uniq_name = "_QFEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[B_REF:.*]] = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFEb"}
!CHECK: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_REF]] {uniq_name = "_QFEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[C_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "c", uniq_name = "_QFEc"}
!CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[C_REF]] {uniq_name = "_QFEc"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
!CHECK: %[[D_REF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "d", uniq_name = "_QFEd"}
!CHECK: %[[D_DECL:.*]]:2 = hlfir.declare %[[D_REF]] {uniq_name = "_QFEd"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
!CHECK: %[[E_C8:.*]] = arith.constant 8 : index
!CHECK: %[[E_REF:.*]] = fir.alloca !fir.char<1,8> {bindc_name = "e", uniq_name = "_QFEe"}
!CHECK: %[[E_DECL:.*]]:2 = hlfir.declare %[[E_REF]] typeparams %[[E_C8]] {uniq_name = "_QFEe"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
!CHECK: %[[F_C8:.*]] = arith.constant 8 : index
!CHECK: %[[F_REF:.*]] = fir.alloca !fir.char<1,8> {bindc_name = "f", uniq_name = "_QFEf"}
!CHECK: %[[F_DECL:.*]]:2 = hlfir.declare %[[F_REF]] typeparams %[[F_C8]] {uniq_name = "_QFEf"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
!CHECK: %[[E_REF:.*]] = fir.alloca i32 {bindc_name = "e", uniq_name = "_QFEe"}
!CHECK: %[[E_DECL:.*]]:2 = hlfir.declare %[[E_REF]] {uniq_name = "_QFEe"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[F_REF:.*]] = fir.alloca i32 {bindc_name = "f", uniq_name = "_QFEf"}
!CHECK: %[[F_DECL:.*]]:2 = hlfir.declare %[[F_REF]] {uniq_name = "_QFEf"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[G_REF:.*]] = fir.alloca f32 {bindc_name = "g", uniq_name = "_QFEg"}
!CHECK: %[[G_DECL:.*]]:2 = hlfir.declare %[[G_REF]] {uniq_name = "_QFEg"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[H_REF:.*]] = fir.alloca f32 {bindc_name = "h", uniq_name = "_QFEh"}
Expand All @@ -30,19 +26,19 @@
!CHECK: %[[Y_REF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.atomic.read %[[X_DECL]]#1 = %[[Y_DECL]]#1 memory_order(acquire) hint(uncontended) : !fir.ref<i32>, i32
!CHECK: omp.atomic.read %[[A_DECL]]#1 = %[[B_DECL]]#1 memory_order(relaxed) : !fir.ref<!fir.char<1>>, !fir.char<1>
!CHECK: omp.atomic.read %[[A_DECL]]#1 = %[[B_DECL]]#1 memory_order(relaxed) : !fir.ref<i32>, i32
!CHECK: omp.atomic.read %[[C_DECL]]#1 = %[[D_DECL]]#1 memory_order(seq_cst) hint(contended) : !fir.ref<!fir.logical<4>>, !fir.logical<4>
!CHECK: omp.atomic.read %[[E_DECL]]#1 = %[[F_DECL]]#1 hint(speculative) : !fir.ref<!fir.char<1,8>>, !fir.char<1,8>
!CHECK: omp.atomic.read %[[E_DECL]]#1 = %[[F_DECL]]#1 hint(speculative) : !fir.ref<i32>, i32
!CHECK: omp.atomic.read %[[G_DECL]]#1 = %[[H_DECL]]#1 hint(nonspeculative) : !fir.ref<f32>, f32
!CHECK: omp.atomic.read %[[G_DECL]]#1 = %[[H_DECL]]#1 : !fir.ref<f32>, f32

program OmpAtomic

use omp_lib
integer :: x, y
character :: a, b
integer :: a, b
logical :: c, d
character(8) :: e, f
integer :: e, f
real g, h
!$omp atomic acquire read hint(omp_sync_hint_uncontended)
x = y
Expand Down
38 changes: 38 additions & 0 deletions flang/test/Lower/OpenMP/lastprivate-allocatable.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
! RUN: %flang_fc1 -emit-hlfir -o - -fopenmp %s | FileCheck %s
! RUN: bbc -emit-hlfir -o - -fopenmp %s | FileCheck %s

! An allocatable scalar that is still unallocated is named in a lastprivate
! clause and assigned inside the loop. The expectations further down look for
! the private copy being assigned back to the original with `realloc`.
program lastprivate_allocatable
integer, allocatable :: a
integer :: i
! a is unallocated here
!$omp parallel do lastprivate(a)
do i=1,1
a = 42
enddo
!$omp end parallel do
! a should be allocated here
end program

! CHECK-LABEL: func.func @_QQmain()
! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "a", uniq_name = "_QFEa"}
! CHECK: %[[VAL_1:.*]] = fir.zero_bits !fir.heap<i32>
! CHECK: %[[VAL_2:.*]] = fir.embox %[[VAL_1]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK: fir.store %[[VAL_2]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
! CHECK: omp.parallel {
! create original copy of private variable
! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
! CHECK: %[[VAL_17:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
! CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: omp.wsloop {
! CHECK: omp.loop_nest
! [...]
! if this is the last iteration
! CHECK: fir.if %{{.*}} {
! store loop IV
! CHECK: fir.store %{{.*}} to %[[VAL_18]]#1 : !fir.ref<i32>
! assign private variable to original copy: realloc
! CHECK: hlfir.assign %[[VAL_16]]#0 to %[[VAL_3]]#0 realloc : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: }
! CHECK-NEXT: omp.yield
! CHECK-NEXT: }
6 changes: 5 additions & 1 deletion libc/benchmarks/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function(add_benchmark benchmark_name)
"BENCHMARK"
"" # Optional arguments
"" # Single value arguments
"LINK_LIBRARIES" # Multi-value arguments
"LINK_LIBRARIES;DEPENDS" # Multi-value arguments
${ARGN}
)

Expand All @@ -20,6 +20,9 @@ function(add_benchmark benchmark_name)
LINK_LIBRARIES
LibcGpuBenchmark.hermetic
${BENCHMARK_LINK_LIBRARIES}
DEPENDS
libc.src.stdio.printf
${BENCHMARK_DEPENDS}
${BENCHMARK_UNPARSED_ARGUMENTS}
)
get_fq_target_name(${benchmark_name} fq_target_name)
Expand Down Expand Up @@ -55,6 +58,7 @@ add_unittest_framework_library(
libc.src.__support.fixedvector
libc.src.time.clock
libc.benchmarks.gpu.timing.timing
libc.src.stdio.printf
)

add_subdirectory(src)
62 changes: 48 additions & 14 deletions libc/benchmarks/gpu/LibcGpuBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "src/__support/GPU/utils.h"
#include "src/__support/fixedvector.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf.h"
#include "src/time/gpu/time_utils.h"

namespace LIBC_NAMESPACE_DECL {
Expand Down Expand Up @@ -73,10 +74,16 @@ struct AtomicBenchmarkSums {
};

AtomicBenchmarkSums all_results;
const char *header_format_string =
"Benchmark | Cycles | Min | Max | Iterations | "
"Time | Stddev | Threads |\n";
const char *output_format_string =
"%-20s |%8ld |%8ld |%8ld |%11ld |%9ld %2s |%9ld |%9d |\n";

constexpr auto GREEN = "\033[32m";
constexpr auto RESET = "\033[0m";

void print_results(Benchmark *b) {
constexpr auto GREEN = "\033[32m";
constexpr auto RESET = "\033[0m";

BenchmarkResult result;
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
Expand All @@ -92,32 +99,59 @@ void print_results(Benchmark *b) {
all_results.samples_sum.load(cpp::MemoryOrder::RELAXED) / num_threads;
result.total_iterations =
all_results.iterations_sum.load(cpp::MemoryOrder::RELAXED) / num_threads;
result.total_time =
const uint64_t duration_ns =
all_results.time_sum.load(cpp::MemoryOrder::RELAXED) / num_threads;
const uint64_t duration_us = duration_ns / 1000;
const uint64_t duration_ms = duration_ns / (1000 * 1000);
uint64_t converted_duration = duration_ns;
cpp::string time_unit;
if (duration_ms != 0) {
converted_duration = duration_ms;
time_unit = cpp::string("ms");
} else if (duration_us != 0) {
converted_duration = duration_us;
time_unit = cpp::string("us");
} else {
converted_duration = duration_ns;
time_unit = cpp::string("ns");
}
result.total_time = converted_duration;
// result.total_time =
// all_results.time_sum.load(cpp::MemoryOrder::RELAXED) / num_threads;
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);

log << GREEN << "[ RUN ] " << RESET << b->get_name() << '\n';
log << GREEN << "[ OK ] " << RESET << b->get_name() << ": "
<< result.cycles << " cycles, " << result.min << " min, " << result.max
<< " max, " << result.total_iterations << " iterations, "
<< result.total_time << " ns, "
<< static_cast<uint64_t>(result.standard_deviation)
<< " stddev (num threads: " << num_threads << ")\n";
LIBC_NAMESPACE::printf(
output_format_string, b->get_test_name().data(), result.cycles,
result.min, result.max, result.total_iterations, result.total_time,
time_unit.data(), static_cast<uint64_t>(result.standard_deviation),
num_threads);
}

// Prints the banner that precedes the per-benchmark result rows: a green
// "Running Suite: <name>" line, the column header row, and a separator rule.
// The suite name is read from the first entry of `benchmarks`.
// NOTE(review): benchmarks[0] is read without a bounds check here; this
// assumes at least one benchmark has been registered -- confirm with callers.
void print_header() {
  // Colour escape, suite line and colour reset are emitted by a single call;
  // the bytes written are the same as issuing them one after another.
  LIBC_NAMESPACE::printf("%sRunning Suite: %-10s\n%s", GREEN,
                         benchmarks[0]->get_suite_name().data(), RESET);
  // Route the header text through "%s" so it is always treated as data and
  // never interpreted as a format string.
  LIBC_NAMESPACE::printf("%s", header_format_string);
  LIBC_NAMESPACE::printf(
      "---------------------------------------------------------------------"
      "--------------------------------\n");
}

void Benchmark::run_benchmarks() {
uint64_t id = gpu::get_thread_id();

if (id == 0)
print_header();

gpu::sync_threads();

for (Benchmark *b : benchmarks) {
if (id == 0)
all_results.reset();

gpu::sync_threads();
if (!b->flags ||
((b->flags & BenchmarkFlags::SINGLE_THREADED) && id == 0) ||
((b->flags & BenchmarkFlags::SINGLE_WAVE) &&
id < gpu::get_lane_size())) {
if (b->num_threads == static_cast<uint32_t>(-1) || id < b->num_threads) {
auto current_result = b->run();
all_results.update(current_result);
}
Expand Down
33 changes: 19 additions & 14 deletions libc/benchmarks/gpu/LibcGpuBenchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,26 @@ struct BenchmarkResult {
clock_t total_time = 0;
};

enum BenchmarkFlags { SINGLE_THREADED = 0x1, SINGLE_WAVE = 0x2 };

BenchmarkResult benchmark(const BenchmarkOptions &options,
cpp::function<uint64_t(void)> wrapper_func);

class Benchmark {
const cpp::function<uint64_t(void)> func;
const cpp::string_view name;
const uint8_t flags;
const cpp::string_view suite_name;
const cpp::string_view test_name;
const uint32_t num_threads;

public:
Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
: func(func), name(name), flags(flags) {
Benchmark(cpp::function<uint64_t(void)> func, char const *suite_name,
char const *test_name, uint32_t num_threads)
: func(func), suite_name(suite_name), test_name(test_name),
num_threads(num_threads) {
add_benchmark(this);
}

static void run_benchmarks();
const cpp::string_view get_name() const { return name; }
const cpp::string_view get_suite_name() const { return suite_name; }
const cpp::string_view get_test_name() const { return test_name; }

protected:
static void add_benchmark(Benchmark *benchmark);
Expand All @@ -105,18 +107,21 @@ class Benchmark {
} // namespace benchmarks
} // namespace LIBC_NAMESPACE_DECL

// Passing -1 indicates the benchmark should be run with as many threads as
// allocated by the user in the benchmark's CMake.
#define BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
Func, #SuiteName "." #TestName, 0)
Func, #SuiteName, #TestName, -1)

#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
#define BENCHMARK_N_THREADS(SuiteName, TestName, Func, NumThreads) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
Func, #SuiteName "." #TestName, \
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
Func, #SuiteName, #TestName, NumThreads)

#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
BENCHMARK_N_THREADS(SuiteName, TestName, Func, 1)

#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
Func, #SuiteName "." #TestName, \
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)
BENCHMARK_N_THREADS(SuiteName, TestName, Func, \
LIBC_NAMESPACE::gpu::get_lane_size())

#endif
10 changes: 10 additions & 0 deletions libc/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@
"doc": "Default size for the constinit freelist buffer used for the freelist malloc implementation (default to 1GB)."
}
},
"unistd": {
"LIBC_CONF_ENABLE_TID_CACHE": {
"value": true,
"doc": "Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications."
},
"LIBC_CONF_ENABLE_PID_CACHE": {
"value": true,
"doc": "Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications."
}
},
"math": {
"LIBC_CONF_MATH_OPTIMIZATIONS": {
"value": 0,
Expand Down
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.cos
libc.src.math.cosf
libc.src.math.cospif
libc.src.math.dsqrtl
libc.src.math.erff
libc.src.math.exp
libc.src.math.expf
Expand Down
1 change: 1 addition & 0 deletions libc/config/darwin/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ set(TARGET_LIBM_ENTRYPOINTS
#libc.src.math.ceill
#libc.src.math.coshf
#libc.src.math.cosf
#libc.src.math.dsqrtl
#libc.src.math.expf
#libc.src.math.exp2f
#libc.src.math.expm1f
Expand Down
3 changes: 3 additions & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.geteuid
libc.src.unistd.getpid
libc.src.unistd.getppid
libc.src.unistd.gettid
libc.src.unistd.getuid
libc.src.unistd.isatty
libc.src.unistd.link
Expand Down Expand Up @@ -358,6 +359,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.coshf
libc.src.math.cospif
libc.src.math.dmull
libc.src.math.dsqrtl
libc.src.math.erff
libc.src.math.exp
libc.src.math.exp10
Expand Down Expand Up @@ -597,6 +599,7 @@ if(LIBC_TYPES_HAS_FLOAT128)
# math.h C23 _Float128 entrypoints
libc.src.math.ceilf128
libc.src.math.copysignf128
libc.src.math.dsqrtf128
libc.src.math.fabsf128
libc.src.math.fdimf128
libc.src.math.floorf128
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.cos
libc.src.math.cosf
libc.src.math.coshf
libc.src.math.dsqrtl
libc.src.math.erff
libc.src.math.exp
libc.src.math.exp10
Expand Down
118 changes: 108 additions & 10 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.ctype.tolower
libc.src.ctype.toupper

# dlfcn.h entrypoints
libc.src.dlfcn.dlclose
libc.src.dlfcn.dlerror
libc.src.dlfcn.dlopen
libc.src.dlfcn.dlsym

# errno.h entrypoints
libc.src.errno.errno

Expand Down Expand Up @@ -52,6 +58,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.mempcpy
libc.src.string.memrchr
libc.src.string.memset
libc.src.string.memset_explicit
libc.src.string.rindex
libc.src.string.stpcpy
libc.src.string.stpncpy
Expand Down Expand Up @@ -180,6 +187,9 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.qsort_r
libc.src.stdlib.rand
libc.src.stdlib.srand
libc.src.stdlib.strfromd
libc.src.stdlib.strfromf
libc.src.stdlib.strfroml
libc.src.stdlib.strtod
libc.src.stdlib.strtof
libc.src.stdlib.strtol
Expand All @@ -197,6 +207,7 @@ set(TARGET_LIBC_ENTRYPOINTS

# stdio.h entrypoints
libc.src.stdio.fdopen
libc.src.stdio.fileno
libc.src.stdio.fprintf
libc.src.stdio.fscanf
libc.src.stdio.printf
Expand All @@ -211,6 +222,14 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdio.vsnprintf
libc.src.stdio.vsprintf

# sys/epoll.h entrypoints
libc.src.sys.epoll.epoll_create
libc.src.sys.epoll.epoll_create1
libc.src.sys.epoll.epoll_ctl
libc.src.sys.epoll.epoll_pwait
libc.src.sys.epoll.epoll_wait
libc.src.sys.epoll.epoll_pwait2

# sys/mman.h entrypoints
libc.src.sys.mman.madvise
libc.src.sys.mman.mincore
Expand Down Expand Up @@ -247,6 +266,10 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.sys.stat.mkdirat
libc.src.sys.stat.stat

# sys/statvfs.h
libc.src.sys.statvfs.fstatvfs
libc.src.sys.statvfs.statvfs

# sys/utsname.h entrypoints
libc.src.sys.utsname.uname

Expand All @@ -261,12 +284,6 @@ set(TARGET_LIBC_ENTRYPOINTS
# sys/auxv.h entrypoints
libc.src.sys.auxv.getauxval

# sys/epoll.h entrypoints
# Disabled due to epoll_wait syscalls not being available on this platform.
# libc.src.sys.epoll.epoll_wait
# libc.src.sys.epoll.epoll_pwait
# libc.src.sys.epoll.epoll_pwait2

# termios.h entrypoints
libc.src.termios.cfgetispeed
libc.src.termios.cfgetospeed
Expand Down Expand Up @@ -296,12 +313,14 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.geteuid
libc.src.unistd.getpid
libc.src.unistd.getppid
libc.src.unistd.gettid
libc.src.unistd.getuid
libc.src.unistd.isatty
libc.src.unistd.link
libc.src.unistd.linkat
libc.src.unistd.lseek
libc.src.unistd.pathconf
libc.src.unistd.pipe
libc.src.unistd.pread
libc.src.unistd.pwrite
libc.src.unistd.read
Expand Down Expand Up @@ -347,6 +366,9 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.canonicalize
libc.src.math.canonicalizef
libc.src.math.canonicalizel
libc.src.math.cbrt
libc.src.math.cbrtf
libc.src.math.ceil
Expand All @@ -359,12 +381,15 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.cosf
libc.src.math.coshf
libc.src.math.cospif
libc.src.math.dmull
libc.src.math.dsqrtl
libc.src.math.erff
libc.src.math.exp
libc.src.math.exp10
libc.src.math.exp10f
libc.src.math.exp2
libc.src.math.exp2f
libc.src.math.exp2m1f
libc.src.math.expf
libc.src.math.expm1
libc.src.math.expm1f
Expand Down Expand Up @@ -414,6 +439,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.fmodf
libc.src.math.fmodl
libc.src.math.fmul
libc.src.math.fmull
libc.src.math.frexp
libc.src.math.frexpf
libc.src.math.frexpl
Expand All @@ -423,6 +449,8 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.fromfpx
libc.src.math.fromfpxf
libc.src.math.fromfpxl
libc.src.math.fsqrt
libc.src.math.fsqrtl
libc.src.math.hypot
libc.src.math.hypotf
libc.src.math.ilogb
Expand Down Expand Up @@ -492,6 +520,9 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.rintf
libc.src.math.rintl
libc.src.math.round
libc.src.math.roundeven
libc.src.math.roundevenf
libc.src.math.roundevenl
libc.src.math.roundf
libc.src.math.roundl
libc.src.math.scalbn
Expand Down Expand Up @@ -523,8 +554,11 @@ set(TARGET_LIBM_ENTRYPOINTS
if(LIBC_TYPES_HAS_FLOAT128)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# math.h C23 _Float128 entrypoints
libc.src.math.canonicalizef128
libc.src.math.ceilf128
libc.src.math.copysignf128
libc.src.math.dmulf128
libc.src.math.dsqrtf128
libc.src.math.fabsf128
libc.src.math.fdimf128
libc.src.math.floorf128
Expand All @@ -539,9 +573,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
libc.src.math.fminimum_numf128
libc.src.math.fminimumf128
libc.src.math.fmodf128
libc.src.math.fmulf128
libc.src.math.frexpf128
libc.src.math.fromfpf128
libc.src.math.fromfpxf128
libc.src.math.fsqrtf128
libc.src.math.ilogbf128
libc.src.math.ldexpf128
libc.src.math.llogbf128
Expand All @@ -556,7 +592,9 @@ if(LIBC_TYPES_HAS_FLOAT128)
libc.src.math.nextafterf128
libc.src.math.nextdownf128
libc.src.math.nextupf128
libc.src.math.remquof128
libc.src.math.rintf128
libc.src.math.roundevenf128
libc.src.math.roundf128
libc.src.math.scalbnf128
libc.src.math.sqrtf128
Expand All @@ -566,14 +604,47 @@ if(LIBC_TYPES_HAS_FLOAT128)
)
endif()

if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
libc.src.stdfix.abshk
libc.src.stdfix.abshr
libc.src.stdfix.absk
libc.src.stdfix.abslk
libc.src.stdfix.abslr
libc.src.stdfix.absr
libc.src.stdfix.exphk
libc.src.stdfix.expk
libc.src.stdfix.roundhk
libc.src.stdfix.roundhr
libc.src.stdfix.roundk
libc.src.stdfix.roundlk
libc.src.stdfix.roundlr
libc.src.stdfix.roundr
libc.src.stdfix.rounduhk
libc.src.stdfix.rounduhr
libc.src.stdfix.rounduk
libc.src.stdfix.roundulk
libc.src.stdfix.roundulr
libc.src.stdfix.roundur
libc.src.stdfix.sqrtuhk
libc.src.stdfix.sqrtuhr
libc.src.stdfix.sqrtuk
libc.src.stdfix.sqrtur
libc.src.stdfix.sqrtulr
libc.src.stdfix.uhksqrtus
libc.src.stdfix.uksqrtui
)
endif()

if(LLVM_LIBC_FULL_BUILD)
list(APPEND TARGET_LIBC_ENTRYPOINTS
# compiler entrypoints (no corresponding header)
libc.src.compiler.__stack_chk_fail

# assert.h entrypoints
libc.src.assert.__assert_fail

# compiler entrypoints (no corresponding header)
libc.src.compiler.__stack_chk_fail

# dirent.h entrypoints
libc.src.dirent.closedir
libc.src.dirent.dirfd
Expand All @@ -598,6 +669,12 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.pthread.pthread_attr_setguardsize
libc.src.pthread.pthread_attr_setstack
libc.src.pthread.pthread_attr_setstacksize
libc.src.pthread.pthread_condattr_destroy
libc.src.pthread.pthread_condattr_getclock
libc.src.pthread.pthread_condattr_getpshared
libc.src.pthread.pthread_condattr_init
libc.src.pthread.pthread_condattr_setclock
libc.src.pthread.pthread_condattr_setpshared
libc.src.pthread.pthread_create
libc.src.pthread.pthread_detach
libc.src.pthread.pthread_equal
Expand All @@ -620,6 +697,21 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.pthread.pthread_mutexattr_setrobust
libc.src.pthread.pthread_mutexattr_settype
libc.src.pthread.pthread_once
libc.src.pthread.pthread_rwlock_destroy
libc.src.pthread.pthread_rwlock_init
libc.src.pthread.pthread_rwlock_rdlock
libc.src.pthread.pthread_rwlock_timedrdlock
libc.src.pthread.pthread_rwlock_timedwrlock
libc.src.pthread.pthread_rwlock_tryrdlock
libc.src.pthread.pthread_rwlock_trywrlock
libc.src.pthread.pthread_rwlock_unlock
libc.src.pthread.pthread_rwlock_wrlock
libc.src.pthread.pthread_rwlockattr_destroy
libc.src.pthread.pthread_rwlockattr_getkind_np
libc.src.pthread.pthread_rwlockattr_getpshared
libc.src.pthread.pthread_rwlockattr_init
libc.src.pthread.pthread_rwlockattr_setkind_np
libc.src.pthread.pthread_rwlockattr_setpshared
libc.src.pthread.pthread_self
libc.src.pthread.pthread_setname_np
libc.src.pthread.pthread_setspecific
Expand All @@ -643,7 +735,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.stdio.fgetc
libc.src.stdio.fgetc_unlocked
libc.src.stdio.fgets
libc.src.stdio.fileno
libc.src.stdio.flockfile
libc.src.stdio.fopen
libc.src.stdio.fopencookie
Expand All @@ -652,7 +743,9 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.stdio.fread
libc.src.stdio.fread_unlocked
libc.src.stdio.fseek
libc.src.stdio.fseeko
libc.src.stdio.ftell
libc.src.stdio.ftello
libc.src.stdio.funlockfile
libc.src.stdio.fwrite
libc.src.stdio.fwrite_unlocked
Expand All @@ -673,9 +766,11 @@ if(LLVM_LIBC_FULL_BUILD)
# stdlib.h entrypoints
libc.src.stdlib._Exit
libc.src.stdlib.abort
libc.src.stdlib.at_quick_exit
libc.src.stdlib.atexit
libc.src.stdlib.exit
libc.src.stdlib.getenv
libc.src.stdlib.quick_exit

# signal.h entrypoints
libc.src.signal.kill
Expand Down Expand Up @@ -757,6 +852,9 @@ if(LLVM_LIBC_FULL_BUILD)

# sys/select.h entrypoints
libc.src.sys.select.select

# sys/socket.h entrypoints
libc.src.sys.socket.socket
)
endif()

Expand Down
8 changes: 5 additions & 3 deletions libc/config/linux/riscv/headers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.assert
libc.include.ctype
libc.include.dirent
libc.include.dlfcn
libc.include.errno
libc.include.fcntl
libc.include.features
Expand All @@ -18,6 +19,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.setjmp
libc.include.stdckdint
libc.include.stdbit
libc.include.stdfix
libc.include.stdio
libc.include.stdlib
libc.include.string
Expand All @@ -33,17 +35,17 @@ set(TARGET_PUBLIC_HEADERS
libc.include.arpa_inet

libc.include.sys_auxv
# Disabled due to epoll_wait syscalls not being available on this platform.
# libc.include.sys_epoll
libc.include.sys_epoll
libc.include.sys_ioctl
libc.include.sys_mman
libc.include.sys_prctl
libc.include.sys_random
libc.include.sys_queue
libc.include.sys_random
libc.include.sys_resource
libc.include.sys_select
libc.include.sys_socket
libc.include.sys_stat
libc.include.sys_statvfs
libc.include.sys_syscall
libc.include.sys_time
libc.include.sys_types
Expand Down
10 changes: 7 additions & 3 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.sys.epoll.epoll_ctl
libc.src.sys.epoll.epoll_pwait
libc.src.sys.epoll.epoll_wait
# TODO: Need to check if pwait2 is available before providing.
# https://github.com/llvm/llvm-project/issues/80060
# libc.src.sys.epoll.epoll_pwait2
libc.src.sys.epoll.epoll_pwait2

# sys/mman.h entrypoints
libc.src.sys.mman.madvise
Expand Down Expand Up @@ -315,6 +313,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.geteuid
libc.src.unistd.getpid
libc.src.unistd.getppid
libc.src.unistd.gettid
libc.src.unistd.getuid
libc.src.unistd.isatty
libc.src.unistd.link
Expand Down Expand Up @@ -383,6 +382,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.coshf
libc.src.math.cospif
libc.src.math.dmull
libc.src.math.dsqrtl
libc.src.math.erff
libc.src.math.exp
libc.src.math.exp10
Expand Down Expand Up @@ -449,6 +449,8 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.fromfpx
libc.src.math.fromfpxf
libc.src.math.fromfpxl
libc.src.math.fsqrt
libc.src.math.fsqrtl
libc.src.math.hypot
libc.src.math.hypotf
libc.src.math.ilogb
Expand Down Expand Up @@ -642,6 +644,7 @@ if(LIBC_TYPES_HAS_FLOAT128)
libc.src.math.ceilf128
libc.src.math.copysignf128
libc.src.math.dmulf128
libc.src.math.dsqrtf128
libc.src.math.fabsf128
libc.src.math.fdimf128
libc.src.math.floorf128
Expand All @@ -660,6 +663,7 @@ if(LIBC_TYPES_HAS_FLOAT128)
libc.src.math.frexpf128
libc.src.math.fromfpf128
libc.src.math.fromfpxf128
libc.src.math.fsqrtf128
libc.src.math.ilogbf128
libc.src.math.ldexpf128
libc.src.math.llogbf128
Expand Down
3 changes: 3 additions & 0 deletions libc/docs/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,6 @@ to learn about the defaults for your platform and target.
* **"string" options**
- ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
- ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen.
* **"unistd" options**
- ``LIBC_CONF_ENABLE_PID_CACHE``: Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications.
- ``LIBC_CONF_ENABLE_TID_CACHE``: Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications.
23 changes: 23 additions & 0 deletions libc/docs/dev/undefined_behavior.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,26 @@ direction in this case.
Non-const Constant Return Values
--------------------------------
Some libc functions, like ``dlerror()``, return ``char *`` instead of ``const char *`` and then tell the caller they promise not to modify this value. Any modification of this value is undefined behavior.

Cached ``getpid/gettid``
------------------------
Since version ``2.25``, glibc has removed its cache mechanism for ``getpid/gettid``
(See the history section in https://man7.org/linux/man-pages/man2/getpid.2.html).
LLVM's libc still implements the cache as it is useful for fast deadlock detection.
The cache mechanism is also implemented in MUSL and bionic. The tid/pid cache can
be disabled by setting ``LIBC_CONF_ENABLE_TID_CACHE`` and ``LIBC_CONF_ENABLE_PID_CACHE``
to ``false`` respectively.

Unwrapped ``SYS_clone/SYS_fork/SYS_vfork``
------------------------------------------
It is highly discouraged to use unwrapped ``SYS_clone/SYS_fork/SYS_vfork``.
First, calling such syscalls without provided libc wrappers ignores
all the ``pthread_atfork`` entries as libc can no longer detect the ``fork``.
Second, libc relies on the ``fork/clone`` wrappers to correctly maintain cache for
process id and thread id, and other important process-specific states such as the list
of robust mutexes. Third, even if the user is to call ``exec*`` functions immediately,
there can still be other unexpected issues. For instance, there can be signal handlers
inherited from parent process triggered inside the instruction window between ``fork``
and ``exec*``. Because libc can no longer maintain its internal state correctly,
functions used inside the signal handlers may return wrong values or lead to even
worse situations, even though they are marked as ``async-signal-safe`` (such as ``getpid``).
4 changes: 2 additions & 2 deletions libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| cospi | |check| | | | | | 7.12.4.12 | F.10.1.12 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| dsqrt | N/A | N/A | | N/A | | 7.12.14.6 | F.10.11 |
| dsqrt | N/A | N/A | |check| | N/A | |check|\* | 7.12.14.6 | F.10.11 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| erf | |check| | | | | | 7.12.8.1 | F.10.5.1 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand All @@ -300,7 +300,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| f16sqrt | |check|\* | |check|\* | |check|\* | N/A | |check| | 7.12.14.6 | F.10.11 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fsqrt | N/A | | | N/A | | 7.12.14.6 | F.10.11 |
| fsqrt | N/A | |check| | |check| | N/A | |check|\* | 7.12.14.6 | F.10.11 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| hypot | |check| | |check| | | | | 7.12.7.4 | F.10.4.4 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand Down
11 changes: 4 additions & 7 deletions libc/include/assert.h.def
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,19 @@
// This file may be usefully included multiple times to change assert()'s
// definition based on NDEBUG.


#undef assert
#ifdef NDEBUG
#define assert(e) (void)0
#else

#ifndef __cplusplus
#undef static_assert
#define static_assert _Static_assert
#endif

#undef assert
#ifdef NDEBUG
#define assert(e) (void)0
#else
#ifdef __cplusplus
extern "C"
#endif
_Noreturn void __assert_fail(const char *, const char *, unsigned, const char *) __NOEXCEPT;

#define assert(e) \
((e) ? (void)0 : __assert_fail(#e, __FILE__, __LINE__, __PRETTY_FUNCTION__))
#endif
Expand Down
2 changes: 1 addition & 1 deletion libc/include/llvm-libc-types/fsblkcnt_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
#ifndef LLVM_LIBC_TYPES_FSBLKCNT_T_H
#define LLVM_LIBC_TYPES_FSBLKCNT_T_H

typedef __SIZE_TYPE__ fsblkcnt_t;
typedef __UINT64_TYPE__ fsblkcnt_t;

#endif // LLVM_LIBC_TYPES_FSBLKCNT_T_H
Loading