diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0dbe667e4f07a..23d9fb246977a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -221,6 +221,16 @@ Attribute Changes in Clang foreign language personality with a given function. Note that this does not perform any ABI validation for the personality routine. +- The ``__attribute__((flatten))`` attribute behavior has changed to match + GCC. Previously, Clang only inlined direct callees of the attributed + function. Now, all calls are inlined transitively, including calls + introduced by inlining. Calls that cannot be inlined are left as-is: + this includes callees marked ``noinline``, callees with incompatible ABI + attributes (e.g. SME), callees without a visible definition, and + recursive calls where a function already appears in the inlining chain. + Flatten also works across ThinLTO module boundaries when callee + definitions are available. + - The :doc:`ThreadSafetyAnalysis` attributes ``guarded_by`` and ``pt_guarded_by`` now accept multiple capability arguments with refined access semantics: *writing* requires all listed capabilities to be held diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 4c0ea9ec3ea9c..5470f33d1ec7e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5968,17 +5968,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. - // Apply always_inline to all calls within flatten functions. - // FIXME: should this really take priority over __try, below? 
- if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !InNoInlineAttributedStmt && - !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) && - !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI( - CallerDecl, CalleeDecl)) { - Attrs = - Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); - } - // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3fcd6f5f904db..090ff06e1c555 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2946,6 +2946,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (CodeGenOpts.DisableOutlining || D->hasAttr<NoOutlineAttr>()) B.addAttribute(llvm::Attribute::NoOutline); + if (D->hasAttr<FlattenAttr>()) + B.addAttribute(llvm::Attribute::Flatten); + F->addFnAttrs(B); llvm::MaybeAlign ExplicitAlignment; diff --git a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c index 2071e66e0d652..baa02926d9bf7 100644 --- a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c +++ b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s --check-prefix=CHECK-FLATTEN +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s --check-prefix=CHECK-ALWAYS-INLINE // REQUIRES: 
aarch64-registered-target @@ -31,14 +31,26 @@ void caller(void) { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming -// CHECK-NEXT: call void @fn_locally_streaming -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: fn() and fn_streaming_compatible() are inlined, streaming functions +// are blocked by TTI (non-streaming caller), new_za/new_zt0 are always blocked. +// CHECK-FLATTEN-LABEL: void @caller() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming +// CHECK-FLATTEN-NEXT: call void @fn_locally_streaming +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang's wouldInliningViolateFunctionCallABI controls which calls are inlined. 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_locally_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible { STMT_ATTR fn(); @@ -48,14 +60,26 @@ FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller_streaming_compatible() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @fn -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming -// CHECK-NEXT: call void @fn_locally_streaming -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: TTI allows inlining fn(), fn_streaming_compatible(), fn_streaming(), +// fn_locally_streaming() because they don't have incompatible ops. Only new_za/new_zt0 blocked. +// CHECK-FLATTEN-LABEL: void @caller_streaming_compatible() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang blocks fn() (streaming-compatible caller, non-streaming callee). 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_streaming_compatible() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @fn +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_locally_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 FN_ATTR void caller_streaming(void) __arm_streaming { STMT_ATTR fn(); @@ -65,14 +89,26 @@ FN_ATTR void caller_streaming(void) __arm_streaming { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller_streaming() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @fn -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: TTI allows all except new_za/new_zt0. fn() is inlined because +// streaming caller can execute non-streaming callee's code (no incompatible ops). +// CHECK-FLATTEN-LABEL: void @caller_streaming() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang blocks fn() (streaming caller, non-streaming callee). 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_streaming() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @fn +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 FN_ATTR __arm_locally_streaming void caller_locally_streaming(void) { @@ -83,11 +119,22 @@ void caller_locally_streaming(void) { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller_locally_streaming() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @fn -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: Similar to caller_streaming - TTI allows all except new_za/new_zt0. +// CHECK-FLATTEN-LABEL: void @caller_locally_streaming() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang blocks fn(). 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_locally_streaming() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @fn +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c index 4e762223de486..3d3b5928c366d 100644 --- a/clang/test/CodeGen/flatten.c +++ b/clang/test/CodeGen/flatten.c @@ -1,19 +1,8 @@ -// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s - -void f(void) {} - -__attribute__((noinline)) void ni(void) {} +// RUN: %clang_cc1 -triple=x86_64-linux-gnu -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]] __attribute__((flatten)) -// CHECK: define{{.*}} void @g() void g(void) { - // CHECK-NOT: call {{.*}} @f - f(); - // CHECK: call {{.*}} @ni - ni(); } -void h(void) { - // CHECK: call {{.*}} @f - f(); -} +// CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}}