Skip to content

Commit

Permalink
Expose -mllvm -accurate-sample-profile to clang.
Browse files Browse the repository at this point in the history
Summary: With an accurate sample profile, we can do more aggressive size optimization. For some size-critical applications, this can reduce the text size by 20%.

Reviewers: davidxl, rsmith

Reviewed By: davidxl, rsmith

Subscribers: mehdi_amini, eraman, sanjoy, cfe-commits

Differential Revision: https://reviews.llvm.org/D37091

llvm-svn: 311707
  • Loading branch information
danielcdh committed Aug 24, 2017
1 parent f0e27e6 commit 5e97f23
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 0 deletions.
13 changes: 13 additions & 0 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -637,12 +637,25 @@ def fno_profile_sample_use : Flag<["-"], "fno-profile-sample-use">, Group<f_Grou
def fprofile_sample_use_EQ : Joined<["-"], "fprofile-sample-use=">,
Group<f_Group>, Flags<[DriverOption, CC1Option]>,
HelpText<"Enable sample-based profile guided optimizations">;
def fprofile_sample_accurate : Flag<["-"], "fprofile-sample-accurate">,
Group<f_Group>, Flags<[DriverOption, CC1Option]>,
HelpText<"Specifies that the sample profile is accurate">,
DocBrief<[{Specifies that the sample profile is accurate. If the sample
profile is accurate, callsites without profile samples are marked
as cold. Otherwise, treat callsites without profile samples as if
we have no profile}]>;
def fno_profile_sample_accurate : Flag<["-"], "fno-profile-sample-accurate">,
Group<f_Group>, Flags<[DriverOption]>;
def fauto_profile : Flag<["-"], "fauto-profile">, Group<f_Group>,
Alias<fprofile_sample_use>;
def fno_auto_profile : Flag<["-"], "fno-auto-profile">, Group<f_Group>,
Alias<fno_profile_sample_use>;
def fauto_profile_EQ : Joined<["-"], "fauto-profile=">,
Alias<fprofile_sample_use_EQ>;
def fauto_profile_accurate : Flag<["-"], "fauto-profile-accurate">,
Group<f_Group>, Alias<fprofile_sample_accurate>;
def fno_auto_profile_accurate : Flag<["-"], "fno-auto-profile-accurate">,
Group<f_Group>, Alias<fno_profile_sample_accurate>;
def fdebug_info_for_profiling : Flag<["-"], "fdebug-info-for-profiling">, Group<f_Group>,
Flags<[CC1Option]>,
HelpText<"Emit extra debug info to make sample profile more accurate.">;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Frontend/CodeGenOptions.def
Expand Up @@ -183,6 +183,7 @@ CODEGENOPT(UnsafeFPMath , 1, 0) ///< Allow unsafe floating point optzns.
CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables.
CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.

/// Attempt to use register sized accesses to bit-fields in structures, when
/// possible.
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Expand Up @@ -838,6 +838,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr("no-jump-tables",
llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables));

// Add profile-sample-accurate value.
if (CGM.getCodeGenOpts().ProfileSampleAccurate)
Fn->addFnAttr("profile-sample-accurate");

if (getLangOpts().OpenCL) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -2340,6 +2340,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
true))
CmdArgs.push_back("-fno-jump-tables");

if (Args.hasFlag(options::OPT_fprofile_sample_accurate,
options::OPT_fno_profile_sample_accurate, false))
CmdArgs.push_back("-fprofile-sample-accurate");

if (!Args.hasFlag(options::OPT_fpreserve_as_comments,
options::OPT_fno_preserve_as_comments, true))
CmdArgs.push_back("-fno-preserve-as-comments");
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Expand Up @@ -652,6 +652,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,

Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables);

Opts.ProfileSampleAccurate = Args.hasArg(OPT_fprofile_sample_accurate);

Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ);
Opts.EmitSummaryIndex = false;
if (Arg *A = Args.getLastArg(OPT_flto_EQ)) {
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGen/profile-sample-accurate.c
@@ -0,0 +1,7 @@
// Test to ensure -fprofile-sample-accurate is honored by clang when emitting LLVM IR.
// RUN: %clang -S -emit-llvm %s -fprofile-sample-accurate -o - | FileCheck %s

// CHECK: define void @foo()
// CHECK: attributes {{.*}} "profile-sample-accurate"
void foo() {
}
3 changes: 3 additions & 0 deletions clang/test/Driver/clang_f_opts.c
Expand Up @@ -53,6 +53,9 @@
// CHECK-REROLL-LOOPS: "-freroll-loops"
// CHECK-NO-REROLL-LOOPS-NOT: "-freroll-loops"

// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"

// RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s
// CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"

Expand Down
9 changes: 9 additions & 0 deletions clang/test/Integration/thinlto_profile_sample_accurate.c
@@ -0,0 +1,9 @@
// Test to ensure -fprofile-sample-accurate is honored in ThinLTO when emitting LLVM IR.
// RUN: %clang -O2 %s -flto=thin -fprofile-sample-accurate -c -o %t.o
// RUN: llvm-lto -thinlto -o %t %t.o
// RUN: %clang_cc1 -O2 -x ir %t.o -fthinlto-index=%t.thinlto.bc -emit-llvm -o - | FileCheck %s

// CHECK: define void @foo()
// CHECK: attributes {{.*}} "profile-sample-accurate"
void foo() {
}

0 comments on commit 5e97f23

Please sign in to comment.