Skip to content

Commit

Permalink
[Bitfield] Add an option to access bitfield in a fine-grained manner.
Browse files Browse the repository at this point in the history
Currently, all consecutive bitfields are wrapped into a single large integer unless there is an unnamed zero-sized bitfield in between. This patch provides an alternative: a bitfield is accessed as a separate memory location if it has a legal integer width and is naturally aligned. Such a separate bitfield may split the original run of consecutive bitfields into subgroups of consecutive bitfields, and each subgroup is wrapped as an integer. This is all controlled by the option -ffine-grained-bitfield-accesses. This alternative access mode can improve the access efficiency of bitfields with legal width that are naturally aligned, but may reduce the chance of load/store combining for other bitfields, so whether to use the option depends on how the bitfields are defined and actually accessed. For now the option is off by default.

Differential revision: https://reviews.llvm.org/D36562

llvm-svn: 315915
  • Loading branch information
wmi-11 committed Oct 16, 2017
1 parent e8c1a54 commit 9b3d627
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 2 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Expand Up @@ -330,4 +330,8 @@ def warn_drv_msvc_not_found : Warning<
"unable to find a Visual Studio installation; "
"try running Clang from a developer command prompt">,
InGroup<DiagGroup<"msvc-not-found">>;

// Emitted when -ffine-grained-bitfield-accesses is requested together with a
// sanitizer; CompilerInvocation disables the option in that case.
def warn_drv_fine_grained_bitfield_accesses_ignored : Warning<
"option '-ffine-grained-bitfield-accesses' cannot be enabled together with a sanitizer; flag ignored">,
InGroup<OptionIgnored>;
}
7 changes: 7 additions & 0 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -1045,6 +1045,13 @@ def fxray_never_instrument :
Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Filename defining the whitelist for imbuing the 'never instrument' XRay attribute.">;

// Give a bitfield with legal integer width and natural alignment its own
// storage unit instead of merging it into a wider run (off by default).
def ffine_grained_bitfield_accesses : Flag<["-"],
"ffine-grained-bitfield-accesses">, Group<f_clang_Group>, Flags<[CC1Option]>,
HelpText<"Use separate accesses for bitfields with legal widths and alignments.">;
// Negative form: restore the default large-integer access for bitfield runs.
def fno_fine_grained_bitfield_accesses : Flag<["-"],
"fno-fine-grained-bitfield-accesses">, Group<f_clang_Group>, Flags<[CC1Option]>,
HelpText<"Use large-integer access for consecutive bitfield runs.">;

def flat__namespace : Flag<["-"], "flat_namespace">;
def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group<f_Group>;
def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group<f_Group>;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Frontend/CodeGenOptions.def
Expand Up @@ -179,6 +179,7 @@ CODEGENOPT(SanitizeCoverageStackDepth, 1, 0) ///< Enable max stack depth tracing
CODEGENOPT(SanitizeStats , 1, 0) ///< Collect statistics for sanitizers.
CODEGENOPT(SimplifyLibCalls , 1, 1) ///< Set when -fbuiltin is enabled.
CODEGENOPT(SoftFloat , 1, 0) ///< -soft-float.
CODEGENOPT(FineGrainedBitfieldAccesses, 1, 0) ///< Enable fine-grained bitfield accesses.
CODEGENOPT(StrictEnums , 1, 0) ///< Optimize based on strict enum definition.
CODEGENOPT(StrictVTablePointers, 1, 0) ///< Optimize based on the strict vtable pointers
CODEGENOPT(TimePasses , 1, 0) ///< Set when -ftime-report is enabled.
Expand Down
37 changes: 35 additions & 2 deletions clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
Expand Up @@ -403,6 +403,27 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
}
return;
}

// Check if current Field is better as a single field run. When the current
// field has a legal integer width and its bitfield offset is naturally
// aligned, it is better to make the bitfield a separate storage component so
// that it can be accessed directly with lower cost.
auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
return false;
unsigned Width = Field->getBitWidthValue(Context);
if (!DataLayout.isLegalInteger(Width))
return false;
// Make sure Field is naturally aligned if it is treated as an IType integer.
if (getFieldBitOffset(*Field) %
Context.toBits(getAlignment(getIntNType(Width))) !=
0)
return false;
return true;
};

// The start field is better as a single field run.
bool StartFieldAsSingleRun = false;
for (;;) {
// Check to see if we need to start a new run.
if (Run == FieldEnd) {
Expand All @@ -414,17 +435,28 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
Run = Field;
StartBitOffset = getFieldBitOffset(*Field);
Tail = StartBitOffset + Field->getBitWidthValue(Context);
StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run);
}
++Field;
continue;
}
// Add bitfields to the run as long as they qualify.
if (Field != FieldEnd && Field->getBitWidthValue(Context) != 0 &&

// If the start field of a new run is better as a single run, or
// if current field is better as a single run, or
// if current field has zero width bitfield, or
// if the offset of current field is inconsistent with the offset of
// previous field plus its width,
// skip the block below and go ahead to emit the storage.
// Otherwise, try to add bitfields to the run.
if (!StartFieldAsSingleRun && Field != FieldEnd &&
!IsBetterAsSingleFieldRun(Field) &&
Field->getBitWidthValue(Context) != 0 &&
Tail == getFieldBitOffset(*Field)) {
Tail += Field->getBitWidthValue(Context);
++Field;
continue;
}

// We've hit a break-point in the run and need to emit a storage field.
llvm::Type *Type = getIntNType(Tail - StartBitOffset);
// Add the storage member to the record and set the bitfield info for all of
Expand All @@ -435,6 +467,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
MemberInfo::Field, nullptr, *Run));
Run = FieldEnd;
StartFieldAsSingleRun = false;
}
}

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -3365,6 +3365,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_optimize_sibling_calls))
CmdArgs.push_back("-mdisable-tail-calls");

Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses,
options::OPT_fno_fine_grained_bitfield_accesses);

// Handle segmented stacks.
if (Args.hasArg(options::OPT_fsplit_stack))
CmdArgs.push_back("-split-stacks");
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Expand Up @@ -546,6 +546,9 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
OPT_fuse_register_sized_bitfield_access);
Opts.RelaxedAliasing = Args.hasArg(OPT_relaxed_aliasing);
Opts.StructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa);
Opts.FineGrainedBitfieldAccesses =
Args.hasFlag(OPT_ffine_grained_bitfield_accesses,
OPT_fno_fine_grained_bitfield_accesses, false);
Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags);
Opts.MergeAllConstants = !Args.hasArg(OPT_fno_merge_all_constants);
Opts.NoCommon = Args.hasArg(OPT_fno_common);
Expand Down Expand Up @@ -2763,6 +2766,13 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
if (Arch == llvm::Triple::spir || Arch == llvm::Triple::spir64) {
Res.getDiagnosticOpts().Warnings.push_back("spir-compat");
}

// If sanitizer is enabled, disable OPT_ffine_grained_bitfield_accesses.
if (Res.getCodeGenOpts().FineGrainedBitfieldAccesses &&
!Res.getLangOpts()->Sanitize.empty()) {
Res.getCodeGenOpts().FineGrainedBitfieldAccesses = false;
Diags.Report(diag::warn_drv_fine_grained_bitfield_accesses_ignored);
}
return Success;
}

Expand Down
162 changes: 162 additions & 0 deletions clang/test/CodeGenCXX/finegrain-bitfield-access.cpp
@@ -0,0 +1,162 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \
// RUN: -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \
// RUN: -emit-llvm -fsanitize=address -o - %s | FileCheck %s --check-prefix=SANITIZE
// Check that -ffine-grained-bitfield-accesses is ignored since a sanitizer is enabled.

// 28 bits of bitfields in one allocation unit. f3 is 8 bits wide starting at
// bit offset 8, so it is naturally aligned for an i8 access; f5 starts at bit
// offset 20 and is therefore NOT byte-aligned for a standalone i8.
struct S1 {
unsigned f1:2;
unsigned f2:6;
unsigned f3:8;
unsigned f4:4;
unsigned f5:8;
};

S1 a1;
// f3 is byte-aligned and 8 bits wide, so fine-grained mode reads it with a
// bare i8 load; under ASan the option is ignored and the whole 32-bit run is
// loaded and shifted instead.
unsigned read8_1() {
// CHECK-LABEL: @_Z7read8_1v
// CHECK: %bf.load = load i8, i8* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 1), align 1
// CHECK-NEXT: %bf.cast = zext i8 %bf.load to i32
// CHECK-NEXT: ret i32 %bf.cast
// SANITIZE-LABEL: @_Z7read8_1v
// SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4
// SANITIZE: %bf.lshr = lshr i32 %bf.load, 8
// SANITIZE: %bf.clear = and i32 %bf.lshr, 255
// SANITIZE: ret i32 %bf.clear
return a1.f3;
}
// Storing to f3 in fine-grained mode is a single i8 store (no read-modify-
// write); under ASan it remains a load/mask/or/store of the full i32 run.
void write8_1() {
// CHECK-LABEL: @_Z8write8_1v
// CHECK: store i8 3, i8* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 1), align 1
// CHECK-NEXT: ret void
// SANITIZE-LABEL: @_Z8write8_1v
// SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4
// SANITIZE-NEXT: %bf.clear = and i32 %bf.load, -65281
// SANITIZE-NEXT: %bf.set = or i32 %bf.clear, 768
// SANITIZE-NEXT: store i32 %bf.set, i32* getelementptr inbounds {{.*}}, align 4
// SANITIZE-NEXT: ret void
a1.f3 = 3;
}

// f5 starts at bit offset 20, so it is not naturally aligned for a standalone
// i8; it is accessed as part of an i16 unit (shared with f4) and extracted
// with a shift/mask even in fine-grained mode.
unsigned read8_2() {
// CHECK-LABEL: @_Z7read8_2v
// CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 2), align 2
// CHECK-NEXT: %bf.lshr = lshr i16 %bf.load, 4
// CHECK-NEXT: %bf.clear = and i16 %bf.lshr, 255
// CHECK-NEXT: %bf.cast = zext i16 %bf.clear to i32
// CHECK-NEXT: ret i32 %bf.cast
// SANITIZE-LABEL: @_Z7read8_2v
// SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4
// SANITIZE-NEXT: %bf.lshr = lshr i32 %bf.load, 20
// SANITIZE-NEXT: %bf.clear = and i32 %bf.lshr, 255
// SANITIZE-NEXT: ret i32 %bf.clear
return a1.f5;
}
// Writing f5 needs a read-modify-write of the shared i16 unit in fine-grained
// mode, or of the whole i32 run under ASan.
void write8_2() {
// CHECK-LABEL: @_Z8write8_2v
// CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 2), align 2
// CHECK-NEXT: %bf.clear = and i16 %bf.load, -4081
// CHECK-NEXT: %bf.set = or i16 %bf.clear, 48
// CHECK-NEXT: store i16 %bf.set, i16* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 2), align 2
// CHECK-NEXT: ret void
// SANITIZE-LABEL: @_Z8write8_2v
// SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4
// SANITIZE-NEXT: %bf.clear = and i32 %bf.load, -267386881
// SANITIZE-NEXT: %bf.set = or i32 %bf.clear, 3145728
// SANITIZE-NEXT: store i32 %bf.set, i32* getelementptr inbounds {{.*}}, align 4
// SANITIZE-NEXT: ret void
a1.f5 = 3;
}

// f1 and f2 are each 16 bits wide at 16-bit-aligned offsets (0 and 16), so
// both qualify for separate i16 storage units in fine-grained mode.
struct S2 {
unsigned long f1:16;
unsigned long f2:16;
unsigned long f3:6;
};

S2 a2;
// f1 gets its own i16 load in fine-grained mode; under ASan the whole 64-bit
// run is loaded and masked.
unsigned read16_1() {
// CHECK-LABEL: @_Z8read16_1v
// CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 0), align 8
// CHECK-NEXT: %bf.cast = zext i16 %bf.load to i64
// CHECK-NEXT: %conv = trunc i64 %bf.cast to i32
// CHECK-NEXT: ret i32 %conv
// SANITIZE-LABEL: @_Z8read16_1v
// SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8
// SANITIZE-NEXT: %bf.clear = and i64 %bf.load, 65535
// SANITIZE-NEXT: %conv = trunc i64 %bf.clear to i32
// SANITIZE-NEXT: ret i32 %conv
return a2.f1;
}
// f2 (offset 16) also gets a separate i16 load in fine-grained mode; under
// ASan the 64-bit run is loaded, shifted and masked.
unsigned read16_2() {
// CHECK-LABEL: @_Z8read16_2v
// CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 1), align 2
// CHECK-NEXT: %bf.cast = zext i16 %bf.load to i64
// CHECK-NEXT: %conv = trunc i64 %bf.cast to i32
// CHECK-NEXT: ret i32 %conv
// SANITIZE-LABEL: @_Z8read16_2v
// SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8
// SANITIZE-NEXT: %bf.lshr = lshr i64 %bf.load, 16
// SANITIZE-NEXT: %bf.clear = and i64 %bf.lshr, 65535
// SANITIZE-NEXT: %conv = trunc i64 %bf.clear to i32
// SANITIZE-NEXT: ret i32 %conv
return a2.f2;
}

// Storing f1 is a bare i16 store in fine-grained mode (no read-modify-write);
// under ASan it is a masked read-modify-write of the i64 run.
void write16_1() {
// CHECK-LABEL: @_Z9write16_1v
// CHECK: store i16 5, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 0), align 8
// CHECK-NEXT: ret void
// SANITIZE-LABEL: @_Z9write16_1v
// SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8
// SANITIZE-NEXT: %bf.clear = and i64 %bf.load, -65536
// SANITIZE-NEXT: %bf.set = or i64 %bf.clear, 5
// SANITIZE-NEXT: store i64 %bf.set, i64* bitcast {{.*}}, align 8
// SANITIZE-NEXT: ret void
a2.f1 = 5;
}
// Storing f2 is likewise a bare i16 store in fine-grained mode, versus a
// 64-bit read-modify-write under ASan.
void write16_2() {
// CHECK-LABEL: @_Z9write16_2v
// CHECK: store i16 5, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 1), align 2
// CHECK-NEXT: ret void
// SANITIZE-LABEL: @_Z9write16_2v
// SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8
// SANITIZE-NEXT: %bf.clear = and i64 %bf.load, -4294901761
// SANITIZE-NEXT: %bf.set = or i64 %bf.clear, 327680
// SANITIZE-NEXT: store i64 %bf.set, i64* bitcast {{.*}}, align 8
// SANITIZE-NEXT: ret void
a2.f2 = 5;
}

// f3 is 32 bits wide at bit offset 32 (f1 + f2 = 14 + 18), i.e. naturally
// aligned for an i32, so it qualifies for a separate i32 storage unit.
struct S3 {
unsigned long f1:14;
unsigned long f2:18;
unsigned long f3:32;
};

S3 a3;
// f3 gets its own aligned i32 load in fine-grained mode; under ASan the whole
// i64 run is loaded and shifted.
unsigned read32_1() {
// CHECK-LABEL: @_Z8read32_1v
// CHECK: %bf.load = load i32, i32* getelementptr inbounds (%struct.S3, %struct.S3* @a3, i32 0, i32 1), align 4
// CHECK-NEXT: %bf.cast = zext i32 %bf.load to i64
// CHECK-NEXT: %conv = trunc i64 %bf.cast to i32
// CHECK-NEXT: ret i32 %conv
// SANITIZE-LABEL: @_Z8read32_1v
// SANITIZE: %bf.load = load i64, i64* getelementptr inbounds {{.*}}, align 8
// SANITIZE-NEXT: %bf.lshr = lshr i64 %bf.load, 32
// SANITIZE-NEXT: %conv = trunc i64 %bf.lshr to i32
// SANITIZE-NEXT: ret i32 %conv
return a3.f3;
}
// Storing f3 is a bare i32 store in fine-grained mode; under ASan it is a
// masked read-modify-write of the i64 run.
void write32_1() {
// CHECK-LABEL: @_Z9write32_1v
// CHECK: store i32 5, i32* getelementptr inbounds (%struct.S3, %struct.S3* @a3, i32 0, i32 1), align 4
// CHECK-NEXT: ret void
// SANITIZE-LABEL: @_Z9write32_1v
// SANITIZE: %bf.load = load i64, i64* getelementptr inbounds {{.*}}, align 8
// SANITIZE-NEXT: %bf.clear = and i64 %bf.load, 4294967295
// SANITIZE-NEXT: %bf.set = or i64 %bf.clear, 21474836480
// SANITIZE-NEXT: store i64 %bf.set, i64* getelementptr inbounds {{.*}}, align 8
// SANITIZE-NEXT: ret void
a3.f3 = 5;
}

0 comments on commit 9b3d627

Please sign in to comment.