Skip to content

Commit

Permalink
Retry: [ubsan] Detect UB loads from bitfields
Browse files Browse the repository at this point in the history
It's possible to load out-of-range values from bitfields backed by a
boolean or an enum. Check for UB loads from bitfields.

This is the motivating example:

  struct S {
    BOOL b : 1; // Signed ObjC BOOL.
  };

  S s;
  s.b = 1; // This is actually stored as -1.
  if (s.b == 1) // Evaluates to false, -1 != 1.
    ...

Changes since the original commit:

- Single-bit bools are a special case (see CGF::EmitFromMemory), and we
  can't avoid dealing with them when loading from a bitfield. Don't try to
  insert a check in this case.

Differential Revision: https://reviews.llvm.org/D30423

llvm-svn: 297389
  • Loading branch information
vedantk committed Mar 9, 2017
1 parent 4d297df commit 129edab
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 7 deletions.
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGAtomic.cpp
Expand Up @@ -1181,7 +1181,7 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
if (LVal.isBitField())
return CGF.EmitLoadOfBitfieldLValue(
LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
LVal.getAlignmentSource()));
LVal.getAlignmentSource()), loc);
if (LVal.isVectorElt())
return CGF.EmitLoadOfLValue(
LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
Expand Down
14 changes: 11 additions & 3 deletions clang/lib/CodeGen/CGExpr.cpp
Expand Up @@ -1327,6 +1327,13 @@ bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
if (!NeedsBoolCheck && !NeedsEnumCheck)
return false;

// Single-bit booleans don't need to be checked. Special-case this to avoid
// a bit width mismatch when handling bitfield values. This is handled by
// EmitFromMemory for the non-bitfield case.
if (IsBool &&
cast<llvm::IntegerType>(Value->getType())->getBitWidth() == 1)
return false;

llvm::APInt Min, End;
if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool))
return true;
Expand Down Expand Up @@ -1549,10 +1556,11 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
return EmitLoadOfGlobalRegLValue(LV);

assert(LV.isBitField() && "Unknown LValue type!");
return EmitLoadOfBitfieldLValue(LV);
return EmitLoadOfBitfieldLValue(LV, Loc);
}

RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV,
SourceLocation Loc) {
const CGBitFieldInfo &Info = LV.getBitFieldInfo();

// Get the output type.
Expand All @@ -1577,7 +1585,7 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
"bf.clear");
}
Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast");

EmitScalarRangeCheck(Val, LV.getType(), Loc);
return RValue::get(Val);
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CodeGenFunction.h
Expand Up @@ -2943,7 +2943,7 @@ class CodeGenFunction : public CodeGenTypeCache {
/// rvalue, returning the rvalue.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc);
RValue EmitLoadOfExtVectorElementLValue(LValue V);
RValue EmitLoadOfBitfieldLValue(LValue LV);
RValue EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc);
RValue EmitLoadOfGlobalRegLValue(LValue LV);

/// EmitStoreThroughLValue - Store the specified rvalue into the specified
Expand Down
34 changes: 34 additions & 0 deletions clang/test/CodeGenCXX/ubsan-bitfields.cpp
@@ -0,0 +1,34 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin10 -emit-llvm -o - %s -fsanitize=bool,enum | FileCheck %s

enum E {
a = 1,
b = 2,
c = 3
};

struct S {
E e1 : 10;
};

// CHECK-LABEL: define i32 @_Z4loadP1S
E load(S *s) {
// CHECK: [[LOAD:%.*]] = load i16, i16* {{.*}}
// CHECK: [[CLEAR:%.*]] = and i16 [[LOAD]], 1023
// CHECK: [[CAST:%.*]] = zext i16 [[CLEAR]] to i32
// CHECK: icmp ule i32 [[CAST]], 3, !nosanitize
// CHECK: call void @__ubsan_handle_load_invalid_value
return s->e1;
}

struct Bool {
bool b1 : 1;
bool b2 : 7;
bool b3 : 16;
};

// CHECK-LABEL: define zeroext i1 @_Z13load_cpp_boolP4Bool
bool load_cpp_bool(Bool *b) {
// CHECK-NOT: call void @__ubsan_handle_load_invalid_value
// CHECK-NOT: !nosanitize
return b->b1 || b->b2 || b->b3;
}
57 changes: 55 additions & 2 deletions clang/test/CodeGenObjC/ubsan-bool.m
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -x objective-c -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=bool %s -o - | FileCheck %s -check-prefixes=SHARED,OBJC
// RUN: %clang_cc1 -x objective-c++ -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=bool %s -o - | FileCheck %s -check-prefixes=SHARED,OBJC
// RUN: %clang_cc1 -x objective-c -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=bool %s -o - -w | FileCheck %s -check-prefixes=SHARED,OBJC
// RUN: %clang_cc1 -x objective-c++ -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=bool %s -o - -w | FileCheck %s -check-prefixes=SHARED,OBJC
// RUN: %clang_cc1 -x c -emit-llvm -triple x86_64-apple-macosx10.10.0 -fsanitize=bool %s -o - | FileCheck %s -check-prefixes=SHARED,C

typedef signed char BOOL;
Expand All @@ -10,4 +10,57 @@ BOOL f1() {
// C-NOT: call void @__ubsan_handle_load_invalid_value
BOOL a = 2;
return a + 1;
// SHARED: ret i8
}

struct S1 {
BOOL b1 : 1;
};

// SHARED-LABEL: f2
BOOL f2(struct S1 *s) {
// OBJC: [[LOAD:%.*]] = load i8, i8* {{.*}}
// OBJC: [[SHL:%.*]] = shl i8 [[LOAD]], 7
// OBJC: [[ASHR:%.*]] = ashr i8 [[SHL]], 7
// OBJC: icmp ule i8 [[ASHR]], 1, !nosanitize
// OBJC: call void @__ubsan_handle_load_invalid_value

// C-NOT: call void @__ubsan_handle_load_invalid_value
return s->b1;
// SHARED: ret i8
}

#ifdef __OBJC__
@interface I1 {
@public
BOOL b1 : 1;
}
@property (nonatomic) BOOL b1;
@end
@implementation I1
@synthesize b1;
@end

// Check the synthesized getter.
// OBJC-LABEL: define internal signext i8 @"\01-[I1 b1]"
// OBJC: [[IVAR:%.*]] = load i64, i64* @"OBJC_IVAR_$_I1.b1"
// OBJC: [[ADDR:%.*]] = getelementptr inbounds i8, i8* {{.*}}, i64 [[IVAR]]
// OBJC: [[LOAD:%.*]] = load i8, i8* {{.*}}
// OBJC: [[SHL:%.*]] = shl i8 [[LOAD]], 7
// OBJC: [[ASHR:%.*]] = ashr i8 [[SHL]], 7
// OBJC: icmp ule i8 [[ASHR]], 1, !nosanitize
// OBJC: call void @__ubsan_handle_load_invalid_value

// Also check direct accesses to the ivar.
// OBJC-LABEL: f3
BOOL f3(I1 *i) {
// OBJC: [[LOAD:%.*]] = load i8, i8* {{.*}}
// OBJC: [[SHL:%.*]] = shl i8 [[LOAD]], 7
// OBJC: [[ASHR:%.*]] = ashr i8 [[SHL]], 7
// OBJC: icmp ule i8 [[ASHR]], 1, !nosanitize
// OBJC: call void @__ubsan_handle_load_invalid_value

return i->b1;
// OBJC: ret i8
}
#endif /* __OBJC__ */

0 comments on commit 129edab

Please sign in to comment.