Skip to content

Commit

Permalink
[clang][UBSan] Sanitization for alignment assumptions.
Browse files Browse the repository at this point in the history
Summary:
UB isn't nice. It's cool and powerful, but not nice.
Having a way to detect it is nice though.
[[ https://wg21.link/p1007r3 | P1007R3: std::assume_aligned ]] / http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p1007r2.pdf says:
```
We propose to add this functionality via a library function instead of a core language attribute.
...
If the pointer passed in is not aligned to at least N bytes, calling assume_aligned results in undefined behaviour.
```

This differential teaches clang to sanitize all the various variants of this assume-aligned attribute.

Requires D54588 for LLVM IRBuilder changes.
The compiler-rt part is D54590.

This is a second commit, the original one was r351105,
which was mass-reverted in r351159 because 2 compiler-rt tests were failing.

Reviewers: ABataev, craig.topper, vsk, rsmith, rnk, #sanitizers, erichkeane, filcab, rjmccall

Reviewed By: rjmccall

Subscribers: chandlerc, ldionne, EricWF, mclow.lists, cfe-commits, bkramer

Tags: #sanitizers

Differential Revision: https://reviews.llvm.org/D54589

llvm-svn: 351177
  • Loading branch information
LebedevRI committed Jan 15, 2019
1 parent 6d0413f commit bd1c087
Show file tree
Hide file tree
Showing 19 changed files with 607 additions and 26 deletions.
43 changes: 43 additions & 0 deletions clang/docs/ReleaseNotes.rst
Expand Up @@ -321,6 +321,49 @@ Undefined Behavior Sanitizer (UBSan)
* The Implicit Conversion Sanitizer (``-fsanitize=implicit-conversion``) has
learned to sanitize compound assignment operators.

* ``alignment`` check has learned to sanitize the assume_aligned-like attributes:

.. code-block:: c++

typedef char **__attribute__((align_value(1024))) aligned_char;
struct ac_struct {
aligned_char a;
};
char **load_from_ac_struct(struct ac_struct *x) {
return x->a; // <- check that loaded 'a' is aligned
}
char **passthrough(__attribute__((align_value(1024))) char **x) {
return x; // <- check the pointer passed as function argument
}
char **__attribute__((alloc_align(2)))
alloc_align(int size, unsigned long alignment);
char **caller(int size) {
return alloc_align(size, 1024); // <- check returned pointer
}
char **__attribute__((assume_aligned(1024))) get_ptr();
char **caller2() {
return get_ptr(); // <- check returned pointer
}
void *caller3(char **x) {
return __builtin_assume_aligned(x, 1024); // <- check returned pointer
}
void *caller4(char **x, unsigned long offset) {
return __builtin_assume_aligned(x, 1024, offset); // <- check returned pointer accounting for the offest
}
void process(char *data, int width) {
#pragma omp for simd aligned(data : 1024) // <- aligned clause will be checked.
for (int x = 0; x < width; x++)
data[x] *= data[x];
}
Core Analysis Improvements
==========================

Expand Down
2 changes: 1 addition & 1 deletion clang/docs/UndefinedBehaviorSanitizer.rst
Expand Up @@ -72,7 +72,7 @@ Available checks
Available checks are:

- ``-fsanitize=alignment``: Use of a misaligned pointer or creation
of a misaligned reference.
of a misaligned reference. Also sanitizes assume_aligned-like attributes.
- ``-fsanitize=bool``: Load of a ``bool`` value which is neither
``true`` nor ``false``.
- ``-fsanitize=builtin``: Passing invalid values to compiler builtins.
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -1904,15 +1904,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
case Builtin::BI__builtin_assume_aligned: {
Value *PtrValue = EmitScalarExpr(E->getArg(0));
const Expr *Ptr = E->getArg(0);
Value *PtrValue = EmitScalarExpr(Ptr);
Value *OffsetValue =
(E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();

EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(),
Alignment, OffsetValue);
return RValue::get(PtrValue);
}
case Builtin::BI__assume:
Expand Down
18 changes: 11 additions & 7 deletions clang/lib/CodeGen/CGCall.cpp
Expand Up @@ -2410,7 +2410,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (!AVAttr)
if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
if (AVAttr) {
if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) {
// If alignment-assumption sanitizer is enabled, we do *not* add
// alignment attribute here, but emit normal alignment assumption,
// so the UBSAN check could function.
llvm::Value *AlignmentValue =
EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI =
Expand Down Expand Up @@ -4535,13 +4538,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,

llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(),
OffsetValue);
EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
AlignmentCI->getZExtValue(), OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
llvm::Value *ParamVal =
CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue(
*this).getScalarVal();
EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()]
.getRValue(*this)
.getScalarVal();
EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
AlignmentVal);
}
}

Expand Down
10 changes: 7 additions & 3 deletions clang/lib/CodeGen/CGExprScalar.cpp
Expand Up @@ -258,8 +258,11 @@ class ScalarExprEmitter
AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>();
} else {
// Assumptions for function parameters are emitted at the start of the
// function, so there is no need to repeat that here.
if (isa<ParmVarDecl>(VD))
// function, so there is no need to repeat that here,
// unless the alignment-assumption sanitizer is enabled,
// then we prefer the assumption over alignment attribute
// on IR function param.
if (isa<ParmVarDecl>(VD) && !CGF.SanOpts.has(SanitizerKind::Alignment))
return;

AVAttr = VD->getAttr<AlignValueAttr>();
Expand All @@ -276,7 +279,8 @@ class ScalarExprEmitter

Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue);
CGF.EmitAlignmentAssumption(V, AlignmentCI->getZExtValue());
CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(),
AlignmentCI->getZExtValue());
}

/// EmitLoadOfLValue - Given an expression with complex type that represents a
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGStmtOpenMP.cpp
Expand Up @@ -1472,7 +1472,8 @@ static void emitAlignedClause(CodeGenFunction &CGF,
"alignment is not power of 2");
if (Alignment != 0) {
llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
CGF.EmitAlignmentAssumption(PtrValue, Alignment);
CGF.EmitAlignmentAssumption(
PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment);
}
}
}
Expand Down
98 changes: 98 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Expand Up @@ -2207,6 +2207,49 @@ void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) {
protection.Inst->eraseFromParent();
}

void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
QualType Ty, SourceLocation Loc,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue) {
llvm::Value *TheCheck;
llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
if (SanOpts.has(SanitizerKind::Alignment)) {
EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
OffsetValue, TheCheck, Assumption);
}
}

void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
QualType Ty, SourceLocation Loc,
SourceLocation AssumptionLoc,
unsigned Alignment,
llvm::Value *OffsetValue) {
llvm::Value *TheCheck;
llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
if (SanOpts.has(SanitizerKind::Alignment)) {
llvm::Value *AlignmentVal = llvm::ConstantInt::get(IntPtrTy, Alignment);
EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, AlignmentVal,
OffsetValue, TheCheck, Assumption);
}
}

void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
const Expr *E,
SourceLocation AssumptionLoc,
unsigned Alignment,
llvm::Value *OffsetValue) {
if (auto *CE = dyn_cast<CastExpr>(E))
E = CE->getSubExprAsWritten();
QualType Ty = E->getType();
SourceLocation Loc = E->getExprLoc();

EmitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
OffsetValue);
}

llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Value *AnnotationFn,
llvm::Value *AnnotatedVal,
StringRef AnnotationStr,
Expand Down Expand Up @@ -2459,6 +2502,61 @@ void CodeGenFunction::EmitMultiVersionResolver(
Builder.ClearInsertionPoint();
}

// Loc - where the diagnostic will point, where in the source code this
// alignment has failed.
// SecondaryLoc - if present (will be present if sufficiently different from
// Loc), the diagnostic will additionally point a "Note:" to this location.
// It should be the location where the __attribute__((assume_aligned))
// was written e.g.
void CodeGenFunction::EmitAlignmentAssumptionCheck(
llvm::Value *Ptr, QualType Ty, SourceLocation Loc,
SourceLocation SecondaryLoc, llvm::Value *Alignment,
llvm::Value *OffsetValue, llvm::Value *TheCheck,
llvm::Instruction *Assumption) {
assert(Assumption && isa<llvm::CallInst>(Assumption) &&
cast<llvm::CallInst>(Assumption)->getCalledValue() ==
llvm::Intrinsic::getDeclaration(
Builder.GetInsertBlock()->getParent()->getParent(),
llvm::Intrinsic::assume) &&
"Assumption should be a call to llvm.assume().");
assert(&(Builder.GetInsertBlock()->back()) == Assumption &&
"Assumption should be the last instruction of the basic block, "
"since the basic block is still being generated.");

if (!SanOpts.has(SanitizerKind::Alignment))
return;

// Don't check pointers to volatile data. The behavior here is implementation-
// defined.
if (Ty->getPointeeType().isVolatileQualified())
return;

// We need to temorairly remove the assumption so we can insert the
// sanitizer check before it, else the check will be dropped by optimizations.
Assumption->removeFromParent();

{
SanitizerScope SanScope(this);

if (!OffsetValue)
OffsetValue = Builder.getInt1(0); // no offset.

llvm::Constant *StaticData[] = {EmitCheckSourceLocation(Loc),
EmitCheckSourceLocation(SecondaryLoc),
EmitCheckTypeDescriptor(Ty)};
llvm::Value *DynamicData[] = {EmitCheckValue(Ptr),
EmitCheckValue(Alignment),
EmitCheckValue(OffsetValue)};
EmitCheck({std::make_pair(TheCheck, SanitizerKind::Alignment)},
SanitizerHandler::AlignmentAssumption, StaticData, DynamicData);
}

// We are now in the (new, empty) "cont" basic block.
// Reintroduce the assumption.
Builder.Insert(Assumption);
// FIXME: Assumption still has it's original basic block as it's Parent.
}

llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) {
if (CGDebugInfo *DI = getDebugInfo())
return DI->SourceLocToDebugLoc(Location);
Expand Down
33 changes: 22 additions & 11 deletions clang/lib/CodeGen/CodeGenFunction.h
Expand Up @@ -131,6 +131,7 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \
SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \
SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \
SANITIZER_CHECK(AlignmentAssumption, alignment_assumption, 0) \
SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0)

enum SanitizerHandler {
Expand Down Expand Up @@ -2633,12 +2634,6 @@ class CodeGenFunction : public CodeGenTypeCache {
ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre);

void EmitAlignmentAssumption(llvm::Value *PtrValue, unsigned Alignment,
llvm::Value *OffsetValue = nullptr) {
Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment,
OffsetValue);
}

/// Converts Location to a DebugLoc, if debug information is enabled.
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);

Expand Down Expand Up @@ -2802,11 +2797,27 @@ class CodeGenFunction : public CodeGenTypeCache {
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);

void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr) {
Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment,
OffsetValue);
}
void EmitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
SourceLocation Loc,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue,
llvm::Value *TheCheck,
llvm::Instruction *Assumption);

void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
SourceLocation Loc, SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);

void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
SourceLocation Loc, SourceLocation AssumptionLoc,
unsigned Alignment,
llvm::Value *OffsetValue = nullptr);

void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
SourceLocation AssumptionLoc, unsigned Alignment,
llvm::Value *OffsetValue = nullptr);

//===--------------------------------------------------------------------===//
// Statement Emission
Expand Down
@@ -0,0 +1,40 @@
// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE
// RUN: %clang_cc1 -fsanitize=alignment -fno-sanitize-recover=alignment -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_alignment_assumption" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE
// RUN: %clang_cc1 -fsanitize=alignment -fsanitize-recover=alignment -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_alignment_assumption" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER
// RUN: %clang_cc1 -fsanitize=alignment -fsanitize-trap=alignment -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_alignment_assumption" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE

typedef char **__attribute__((align_value(0x80000000))) aligned_char;

struct ac_struct {
// CHECK: %[[STRUCT_AC_STRUCT:.*]] = type { i8** }
aligned_char a;
};

// CHECK-SANITIZE-ANYRECOVER: @[[ALIGNED_CHAR:.*]] = {{.*}} c"'aligned_char' (aka 'char **')\00" }
// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_ALIGNMENT_ASSUMPTION:.*]] = {{.*}}, i32 100, i32 13 }, {{.*}}* @[[ALIGNED_CHAR]] }

char **load_from_ac_struct(struct ac_struct *x) {
// CHECK: define i8** @{{.*}}(%[[STRUCT_AC_STRUCT]]* %[[X:.*]])
// CHECK-NEXT: [[ENTRY:.*]]:
// CHECK-NEXT: %[[STRUCT_AC_STRUCT_ADDR:.*]] = alloca %[[STRUCT_AC_STRUCT]]*, align 8
// CHECK-NEXT: store %[[STRUCT_AC_STRUCT]]* %[[X]], %[[STRUCT_AC_STRUCT]]** %[[STRUCT_AC_STRUCT_ADDR]], align 8
// CHECK-NEXT: %[[X_RELOADED:.*]] = load %[[STRUCT_AC_STRUCT]]*, %[[STRUCT_AC_STRUCT]]** %[[STRUCT_AC_STRUCT_ADDR]], align 8
// CHECK: %[[A_ADDR:.*]] = getelementptr inbounds %[[STRUCT_AC_STRUCT]], %[[STRUCT_AC_STRUCT]]* %[[X_RELOADED]], i32 0, i32 0
// CHECK: %[[A:.*]] = load i8**, i8*** %[[A_ADDR]], align 8
// CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[A]] to i64
// CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 2147483647
// CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0
// CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[A]] to i64, !nosanitize
// CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize
// CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]:
// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_alignment_assumption_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}, {{{.*}}}* }* @[[LINE_100_ALIGNMENT_ASSUMPTION]] to i8*), i64 %[[PTRINT_DUP]], i64 2147483648, i64 0){{.*}}, !nosanitize
// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_alignment_assumption(i8* bitcast ({ {{{.*}}}, {{{.*}}}, {{{.*}}}* }* @[[LINE_100_ALIGNMENT_ASSUMPTION]] to i8*), i64 %[[PTRINT_DUP]], i64 2147483648, i64 0){{.*}}, !nosanitize
// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize
// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize
// CHECK-SANITIZE: [[CONT]]:
// CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]])
// CHECK-NEXT: ret i8** %[[A]]
// CHECK-NEXT: }
#line 100
return x->a;
}

0 comments on commit bd1c087

Please sign in to comment.