Skip to content

Commit

Permalink
[DFSan] Add callback that allows to track which function tainted data…
Browse files Browse the repository at this point in the history
… reaches.

Authored-by: Christopher Liebchen <liebchen@google.com>
Co-authored-by: Andrew Browne <browneee@google.com>

Reviewed By: browneee

Differential Revision: https://reviews.llvm.org/D139543
  • Loading branch information
browneee committed Dec 12, 2022
1 parent c2f199f commit 5bb06c7
Show file tree
Hide file tree
Showing 7 changed files with 296 additions and 0 deletions.
20 changes: 20 additions & 0 deletions compiler-rt/include/sanitizer/dfsan_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
dfsan_origin origin);

/// Signature of the callback argument to dfsan_set_reaches_function_callback().
/// \c label and \c origin describe the tainted data; \c origin is 0 when the
/// instrumentation does not track origins. \c file, \c line and \c function
/// identify the instrumented location that the data reached; \c line is 0
/// when no debug location is available for that instruction.
typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
dfsan_origin origin,
const char *file,
unsigned int line,
const char *function);

/// Computes the union of \c l1 and \c l2, resulting in a union label.
dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);

Expand Down Expand Up @@ -91,6 +99,18 @@ void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
/// This function returns all label bits seen in signal handler conditions.
dfsan_label dfsan_get_labels_in_signal_conditional();

/// Sets a callback to be invoked when tainted data reaches a function.
/// This could occur at function entry, at a load instruction, or for a value
/// returned by a call.
/// These callbacks will only be added if the program is compiled with
/// -mllvm -dfsan-reaches-function-callbacks=1.
/// The callback is not invoked for data whose label is 0, nor while the
/// current thread is running a signal handler (see
/// dfsan_get_labels_in_signal_reaches_function()).
void dfsan_set_reaches_function_callback(
dfsan_reaches_function_callback_t callback);

/// Making callbacks that handle signals well is tricky, so when
/// -dfsan-reaches-function-callbacks=true, functions reached in signal
/// handlers do not invoke the user callback; instead the labels they see are
/// accumulated into a global (bitwise-or'd together).
/// This function returns all label bits seen during signal handlers.
/// The accumulated value is reset to 0 by dfsan_flush().
dfsan_label dfsan_get_labels_in_signal_reaches_function();

/// Interceptor hooks.
/// Whenever a dfsan's custom function is called the corresponding
/// hook is called if non-zero. The hooks should be defined by the user.
Expand Down
62 changes: 62 additions & 0 deletions compiler-rt/lib/dfsan/dfsan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,67 @@ dfsan_get_labels_in_signal_conditional() {
return __dfsan::labels_in_signal_conditional;
}

namespace __dfsan {

typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
dfsan_origin origin,
const char *file,
unsigned int line,
const char *function);
static dfsan_reaches_function_callback_t reaches_function_callback = nullptr;
static dfsan_label labels_in_signal_reaches_function = 0;

static void ReachesFunctionCallback(dfsan_label label, dfsan_origin origin,
const char *file, unsigned int line,
const char *function) {
if (label == 0) {
return;
}
if (reaches_function_callback == nullptr) {
return;
}

// This initial ReachesFunctionCallback handler needs to be in here in dfsan
// runtime (rather than being an entirely user implemented hook) so that it
// has access to dfsan thread information.
DFsanThread *t = GetCurrentThread();
// A callback operation which does useful work (like record the flow) will
// likely be too long executed in a signal handler.
if (t && t->InSignalHandler()) {
// Record set of labels used in signal handler for completeness.
labels_in_signal_reaches_function |= label;
return;
}

reaches_function_callback(label, origin, file, line, function);
}

} // namespace __dfsan

// Runtime entry point emitted by the instrumentation pass when origin
// tracking is enabled. Forwards the label, origin and source location
// unchanged to __dfsan::ReachesFunctionCallback, which decides whether the
// user callback is actually invoked.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
__dfsan_reaches_function_callback_origin(dfsan_label label, dfsan_origin origin,
const char *file, unsigned int line,
const char *function) {
__dfsan::ReachesFunctionCallback(label, origin, file, line, function);
}

// Runtime entry point emitted by the instrumentation pass when origins are
// not tracked. Same as the _origin variant, except that the user callback
// receives an origin of 0.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
__dfsan_reaches_function_callback(dfsan_label label, const char *file,
unsigned int line, const char *function) {
__dfsan::ReachesFunctionCallback(label, 0, file, line, function);
}

// Installs (or replaces) the user callback invoked when tainted data reaches
// a function. Passing a null callback turns reporting off, because
// ReachesFunctionCallback returns early when no callback is set.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
dfsan_set_reaches_function_callback(
__dfsan::dfsan_reaches_function_callback_t callback) {
__dfsan::reaches_function_callback = callback;
}

// Returns the bitwise-or of all labels that reached a function while the
// current thread was in a signal handler, i.e. the cases in which
// ReachesFunctionCallback skipped the user callback.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
dfsan_get_labels_in_signal_reaches_function() {
return __dfsan::labels_in_signal_reaches_function;
}

class Decorator : public __sanitizer::SanitizerCommonDecorator {
public:
Decorator() : SanitizerCommonDecorator() {}
Expand Down Expand Up @@ -1031,6 +1092,7 @@ extern "C" void dfsan_flush() {
}
}
__dfsan::labels_in_signal_conditional = 0;
__dfsan::labels_in_signal_reaches_function = 0;
}

// TODO: CheckMemoryLayoutSanity is based on msan.
Expand Down
6 changes: 6 additions & 0 deletions compiler-rt/lib/dfsan/done_abilist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ fun:dfsan_set_conditional_callback=uninstrumented
fun:dfsan_set_conditional_callback=discard
fun:dfsan_get_labels_in_signal_conditional=uninstrumented
fun:dfsan_get_labels_in_signal_conditional=discard
fun:dfsan_set_reaches_function_callback=uninstrumented
fun:dfsan_set_reaches_function_callback=discard
fun:dfsan_get_labels_in_signal_reaches_function=uninstrumented
fun:dfsan_get_labels_in_signal_reaches_function=discard
fun:dfsan_reaches_function_callback=uninstrumented
fun:dfsan_reaches_function_callback=discard

###############################################################################
# glibc
Expand Down
6 changes: 6 additions & 0 deletions compiler-rt/test/dfsan/Inputs/flags_abilist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@ fun:my_dfsan_conditional_callback=discard

fun:dfsan_set_conditional_callback=uninstrumented
fun:dfsan_set_conditional_callback=discard

fun:my_dfsan_reaches_function_callback=uninstrumented
fun:my_dfsan_reaches_function_callback=discard

fun:dfsan_set_reaches_function_callback=uninstrumented
fun:dfsan_set_reaches_function_callback=discard
67 changes: 67 additions & 0 deletions compiler-rt/test/dfsan/reaches_function.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o
// RUN: %clang_dfsan -gmlt -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-reaches-function-callbacks=1 %s %t-callbacks.o -o %t
// RUN: %run %t 2>&1 | FileCheck %s

// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -DORIGIN_TRACKING -c %s -o %t-callbacks.o
// RUN: %clang_dfsan -gmlt -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-reaches-function-callbacks=1 -mllvm -dfsan-track-origins=2 %s %t-callbacks.o -o %t
// RUN: %run %t 2>&1 | FileCheck --check-prefix=CHECK-ORIGIN-TRACKING %s

// REQUIRES: x86_64-target-arch

// Tests that callbacks are inserted for reached functions when
// -dfsan-reaches-function-callbacks is specified.

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sanitizer/dfsan_interface.h>

#ifdef CALLBACKS
// Compile this code without DFSan to avoid recursive instrumentation.

// User callback installed by main() via dfsan_set_reaches_function_callback().
// With ORIGIN_TRACKING it prints the origin trace of the tainted data;
// otherwise it prints "<file>:<line> <function>" for the location reached.
// The output is what the CHECK / CHECK-ORIGIN-TRACKING lines match against.
void my_dfsan_reaches_function_callback(dfsan_label label, dfsan_origin origin,
                                        const char *file, unsigned int line,
                                        const char *function) {
#ifdef ORIGIN_TRACKING
  dfsan_print_origin_id_trace(origin);
#else
  // `line` is an unsigned int, so it must be printed with %u, not %d.
  printf("%s:%u %s\n", file, line, function);
#endif
}

#else

// Adds the two values read through the pointers. The loads on the return
// statement are where the tainted data is expected to be reported as reaching
// add (the CHECK line expects the instrumented name "add.dfsan").
// Presumably noinline keeps those loads inside this function rather than in
// main().
// NOTE: the FileCheck directives below use @LINE-relative offsets that point
// at the return statement; do not add or remove lines between them.
__attribute__((noinline)) uint64_t add(uint64_t *a, uint64_t *b) {

return *a + *b;
// CHECK: {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 1]] add.dfsan
// CHECK-ORIGIN-TRACKING: Origin value: 0x10000002, Taint value was stored to memory at
// CHECK-ORIGIN-TRACKING: #0 {{.*}} in add.dfsan {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 3]]:{{.*}}
// CHECK-ORIGIN-TRACKING: Origin value: 0x1, Taint value was created at
// CHECK-ORIGIN-TRACKING: #0 {{.*}} in main {{.*}}compiler-rt/test/dfsan/reaches_function.c:{{.*}}
}

// Declaration of the callback defined in the CALLBACKS half of this file,
// which the RUN lines compile separately with -fno-sanitize=dataflow and link
// in as %t-callbacks.o.
extern void my_dfsan_reaches_function_callback(dfsan_label label,
dfsan_origin origin,
const char *file,
unsigned int line,
const char *function);

// Installs the callback, taints `a` with label 8 and calls add(&a, &b).
// The callback is expected to fire for the loads inside add() and again in
// main for the call on the `uint64_t c = add(...)` line.
// NOTE: the FileCheck directives below use @LINE-relative offsets (to the
// add() call and to the dfsan_set_label() call); do not add or remove lines
// between those statements and the directives.
int main(int argc, char *argv[]) {

dfsan_set_reaches_function_callback(my_dfsan_reaches_function_callback);

uint64_t a = 0;
uint64_t b = 0;

dfsan_set_label(8, &a, sizeof(a));
uint64_t c = add(&a, &b);
// CHECK: {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 1]] main
// CHECK-ORIGIN-TRACKING: Origin value: 0x10000002, Taint value was stored to memory at
// CHECK-ORIGIN-TRACKING: #0 {{.*}} in add.dfsan {{.*}}compiler-rt/test/dfsan/reaches_function.c:{{.*}}
// CHECK-ORIGIN-TRACKING: Origin value: 0x1, Taint value was created at
// CHECK-ORIGIN-TRACKING: #0 {{.*}} in main {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 6]]:{{.*}}
return c;
}

#endif // #ifdef CALLBACKS
106 changes: 106 additions & 0 deletions llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,14 @@ static cl::opt<bool> ClConditionalCallbacks(
cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
cl::init(false));

// Experimental feature that inserts callbacks for data reaching a function,
// via function arguments, loads, or values returned from calls.
// This must be true for dfsan_set_reaches_function_callback() to have effect.
// Off by default.
static cl::opt<bool> ClReachesFunctionCallbacks(
"dfsan-reaches-function-callbacks",
cl::desc("Insert calls to callback functions on data reaching a function."),
cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
"dfsan-track-select-control-flow",
Expand Down Expand Up @@ -446,6 +454,8 @@ class DataFlowSanitizer {
FunctionType *DFSanVarargWrapperFnTy;
FunctionType *DFSanConditionalCallbackFnTy;
FunctionType *DFSanConditionalCallbackOriginFnTy;
FunctionType *DFSanReachesFunctionCallbackFnTy;
FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
FunctionType *DFSanCmpCallbackFnTy;
FunctionType *DFSanLoadStoreCallbackFnTy;
FunctionType *DFSanMemTransferCallbackFnTy;
Expand All @@ -467,6 +477,8 @@ class DataFlowSanitizer {
FunctionCallee DFSanMemTransferCallbackFn;
FunctionCallee DFSanConditionalCallbackFn;
FunctionCallee DFSanConditionalCallbackOriginFn;
FunctionCallee DFSanReachesFunctionCallbackFn;
FunctionCallee DFSanReachesFunctionCallbackOriginFn;
FunctionCallee DFSanCmpCallbackFn;
FunctionCallee DFSanChainOriginFn;
FunctionCallee DFSanChainOriginIfTaintedFn;
Expand Down Expand Up @@ -673,6 +685,11 @@ struct DFSanFunction {
// branch instruction using the given conditional expression.
void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

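// If ClReachesFunctionCallbacks is enabled, insert at IRB's insertion point a
// callback reporting the shadow (and origin, when tracked) of Data. I supplies
// the debug location and the function name passed to the callback.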
void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
Value *Data);

bool isLookupTableConstant(Value *P);

private:
Expand Down Expand Up @@ -1025,6 +1042,45 @@ void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
}
}

// Emits, at IRB's insertion point, a call to the reaches-function runtime
// callback for Data. The call passes Data's collapsed primitive shadow, its
// origin (only when origins are tracked), a source file name, a line number
// and the name of the function containing I. No-op unless
// ClReachesFunctionCallbacks is set.
void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
                                                         Instruction &I,
                                                         Value *Data) {
  if (!ClReachesFunctionCallbacks)
    return;

  const DebugLoc &Loc = I.getDebugLoc();
  Value *Shadow = collapseToPrimitiveShadow(getShadow(Data), IRB);

  // Without a debug location, fall back to the module's source file name and
  // report line 0.
  unsigned Line = 0;
  Value *FileName;
  if (Loc) {
    Line = Loc.getLine();
    FileName = IRB.CreateGlobalStringPtr(Loc->getFilename());
  } else {
    FileName = IRB.CreateGlobalStringPtr(
        I.getFunction()->getParent()->getSourceFileName());
  }
  Value *LineNo = IRB.getInt32(Line);
  Value *FunctionName = IRB.CreateGlobalStringPtr(I.getFunction()->getName());

  // The origin-tracking runtime entry point takes one extra argument (the
  // origin) right after the shadow.
  CallInst *Call =
      DFS.shouldTrackOrigins()
          ? IRB.CreateCall(
                DFS.DFSanReachesFunctionCallbackOriginFn,
                {Shadow, getOrigin(Data), FileName, LineNo, FunctionName})
          : IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn,
                           {Shadow, FileName, LineNo, FunctionName});
  Call->setDebugLoc(Loc);
}

Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
if (!OrigTy->isSized())
return PrimitiveShadowTy;
Expand Down Expand Up @@ -1097,6 +1153,16 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
DFSanConditionalCallbackOriginFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
/*isVarArg=*/false);
Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
OriginTy, Int8Ptr};
DFSanReachesFunctionCallbackFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
/*isVarArg=*/false);
Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
/*isVarArg=*/false);
DFSanCmpCallbackFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
/*isVarArg=*/false);
Expand Down Expand Up @@ -1324,6 +1390,10 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
Expand Down Expand Up @@ -1357,6 +1427,11 @@ void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
DFSanConditionalCallbackOriginFn =
Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
DFSanConditionalCallbackOriginFnTy);
DFSanReachesFunctionCallbackFn = Mod->getOrInsertFunction(
"__dfsan_reaches_function_callback", DFSanReachesFunctionCallbackFnTy);
DFSanReachesFunctionCallbackOriginFn =
Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
DFSanReachesFunctionCallbackOriginFnTy);
}

void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
Expand Down Expand Up @@ -1585,6 +1660,31 @@ bool DataFlowSanitizer::runImpl(
DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
FnsWithForceZeroLabel.count(F), GetTLI(*F));

if (ClReachesFunctionCallbacks) {
// Add callback for arguments reaching this function.
for (auto &FArg : F->args()) {
Instruction *Next = &F->getEntryBlock().front();
Value *FArgShadow = DFSF.getShadow(&FArg);
if (isZeroShadow(FArgShadow))
continue;
if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
Next = FArgShadowInst->getNextNode();
}
if (shouldTrackOrigins()) {
if (Instruction *Origin =
dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
// Ensure IRB insertion point is after loads for shadow and origin.
Instruction *OriginNext = Origin->getNextNode();
if (Next->comesBefore(OriginNext)) {
Next = OriginNext;
}
}
}
IRBuilder<> IRB(Next);
DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
}
}

// DFSanVisitor may create new basic blocks, which confuses df_iterator.
// Build a copy of the list before iterating over it.
SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
Expand Down Expand Up @@ -2267,6 +2367,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
if (LI.isAtomic())
LI.setOrdering(addAcquireOrdering(LI.getOrdering()));

Instruction *AfterLi = LI.getNextNode();
Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
std::vector<Value *> Shadows;
std::vector<Value *> Origins;
Expand Down Expand Up @@ -2304,6 +2405,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
}

IRBuilder<> IRB(AfterLi);
DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
}

Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
Expand Down Expand Up @@ -3303,6 +3407,8 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
DFSF.SkipInsts.insert(LI);
DFSF.setOrigin(&CB, LI);
}

DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
}
}

Expand Down
Loading

0 comments on commit 5bb06c7

Please sign in to comment.