-
Notifications
You must be signed in to change notification settings - Fork 12.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libFuzzer] Experimental data flow tracer for fuzz targets.
Summary: Experimental data flow tracer for fuzz targets. Allows to tell which bytes of the input affect which functions of the fuzz target. We previously attempted to use DFSan directly in the libFuzzer process, and that didn't work nicely. Now we will try to collect the data flow information for the seed corpus in a separate process (using this tracer), and then use it in the regular libFuzzer runs. Reviewers: morehouse, pcc, Dor1s Reviewed By: morehouse, Dor1s Subscribers: delcypher, #sanitizers, llvm-commits Differential Revision: https://reviews.llvm.org/D46666 llvm-svn: 332029
- Loading branch information
Showing
3 changed files
with
313 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,203 @@ | ||
| /*===- DataFlow.cpp - a standalone DataFlow tracer -------===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // An experimental data-flow tracer for fuzz targets. | ||
| // It is based on DFSan and SanitizerCoverage. | ||
| // https://clang.llvm.org/docs/DataFlowSanitizer.html | ||
| // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow | ||
| // | ||
| // It executes the fuzz target on the given input while monitoring the | ||
| // data flow for every instrumented comparison instruction. | ||
| // | ||
| // The output shows which functions depend on which bytes of the input. | ||
| // | ||
| // Build: | ||
| // 1. Compile this file with -fsanitize=dataflow | ||
| // 2. Build the fuzz target with -g -fsanitize=dataflow | ||
| // -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp | ||
| // 3. Link those together with -fsanitize=dataflow | ||
| // | ||
| // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison | ||
| // instruction, DFSan modifies the calls to pass the data flow labels. | ||
| // The callbacks update the data flow label for the current function. | ||
| // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below. | ||
| // | ||
| // -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function | ||
| // entries so that the comparison callback knows the current function. | ||
| // | ||
| // | ||
| // Run: | ||
| // # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout) | ||
| // ./a.out INPUT_FILE [OUTPUT_FILE] | ||
| // | ||
| // # Print all instrumented functions. llvm-symbolizer must be present in PATH | ||
| // ./a.out | ||
| // | ||
| // Example output: | ||
| // =============== | ||
| // LEN: 5 | ||
| // LABELS: 10 | ||
| // L7 1 6 | ||
| // L8 2 7 | ||
| // L9 3 8 | ||
| // L10 4 9 | ||
| // F1 10 | ||
| // F2 5 | ||
| // =============== | ||
| // "LEN:" indicates the number of bytes in the input. | ||
| // "LABELS:" indicates the number of DFSan labels created while running the input. | ||
| // * The labels [1,LEN] correspond to the bytes of the input | ||
| // (label 1 corresponds to byte 0, and so on) | ||
| // * The label LEN+1 corresponds to the input size. | ||
| // * The labels [LEN+2,LABELS] correspond to DFSan's union labels. | ||
| // "Li j k": describes the label 'i' as a union of labels 'j' and 'k'. | ||
| // "Ff l": tells that the function 'f' depends on the label 'l'. | ||
| //===----------------------------------------------------------------------===*/ | ||
|
|
||
| #include <assert.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <stdint.h> | ||
| #include <string.h> | ||
|
|
||
| #include <execinfo.h> // backtrace_symbols_fd | ||
|
|
||
| #include <sanitizer/dfsan_interface.h> | ||
|
|
||
| // Entry points provided by the fuzz target that this tracer is linked with. | ||
| extern "C" { | ||
| // Runs the target on one input; main() calls it with the labeled buffer. | ||
| extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); | ||
| // Declared weak: main() tests the symbol for null before calling it. | ||
| __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); | ||
| } // extern "C" | ||
|
|
||
| // Size in bytes of the input file; set in main() and labeled there as well. | ||
| static size_t InputLen; | ||
| // Number of coverage guards numbered by __sanitizer_cov_trace_pc_guard_init. | ||
| static size_t NumFuncs; | ||
| // Start of the PC table handed to __sanitizer_cov_pcs_init; PrintFunctions | ||
| // reads entry [I * 2] as the PC of function I. | ||
| static const uintptr_t *FuncsBeg; | ||
| // Zero-based index of the function whose guard was hit last on this thread. | ||
| static __thread size_t CurrentFunc; | ||
| static dfsan_label *FuncLabels; // Array of NumFuncs elements. | ||
|
|
||
| // Prints all instrumented functions. | ||
| // Returns 0 on success and 1 if the symbolizer pipeline could not be started. | ||
| int PrintFunctions() { | ||
|   // We don't have the symbolizer integrated with dfsan yet. | ||
|   // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. | ||
|   // TODO(kcc): this is pretty ugly and may break in lots of ways. | ||
|   // We'll need to make a proper in-process symbolizer work with DFSan. | ||
|   FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' " | ||
|                      "| llvm-symbolizer " | ||
|                      "| grep 'dfs\\$' " | ||
|                      "| sed 's/dfs\\$//g'", "w"); | ||
|   // popen returns null on failure; fileno(NULL) below would crash. | ||
|   if (!Pipe) { | ||
|     perror("ERROR: popen"); | ||
|     return 1; | ||
|   } | ||
|   const int PipeFd = fileno(Pipe); | ||
|   for (size_t I = 0; I < NumFuncs; I++) { | ||
|     // The PC table holds two words per function; the first one is the PC. | ||
|     uintptr_t PC = FuncsBeg[I * 2]; | ||
|     void *const Buf[1] = {(void*)PC}; | ||
|     backtrace_symbols_fd(Buf, 1, PipeFd); | ||
|   } | ||
|   pclose(Pipe); | ||
|   return 0; | ||
| } | ||
|
|
||
| // Writes the collected data-flow report to Out: | ||
| //   a "LEN:" line, a "LABELS:" line, one "L<i> <j> <k>" line for every label | ||
| //   above InputLen + 1, and one "F<f> <l>" line for every function whose | ||
| //   accumulated label is non-zero. | ||
| void PrintDataFlow(FILE *Out) { | ||
|   // Query the label count once so the header and the loop bound agree. | ||
|   const size_t NumLabels = dfsan_get_label_count(); | ||
|   // size_t values are printed with %zu (%zd is for the signed counterpart). | ||
|   fprintf(Out, "LEN: %zu\n", InputLen); | ||
|   fprintf(Out, "LABELS: %zu\n", NumLabels); | ||
|   // Count in size_t: dfsan_label may be narrower than size_t, so a | ||
|   // dfsan_label counter could be truncated on initialization or wrap around | ||
|   // before it ever exceeds NumLabels. | ||
|   for (size_t L = InputLen + 2; L <= NumLabels; L++) { | ||
|     auto *DLI = dfsan_get_label_info((dfsan_label)L); | ||
|     fprintf(Out, "L%zu %d %d\n", L, DLI->l1, DLI->l2); | ||
|   } | ||
|   for (size_t I = 0; I < NumFuncs; I++) | ||
|     if (FuncLabels[I]) | ||
|       fprintf(Out, "F%zu %d\n", I, FuncLabels[I]); | ||
| } | ||
|
|
||
| // Entry point. | ||
| //   no arguments:             print the instrumented functions and exit. | ||
| //   INPUT_FILE [OUTPUT_FILE]: label every input byte and the input size, run | ||
| //                             the fuzz target once, then write the data-flow | ||
| //                             report to OUTPUT_FILE (default: stdout). | ||
| // Returns 0 on success and 1 on a usage or I/O error. | ||
| int main(int argc, char **argv) { | ||
|   if (LLVMFuzzerInitialize) | ||
|     LLVMFuzzerInitialize(&argc, &argv); | ||
|   if (argc == 1) | ||
|     return PrintFunctions(); | ||
|   if (argc != 2 && argc != 3) { | ||
|     fprintf(stderr, "Usage: %s INPUT_FILE [OUTPUT_FILE]\n", | ||
|             argc > 0 ? argv[0] : "a.out"); | ||
|     return 1; | ||
|   } | ||
|   // --- Read the whole input file into memory. --- | ||
|   const char *Input = argv[1]; | ||
|   fprintf(stderr, "INFO: reading '%s'\n", Input); | ||
|   // Fuzz inputs are arbitrary bytes, so open in binary mode. | ||
|   FILE *In = fopen(Input, "rb"); | ||
|   if (!In) { | ||
|     perror(Input); | ||
|     return 1; | ||
|   } | ||
|   // ftell reports failure as -1; check before converting to size_t. | ||
|   long FileSize = -1; | ||
|   if (fseek(In, 0, SEEK_END) == 0) | ||
|     FileSize = ftell(In); | ||
|   if (FileSize < 0 || fseek(In, 0, SEEK_SET) != 0) { | ||
|     perror(Input); | ||
|     fclose(In); | ||
|     return 1; | ||
|   } | ||
|   InputLen = (size_t)FileSize; | ||
|   // malloc(0) may return null; always request at least one byte so that a | ||
|   // null result unambiguously means out-of-memory. | ||
|   unsigned char *Buf = (unsigned char*)malloc(InputLen ? InputLen : 1); | ||
|   if (!Buf) { | ||
|     fprintf(stderr, "ERROR: cannot allocate %zu bytes\n", InputLen); | ||
|     fclose(In); | ||
|     return 1; | ||
|   } | ||
|   size_t NumBytesRead = fread(Buf, 1, InputLen, In); | ||
|   fclose(In); | ||
|   if (NumBytesRead != InputLen) { | ||
|     fprintf(stderr, "ERROR: short read from '%s'\n", Input); | ||
|     free(Buf); | ||
|     return 1; | ||
|   } | ||
|   // --- Label the input: label I goes to byte I-1, label InputLen+1 to the size. --- | ||
|   fprintf(stderr, "INFO: running '%s'\n", Input); | ||
|   for (size_t I = 1; I <= InputLen; I++) { | ||
|     dfsan_label L = dfsan_create_label("", nullptr); | ||
|     assert(L == I); | ||
|     dfsan_set_label(L, Buf + I - 1, 1); | ||
|   } | ||
|   dfsan_label SizeL = dfsan_create_label("", nullptr); | ||
|   assert(SizeL == InputLen + 1); | ||
|   dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); | ||
|   // --- Run the target; the comparison hooks fill FuncLabels meanwhile. --- | ||
|   LLVMFuzzerTestOneInput(Buf, InputLen); | ||
|   free(Buf); | ||
|   // --- Write the report. --- | ||
|   bool OutIsStdout = argc == 2; | ||
|   fprintf(stderr, "INFO: writing dataflow to %s\n", | ||
|           OutIsStdout ? "<stdout>" : argv[2]); | ||
|   FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w"); | ||
|   // Without this check a failed fopen would hand NULL to fprintf. | ||
|   if (!Out) { | ||
|     perror(argv[2]); | ||
|     return 1; | ||
|   } | ||
|   PrintDataFlow(Out); | ||
|   // fclose flushes buffered output, so a failure here means a lost report. | ||
|   if (!OutIsStdout && fclose(Out) != 0) { | ||
|     perror(argv[2]); | ||
|     return 1; | ||
|   } | ||
|   return 0; | ||
| } | ||
|
|
||
| extern "C" { | ||
|
|
||
| // Receives the [start, stop) range of coverage guards. Numbers the guards | ||
| // 1..NumFuncs and allocates the zero-initialized FuncLabels array. | ||
| void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, | ||
|                                          uint32_t *stop) { | ||
|   // A repeated call for an already numbered range must be a no-op. This check | ||
|   // has to come before the NumFuncs assert, which would otherwise abort on | ||
|   // the second call because NumFuncs is non-zero by then. | ||
|   if (start < stop && *start) return; // Initialize only once. | ||
|   assert(NumFuncs == 0 && "This tool does not support DSOs"); | ||
|   assert(start < stop && "The code is not instrumented for coverage"); | ||
|   if (start >= stop) return; // Nothing to number (reachable only with NDEBUG). | ||
|   for (uint32_t *x = start; x < stop; x++) | ||
|     *x = ++NumFuncs; // The first index is 1. | ||
|   FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); | ||
|   // The comparison hooks write into FuncLabels unconditionally. | ||
|   if (!FuncLabels) { | ||
|     fprintf(stderr, "ERROR: cannot allocate labels for %zu function(s)\n", | ||
|             NumFuncs); | ||
|     abort(); | ||
|   } | ||
|   fprintf(stderr, "INFO: %zu instrumented function(s) observed\n", NumFuncs); | ||
| } | ||
|
|
||
| // Remembers where the PC table starts. The table is expected to hold two | ||
| // words for each of the NumFuncs functions. | ||
| void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, | ||
|                               const uintptr_t *pcs_end) { | ||
|   const auto NumTableFuncs = (pcs_end - pcs_beg) / 2; | ||
|   assert(NumFuncs == NumTableFuncs); | ||
|   FuncsBeg = pcs_beg; | ||
| } | ||
|
|
||
| // Unused by this tool: the callback deliberately does nothing. | ||
| void __sanitizer_cov_trace_pc_indir(uint64_t x) { | ||
|   (void)x; | ||
| } | ||
|
|
||
| // Records which instrumented function this thread has just entered. | ||
| void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { | ||
|   // Guard values are 1-based, function indices are 0-based. | ||
|   const uint32_t Index = *guard - 1; | ||
|   assert(Index < NumFuncs); | ||
|   CurrentFunc = Index; | ||
| } | ||
|
|
||
| // Switch hook: merges the label of the switched value (L1) into the label of | ||
| // the current function. The label of the case table is ignored. | ||
| void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, | ||
|                                          dfsan_label L1, dfsan_label UnusedL) { | ||
|   assert(CurrentFunc < NumFuncs); | ||
|   dfsan_label *Slot = &FuncLabels[CurrentFunc]; | ||
|   *Slot = dfsan_union(*Slot, L1); | ||
| } | ||
|
|
||
| // Defines a comparison hook called Name taking two operands of type Type and | ||
| // their labels. The hook merges both operand labels into the label of the | ||
| // current function. | ||
| #define HOOK(Name, Type) \ | ||
|   void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ | ||
|     assert(CurrentFunc < NumFuncs); \ | ||
|     dfsan_label *Slot = &FuncLabels[CurrentFunc]; \ | ||
|     *Slot = dfsan_union(*Slot, dfsan_union(L1, L2)); \ | ||
|   } | ||
|
|
||
| // One hook per operand width (1, 2, 4 and 8 bytes), for both the const_cmp | ||
| // and the cmp callback families. | ||
| HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t) | ||
| HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t) | ||
|
|
||
| } // extern "C" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
|
|
||
| // Find "FUZZME", the target has 3 different functions. | ||
| #include <assert.h> | ||
| #include <cstddef> | ||
| #include <cstdint> | ||
| #include <cstdlib> | ||
| #include <cstdio> | ||
|
|
||
| // Returns true when the fifth input byte, Data[4], is 'M'. | ||
| // Size is not consulted; the only caller in this file checks Size >= 5 first. | ||
| // noinline keeps the compiler from inlining this into its caller. | ||
| __attribute__((noinline)) | ||
| static bool Func1(const uint8_t *Data, size_t Size) { | ||
| // assumes Size >= 5, doesn't check it. | ||
| return Data[4] == 'M'; | ||
| } | ||
|
|
||
| // Returns true when the input has at least 6 bytes and Data[5] is 'E'. | ||
| // The size check comes first, so Data[5] is read only when it exists. | ||
| __attribute__((noinline)) | ||
| bool Func2(const uint8_t *Data, size_t Size) { | ||
| return Size >= 6 && Data[5] == 'E'; | ||
| } | ||
|
|
||
| // Fuzz target. Prints "BINGO" to stderr and aborts when the input has at | ||
| // least 5 bytes, starts with "FUZZ", and both Func1 and Func2 accept it; | ||
| // returns 0 otherwise. The && chain short-circuits, so later bytes are | ||
| // examined only after all earlier checks have passed. | ||
| extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { | ||
| if (Size >= 5 | ||
| && Data[0] == 'F' | ||
| && Data[1] == 'U' | ||
| && Data[2] == 'Z' | ||
| && Data[3] == 'Z' | ||
| && Func1(Data, Size) | ||
| && Func2(Data, Size)) { | ||
| fprintf(stderr, "BINGO\n"); | ||
| abort(); | ||
| } | ||
| return 0; | ||
| } | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| # Tests the data flow tracer. | ||
| REQUIRES: linux | ||
|
|
||
| # Build the tracer and the test. | ||
| RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest.o | ||
| RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow %t-ThreeFunctionsTest.o %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-ThreeFunctionsTestDF | ||
|
|
||
| # Dump the function list. | ||
| RUN: %t-ThreeFunctionsTestDF 2>&1 | FileCheck %s --check-prefix=FUNC_LIST | ||
| FUNC_LIST-DAG: LLVMFuzzerTestOneInput | ||
| FUNC_LIST-DAG: Func1 | ||
| FUNC_LIST-DAG: Func2 | ||
|
|
||
| # Prepare the inputs. | ||
| RUN: rm -rf %t/IN | ||
| RUN: mkdir -p %t/IN | ||
| RUN: echo -n ABC > %t/IN/ABC | ||
| RUN: echo -n FUABC > %t/IN/FUABC | ||
| RUN: echo -n FUZZR > %t/IN/FUZZR | ||
| RUN: echo -n FUZZM > %t/IN/FUZZM | ||
| RUN: echo -n FUZZMU > %t/IN/FUZZMU | ||
|
|
||
| # ABC: No data is used, the only used label is 4 (corresponds to the size) | ||
| RUN:%t-ThreeFunctionsTestDF %t/IN/ABC | FileCheck %s --check-prefix=IN_ABC | ||
| IN_ABC: LEN: 3 | ||
| IN_ABC: LABELS: 4 | ||
| IN_ABC: F{{[012]}} 4 | ||
| IN_ABC-NOT: F | ||
|
|
||
| # FUABC: First 3 bytes are checked, Func1/Func2 are not called. | ||
| RUN:%t-ThreeFunctionsTestDF %t/IN/FUABC | FileCheck %s --check-prefix=IN_FUABC | ||
| IN_FUABC: LEN: 5 | ||
| IN_FUABC: LABELS: | ||
| IN_FUABC: L{{.*}} 1 | ||
| IN_FUABC: L{{.*}} 2 | ||
| IN_FUABC: L{{.*}} 3 | ||
| IN_FUABC-NOT: L{{.*}} 4 | ||
| IN_FUABC: F{{[012]}} | ||
| IN_FUABC-NOT: F | ||
|
|
||
| # FUZZR: 5 bytes are used (4 in one function, 5-th in the other), Func2 is not called. | ||
| RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZR | FileCheck %s --check-prefix=IN_FUZZR | ||
| IN_FUZZR: LEN: 5 | ||
| IN_FUZZR: LABELS: | ||
| IN_FUZZR: L{{.*}} 1 | ||
| IN_FUZZR: L{{.*}} 2 | ||
| IN_FUZZR: L{{.*}} 3 | ||
| IN_FUZZR: L[[L0:[0-9]*]] 4 | ||
| IN_FUZZR-DAG: F{{[012]}} 5 | ||
| IN_FUZZR-DAG: F{{[012]}} [[L0]] | ||
| IN_FUZZR-NOT: F | ||
|
|
||
| # FUZZM: 5 bytes are used, both Func1 and Func2 are called, Func2 depends only on size (label 6). | ||
| RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZM | FileCheck %s --check-prefix=IN_FUZZM | ||
| IN_FUZZM: LEN: 5 | ||
| IN_FUZZM: LABELS: | ||
| IN_FUZZM: L{{.*}} 1 | ||
| IN_FUZZM: L{{.*}} 2 | ||
| IN_FUZZM: L{{.*}} 3 | ||
| IN_FUZZM: L{{.*}} 4 | ||
| IN_FUZZM-DAG: F{{[012]}} 6 | ||
| IN_FUZZM-DAG: F{{[012]}} 5 | ||
| IN_FUZZM-DAG: F | ||
|
|
||
| # FUZZMU: 6 bytes are used, both Func1 and Func2 are called, Func2 depends on byte 6 and size (label 7) | ||
| RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZMU | FileCheck %s --check-prefix=IN_FUZZMU | ||
| IN_FUZZMU: LEN: 6 | ||
| IN_FUZZMU: LABELS: | ||
| IN_FUZZMU: L{{.*}} 1 | ||
| IN_FUZZMU: L{{.*}} 2 | ||
| IN_FUZZMU: L{{.*}} 3 | ||
| IN_FUZZMU: L{{.*}} 4 | ||
| IN_FUZZMU: L[[L2:[0-9]*]] 6 7 | ||
| IN_FUZZMU-DAG: F{{[012]}} 5 | ||
| IN_FUZZMU-DAG: F{{[012]}} [[L2]] | ||
| IN_FUZZMU-DAG: F |