From 4faeb87ebe8942dc7b997f763ed8899a0bcfb963 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 29 Aug 2017 20:51:24 +0000 Subject: [PATCH] [libFUzzer] change the way we load the seed corpora: instead of loading all files and these executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases llvm-svn: 312033 --- compiler-rt/lib/fuzzer/FuzzerIO.h | 1 + compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp | 7 ++ compiler-rt/lib/fuzzer/FuzzerInternal.h | 2 - compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 107 +++++++++++---------- compiler-rt/test/fuzzer/fuzzer-dirs.test | 2 +- compiler-rt/test/fuzzer/reduce_inputs.test | 2 +- 6 files changed, 68 insertions(+), 53 deletions(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index 8ed0e003d4201..5059c11ac74ca 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -53,6 +53,7 @@ void RawPrint(const char *Str); // Platform specific functions: bool IsFile(const std::string &Path); +size_t FileSize(const std::string &Path); void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector *V, bool TopDir); diff --git a/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp b/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp index d642b3424697a..2c452a7dd8d24 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp @@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) { return S_ISREG(St.st_mode); } +size_t FileSize(const std::string &Path) { + struct stat St; + if (stat(Path.c_str(), &St)) + return 0; + return St.st_size; +} + void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector *V, bool TopDir) { auto E = GetEpoch(Dir); diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h index 70136a30b00eb..34fdeb8215800 100644 --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -38,7 +38,6 @@ class Fuzzer { void Loop(const Vector &CorpusDirs); void ReadAndExecuteSeedCorpora(const Vector &CorpusDirs); void MinimizeCrashLoop(const Unit &U); - void ShuffleAndMinimize(UnitVector *V); void RereadOutputCorpus(size_t MaxSize); size_t secondsSinceProcessStartUp() { @@ -103,7 +102,6 @@ class Fuzzer { void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); void PrintStatusForNewUnit(const Unit &U, const char *Text); - void ShuffleCorpus(UnitVector *V); void CheckExitOnSrcPosOrItem(); static void StaticDeathCallback(); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index 84ea2d6e85193..97fc31cc87a25 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { PrintStats("RELOAD"); } -void Fuzzer::ShuffleCorpus(UnitVector *V) { - std::shuffle(V->begin(), V->end(), MD.GetRand()); - if (Options.PreferSmall) - std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) { - return A.size() < B.size(); - }); -} - -void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { - Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(), - GetPeakRSSMb()); - if (Options.ShuffleAtStartUp) - ShuffleCorpus(InitialCorpus); - - // Test the callback with empty input and never try it again. - uint8_t dummy; - ExecuteCallback(&dummy, 0); - - for (auto &U : *InitialCorpus) { - RunOne(U.data(), U.size()); - CheckExitOnSrcPosOrItem(); - TryDetectingAMemoryLeak(U.data(), U.size(), - /*DuringInitialCorpusExecution*/ true); - U.clear(); - } - PrintStats("INITED"); - if (Corpus.empty()) { - Printf("ERROR: no interesting inputs were found. " - "Is the code instrumented for coverage? Exiting.\n"); - exit(1); - } -} - void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime).count(); @@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() { void Fuzzer::ReadAndExecuteSeedCorpora(const Vector &CorpusDirs) { const size_t kMaxSaneLen = 1 << 20; const size_t kMinDefaultLen = 4096; - size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen; - UnitVector InitialCorpus; - for (auto &Inp : CorpusDirs) { - Printf("Loading corpus dir: %s\n", Inp.c_str()); - ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, - TemporaryMaxLen, /*ExitOnError=*/false); + struct SizedFile { + std::string File; + size_t Size; + }; + Vector SizedFiles; + size_t MaxSize = 0; + size_t MinSize = -1; + size_t TotalSize = 0; + for (auto &Dir : CorpusDirs) { + Vector Files; + ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true); + Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str()); + for (auto &File : Files) { + if (size_t Size = FileSize(File)) { + MaxSize = Max(Size, MaxSize); + MinSize = Min(Size, MinSize); + TotalSize += Size; + SizedFiles.push_back({File, Size}); + } + } } - if (Options.MaxLen == 0) { - size_t MaxLen = 0; - for (auto &U : InitialCorpus) - MaxLen = std::max(U.size(), MaxLen); - SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen)); + if (Options.MaxLen == 0) + SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen)); + assert(MaxInputLen > 0); + + if (SizedFiles.empty()) { + Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); + Unit U({'\n'}); // Valid ASCII input. + RunOne(U.data(), U.size()); + } else { + Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb" + " rss: %zdMb\n", + SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb()); + if (Options.ShuffleAtStartUp) + std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand()); + + if (Options.PreferSmall) + std::stable_sort( + SizedFiles.begin(), SizedFiles.end(), + [](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; }); + + // Load and execute inputs one by one. + for (auto &SF : SizedFiles) { + auto U = FileToVector(SF.File, MaxInputLen); + assert(U.size() <= MaxInputLen); + RunOne(U.data(), U.size()); + CheckExitOnSrcPosOrItem(); + TryDetectingAMemoryLeak(U.data(), U.size(), + /*DuringInitialCorpusExecution*/ true); + } } - if (InitialCorpus.empty()) { - InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input. - if (Options.Verbosity) - Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); + // Test the callback with empty input and never try it again. + uint8_t dummy; + ExecuteCallback(&dummy, 0); + + PrintStats("INITED"); + if (Corpus.empty()) { + Printf("ERROR: no interesting inputs were found. " + "Is the code instrumented for coverage? Exiting.\n"); + exit(1); } - ShuffleAndMinimize(&InitialCorpus); } void Fuzzer::Loop(const Vector &CorpusDirs) { diff --git a/compiler-rt/test/fuzzer/fuzzer-dirs.test b/compiler-rt/test/fuzzer/fuzzer-dirs.test index ef0888f3be50f..9b6e4d1eedda6 100644 --- a/compiler-rt/test/fuzzer/fuzzer-dirs.test +++ b/compiler-rt/test/fuzzer/fuzzer-dirs.test @@ -6,7 +6,7 @@ RUN: echo a > %t/SUB1/a RUN: echo b > %t/SUB1/SUB2/b RUN: echo c > %t/SUB1/SUB2/SUB3/c RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS -SUBDIRS: READ units: 3 +SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64 RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256 RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024 diff --git a/compiler-rt/test/fuzzer/reduce_inputs.test b/compiler-rt/test/fuzzer/reduce_inputs.test index 02e090ebdd41a..94f8cc4f37a8f 100644 --- a/compiler-rt/test/fuzzer/reduce_inputs.test +++ b/compiler-rt/test/fuzzer/reduce_inputs.test @@ -9,7 +9,7 @@ CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60' # Test that reduce_inputs deletes redundant files in the corpus. RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT -COUNT: READ units: 4 +COUNT: seed corpus: files: 4 # a bit longer test RUN: %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=1 -runs=1000000 2>&1 | FileCheck %s