@@ -38,12 +38,102 @@ struct InputInfo {
38
38
bool HasFocusFunction = false ;
39
39
Vector<uint32_t > UniqFeatureSet;
40
40
Vector<uint8_t > DataFlowTraceForFocusFunction;
41
+ // Power schedule.
42
+ bool NeedsEnergyUpdate = false ;
43
+ double Energy = 0.0 ;
44
+ size_t SumIncidence = 0 ;
45
+ Vector<std::pair<uint32_t , uint16_t >> FeatureFreqs;
46
+
47
+ // Delete feature Idx and its frequency from FeatureFreqs.
48
+ bool DeleteFeatureFreq (uint32_t Idx) {
49
+ if (FeatureFreqs.empty ())
50
+ return false ;
51
+
52
+ // Binary search over local feature frequencies sorted by index.
53
+ auto Lower = std::lower_bound (FeatureFreqs.begin (), FeatureFreqs.end (),
54
+ std::pair<uint32_t , uint16_t >(Idx, 0 ));
55
+
56
+ if (Lower != FeatureFreqs.end () && Lower->first == Idx) {
57
+ FeatureFreqs.erase (Lower);
58
+ return true ;
59
+ }
60
+ return false ;
61
+ }
62
+
63
+ // Assign more energy to a high-entropy seed, i.e., that reveals more
64
+ // information about the globally rare features in the neighborhood
65
+ // of the seed. Since we do not know the entropy of a seed that has
66
+ // never been executed we assign fresh seeds maximum entropy and
67
+ // let II->Energy approach the true entropy from above.
68
+ void UpdateEnergy (size_t GlobalNumberOfFeatures) {
69
+ Energy = 0.0 ;
70
+ SumIncidence = 0 ;
71
+
72
+ // Apply add-one smoothing to locally discovered features.
73
+ for (auto F : FeatureFreqs) {
74
+ size_t LocalIncidence = F.second + 1 ;
75
+ Energy -= LocalIncidence * logl (LocalIncidence);
76
+ SumIncidence += LocalIncidence;
77
+ }
78
+
79
+ // Apply add-one smoothing to locally undiscovered features.
80
+ // PreciseEnergy -= 0; // since logl(1.0) == 0)
81
+ SumIncidence += (GlobalNumberOfFeatures - FeatureFreqs.size ());
82
+
83
+ // Add a single locally abundant feature apply add-one smoothing.
84
+ size_t AbdIncidence = NumExecutedMutations + 1 ;
85
+ Energy -= AbdIncidence * logl (AbdIncidence);
86
+ SumIncidence += AbdIncidence;
87
+
88
+ // Normalize.
89
+ if (SumIncidence != 0 )
90
+ Energy = (Energy / SumIncidence) + logl (SumIncidence);
91
+ }
92
+
93
+ // Increment the frequency of the feature Idx.
94
+ void UpdateFeatureFrequency (uint32_t Idx) {
95
+ NeedsEnergyUpdate = true ;
96
+
97
+ // The local feature frequencies is an ordered vector of pairs.
98
+ // If there are no local feature frequencies, push_back preserves order.
99
+ // Set the feature frequency for feature Idx32 to 1.
100
+ if (FeatureFreqs.empty ()) {
101
+ FeatureFreqs.push_back (std::pair<uint32_t , uint16_t >(Idx, 1 ));
102
+ return ;
103
+ }
104
+
105
+ // Binary search over local feature frequencies sorted by index.
106
+ auto Lower = std::lower_bound (FeatureFreqs.begin (), FeatureFreqs.end (),
107
+ std::pair<uint32_t , uint16_t >(Idx, 0 ));
108
+
109
+ // If feature Idx32 already exists, increment its frequency.
110
+ // Otherwise, insert a new pair right after the next lower index.
111
+ if (Lower != FeatureFreqs.end () && Lower->first == Idx) {
112
+ Lower->second ++;
113
+ } else {
114
+ FeatureFreqs.insert (Lower, std::pair<uint32_t , uint16_t >(Idx, 1 ));
115
+ }
116
+ }
117
+ };
118
+
119
// Settings for the entropy-based power schedule.
//
// Every member has a default so that a default-constructed instance is fully
// defined (schedule disabled) instead of holding indeterminate values.
struct EntropicOptions {
  // When false, the entropic schedule and rare-feature tracking are skipped.
  bool Enabled = false;
  // The set of rare features is only pruned while it holds more entries
  // than this.
  size_t NumberOfRarestFeatures = 0;
  // The set of rare features is only pruned while the frequency of its most
  // abundant member exceeds this.
  size_t FeatureFrequencyThreshold = 0;
};
42
124
43
125
class InputCorpus {
44
- static const size_t kFeatureSetSize = 1 << 21 ;
45
- public:
46
- InputCorpus (const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
126
+ static const uint32_t kFeatureSetSize = 1 << 21 ;
127
+ static const uint8_t kMaxMutationFactor = 20 ;
128
+ static const size_t kSparseEnergyUpdates = 100 ;
129
+
130
+ size_t NumExecutedMutations = 0 ;
131
+
132
+ EntropicOptions Entropic;
133
+
134
+ public:
135
+ InputCorpus (const std::string &OutputCorpus, EntropicOptions Entropic)
136
+ : Entropic(Entropic), OutputCorpus(OutputCorpus) {
47
137
memset (InputSizesPerFeature, 0 , sizeof (InputSizesPerFeature));
48
138
memset (SmallestElementPerFeature, 0 , sizeof (SmallestElementPerFeature));
49
139
}
@@ -70,6 +160,7 @@ class InputCorpus {
70
160
Res = std::max (Res, II->U .size ());
71
161
return Res;
72
162
}
163
+ void IncrementNumExecutedMutations () { NumExecutedMutations++; }
73
164
74
165
size_t NumInputsThatTouchFocusFunction () {
75
166
return std::count_if (Inputs.begin (), Inputs.end (), [](const InputInfo *II) {
@@ -99,6 +190,10 @@ class InputCorpus {
99
190
II.MayDeleteFile = MayDeleteFile;
100
191
II.UniqFeatureSet = FeatureSet;
101
192
II.HasFocusFunction = HasFocusFunction;
193
+ // Assign maximal energy to the new seed.
194
+ II.Energy = RareFeatures.empty () ? 1.0 : log (RareFeatures.size ());
195
+ II.SumIncidence = RareFeatures.size ();
196
+ II.NeedsEnergyUpdate = false ;
102
197
std::sort (II.UniqFeatureSet .begin (), II.UniqFeatureSet .end ());
103
198
ComputeSHA1 (U.data (), U.size (), II.Sha1 );
104
199
auto Sha1Str = Sha1ToString (II.Sha1 );
@@ -111,7 +206,7 @@ class InputCorpus {
111
206
// But if we don't, we'll use the DFT of its base input.
112
207
if (II.DataFlowTraceForFocusFunction .empty () && BaseII)
113
208
II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction ;
114
- UpdateCorpusDistribution () ;
209
+ DistributionNeedsUpdate = true ;
115
210
PrintCorpus ();
116
211
// ValidateFeatureSet();
117
212
return &II;
@@ -162,7 +257,7 @@ class InputCorpus {
162
257
Hashes.insert (Sha1ToString (II->Sha1 ));
163
258
II->U = U;
164
259
II->Reduced = true ;
165
- UpdateCorpusDistribution () ;
260
+ DistributionNeedsUpdate = true ;
166
261
}
167
262
168
263
bool HasUnit (const Unit &U) { return Hashes.count (Hash (U)); }
@@ -175,6 +270,7 @@ class InputCorpus {
175
270
176
271
// Returns an index of random unit from the corpus to mutate.
177
272
size_t ChooseUnitIdxToMutate (Random &Rand) {
273
+ UpdateCorpusDistribution (Rand);
178
274
size_t Idx = static_cast <size_t >(CorpusDistribution (Rand));
179
275
assert (Idx < Inputs.size ());
180
276
return Idx;
@@ -210,10 +306,65 @@ class InputCorpus {
210
306
InputInfo &II = *Inputs[Idx];
211
307
DeleteFile (II);
212
308
Unit ().swap (II.U );
309
+ II.Energy = 0.0 ;
310
+ II.NeedsEnergyUpdate = false ;
311
+ DistributionNeedsUpdate = true ;
213
312
if (FeatureDebug)
214
313
Printf (" EVICTED %zd\n " , Idx);
215
314
}
216
315
316
+ void AddRareFeature (uint32_t Idx) {
317
+ // Maintain *at least* TopXRarestFeatures many rare features
318
+ // and all features with a frequency below ConsideredRare.
319
+ // Remove all other features.
320
+ while (RareFeatures.size () > Entropic.NumberOfRarestFeatures &&
321
+ FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold ) {
322
+
323
+ // Find most and second most abbundant feature.
324
+ uint32_t MostAbundantRareFeatureIndices[2 ] = {RareFeatures[0 ],
325
+ RareFeatures[0 ]};
326
+ size_t Delete = 0 ;
327
+ for (size_t i = 0 ; i < RareFeatures.size (); i++) {
328
+ uint32_t Idx2 = RareFeatures[i];
329
+ if (GlobalFeatureFreqs[Idx2] >=
330
+ GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0 ]]) {
331
+ MostAbundantRareFeatureIndices[1 ] = MostAbundantRareFeatureIndices[0 ];
332
+ MostAbundantRareFeatureIndices[0 ] = Idx2;
333
+ Delete = i;
334
+ }
335
+ }
336
+
337
+ // Remove most abundant rare feature.
338
+ RareFeatures[Delete] = RareFeatures.back ();
339
+ RareFeatures.pop_back ();
340
+
341
+ for (auto II : Inputs) {
342
+ if (II->DeleteFeatureFreq (MostAbundantRareFeatureIndices[0 ]))
343
+ II->NeedsEnergyUpdate = true ;
344
+ }
345
+
346
+ // Set 2nd most abundant as the new most abundant feature count.
347
+ FreqOfMostAbundantRareFeature =
348
+ GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1 ]];
349
+ }
350
+
351
+ // Add rare feature, handle collisions, and update energy.
352
+ RareFeatures.push_back (Idx);
353
+ GlobalFeatureFreqs[Idx] = 0 ;
354
+ for (auto II : Inputs) {
355
+ II->DeleteFeatureFreq (Idx);
356
+
357
+ // Apply add-one smoothing to this locally undiscovered feature.
358
+ // Zero energy seeds will never be fuzzed and remain zero energy.
359
+ if (II->Energy > 0.0 ) {
360
+ II->SumIncidence += 1 ;
361
+ II->Energy += logl (II->SumIncidence ) / II->SumIncidence ;
362
+ }
363
+ }
364
+
365
+ DistributionNeedsUpdate = true ;
366
+ }
367
+
217
368
bool AddFeature (size_t Idx, uint32_t NewSize, bool Shrink) {
218
369
assert (NewSize);
219
370
Idx = Idx % kFeatureSetSize ;
@@ -228,6 +379,8 @@ class InputCorpus {
228
379
DeleteInput (OldIdx);
229
380
} else {
230
381
NumAddedFeatures++;
382
+ if (Entropic.Enabled )
383
+ AddRareFeature ((uint32_t )Idx);
231
384
}
232
385
NumUpdatedFeatures++;
233
386
if (FeatureDebug)
@@ -239,6 +392,30 @@ class InputCorpus {
239
392
return false ;
240
393
}
241
394
395
+ // Increment frequency of feature Idx globally and locally.
396
+ void UpdateFeatureFrequency (InputInfo *II, size_t Idx) {
397
+ uint32_t Idx32 = Idx % kFeatureSetSize ;
398
+
399
+ // Saturated increment.
400
+ if (GlobalFeatureFreqs[Idx32] == 0xFFFF )
401
+ return ;
402
+ uint16_t Freq = GlobalFeatureFreqs[Idx32]++;
403
+
404
+ // Skip if abundant.
405
+ if (Freq > FreqOfMostAbundantRareFeature ||
406
+ std::find (RareFeatures.begin (), RareFeatures.end (), Idx32) ==
407
+ RareFeatures.end ())
408
+ return ;
409
+
410
+ // Update global frequencies.
411
+ if (Freq == FreqOfMostAbundantRareFeature)
412
+ FreqOfMostAbundantRareFeature++;
413
+
414
+ // Update local frequencies.
415
+ if (II)
416
+ II->UpdateFeatureFrequency (Idx32);
417
+ }
418
+
242
419
size_t NumFeatures () const { return NumAddedFeatures; }
243
420
size_t NumFeatureUpdates () const { return NumUpdatedFeatures; }
244
421
@@ -265,19 +442,60 @@ class InputCorpus {
265
442
// Updates the probability distribution for the units in the corpus.
266
443
// Must be called whenever the corpus or unit weights are changed.
267
444
//
268
- // Hypothesis: units added to the corpus last are more interesting.
269
- //
270
- // Hypothesis: inputs with infrequent features are more interesting.
271
- void UpdateCorpusDistribution () {
445
+ // Hypothesis: inputs that maximize information about globally rare features
446
+ // are interesting.
447
+ void UpdateCorpusDistribution (Random &Rand) {
448
+ // Skip update if no seeds or rare features were added/deleted.
449
+ // Sparse updates for local change of feature frequencies,
450
+ // i.e., randomly do not skip.
451
+ if (!DistributionNeedsUpdate &&
452
+ (!Entropic.Enabled || Rand (kSparseEnergyUpdates )))
453
+ return ;
454
+
455
+ DistributionNeedsUpdate = false ;
456
+
272
457
size_t N = Inputs.size ();
273
458
assert (N);
274
459
Intervals.resize (N + 1 );
275
460
Weights.resize (N);
276
461
std::iota (Intervals.begin (), Intervals.end (), 0 );
277
- for (size_t i = 0 ; i < N; i++)
278
- Weights[i] = Inputs[i]->NumFeatures
279
- ? (i + 1 ) * (Inputs[i]->HasFocusFunction ? 1000 : 1 )
280
- : 0 .;
462
+
463
+ bool VanillaSchedule = true ;
464
+ if (Entropic.Enabled ) {
465
+ for (auto II : Inputs) {
466
+ if (II->NeedsEnergyUpdate && II->Energy != 0.0 ) {
467
+ II->NeedsEnergyUpdate = false ;
468
+ II->UpdateEnergy (RareFeatures.size ());
469
+ }
470
+ }
471
+
472
+ for (size_t i = 0 ; i < N; i++) {
473
+
474
+ if (Inputs[i]->NumFeatures == 0 ) {
475
+ // If the seed doesn't represent any features, assign zero energy.
476
+ Weights[i] = 0 .;
477
+ } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor >
478
+ NumExecutedMutations / Inputs.size ()) {
479
+ // If the seed was fuzzed a lot more than average, assign zero energy.
480
+ Weights[i] = 0 .;
481
+ } else {
482
+ // Otherwise, simply assign the computed energy.
483
+ Weights[i] = Inputs[i]->Energy ;
484
+ }
485
+
486
+ // If energy for all seeds is zero, fall back to vanilla schedule.
487
+ if (Weights[i] > 0.0 )
488
+ VanillaSchedule = false ;
489
+ }
490
+ }
491
+
492
+ if (VanillaSchedule) {
493
+ for (size_t i = 0 ; i < N; i++)
494
+ Weights[i] = Inputs[i]->NumFeatures
495
+ ? (i + 1 ) * (Inputs[i]->HasFocusFunction ? 1000 : 1 )
496
+ : 0 .;
497
+ }
498
+
281
499
if (FeatureDebug) {
282
500
for (size_t i = 0 ; i < N; i++)
283
501
Printf (" %zd " , Inputs[i]->NumFeatures );
@@ -302,6 +520,11 @@ class InputCorpus {
302
520
uint32_t InputSizesPerFeature[kFeatureSetSize ];
303
521
uint32_t SmallestElementPerFeature[kFeatureSetSize ];
304
522
523
+ bool DistributionNeedsUpdate = true ;
524
+ uint16_t FreqOfMostAbundantRareFeature = 0 ;
525
+ uint16_t GlobalFeatureFreqs[kFeatureSetSize ] = {};
526
+ Vector<uint32_t > RareFeatures;
527
+
305
528
std::string OutputCorpus;
306
529
};
307
530
0 commit comments