Enable consistently setting the random seed (#792)

This change adds two new settings, as specified in #780.
- A specific `random_seed` may now be set in the engine settings. It is used everywhere random values are generated, except where a different `random_seed` is specified for a checker through the checker settings.
- A new `generate_random_seed` option generates a new random seed for the run. This is helpful for CI/CD cases that run in `random-walk` mode and would like different sequences to be exercised on every run.

Testing:
- Added a new test.
microsoft · Jul 14, 2023 · 795e53e · 795e53e
1 parent 9d4619c
commit 795e53e
Show file tree

Hide file tree

Showing 10 changed files with 161 additions and 9 deletions.
diff --git a/docs/user-guide/SettingsFile.md b/docs/user-guide/SettingsFile.md
@@ -501,6 +501,16 @@ For instance, any custom payloads
 or fuzzable values for this endpoint will be taken from the specified custom dictionary
 instead of the default dictionary.
 
+### random_seed: int (default 12345)
+The random seed to use for the RESTler invocation.  If no seed is specified and
+`generate_random_seed` is `False`, the default seed (12345) is used on every run.  Checkers may have a separate `random_seed` setting that overrides this setting.
+
+### generate_random_seed: bool (default False)
+When `True`, generate a new random seed instead of using the default or user-specified
+`random_seed`.  This setting also overrides any `random_seed` checker settings.
+The random seed that was used for the run is logged in main.txt as well as in the
+testing summary.
+
 ### custom_value_generators: string (default None)
 If this setting is set to a valid path with a ```.py``` extension,
 RESTler will try to import the contents of this

diff --git a/restler/checkers/body_schema_fuzzer.py b/restler/checkers/body_schema_fuzzer.py
@@ -8,9 +8,10 @@
 import json
 import random
 
+from restler_settings import Settings
+
 from engine.fuzzing_parameters.fuzzing_utils import *
 from engine.fuzzing_parameters.request_params import *
-
 from engine.fuzzing_parameters.param_combinations import JsonBodySchemaFuzzerBase
 
 class BodySchemaStructuralFuzzer(JsonBodySchemaFuzzerBase):
@@ -65,6 +66,8 @@ def run(self, schema_seed, config={}):
             self._shuffle_combination = config['shuffle_combination']
         if 'random_seed' in config:
             self._random_seed = config['random_seed']
+        else:
+            self._random_seed = Settings().random_seed
 
         # overwrite fuzzer-specific configuration
         self._set_fuzzer_config()

diff --git a/restler/checkers/invalid_value_checker.py b/restler/checkers/invalid_value_checker.py
@@ -178,6 +178,8 @@ def init_mutations(self):
             self._value_generators_file_path = default_value_generators_file_path
 
         self._override_random_seed = Settings().get_checker_arg(self._friendly_name, 'random_seed')
+        if self._override_random_seed is None:
+            self._override_random_seed = Settings().random_seed
 
     def apply(self, rendered_sequence, lock):
         """ Fuzzes each value in the parameters of this request as specified by

diff --git a/restler/engine/core/driver.py b/restler/engine/core/driver.py
@@ -7,14 +7,14 @@
 import traceback
 import copy
 import time
-import random
 import inspect
 import itertools
 import functools
 import multiprocessing
 from multiprocessing.dummy import Pool as ThreadPool
 from collections import deque
 import re
+from random import Random
 
 from restler_settings import Settings
 import utils.logger as logger
@@ -56,7 +56,7 @@ def validate_dependencies(consumer_req, producer_seq):
     return consumer_req.consumes <= set(producer_requests)
 
 
-def extend(seq_collection, fuzzing_requests, lock):
+def extend(seq_collection, fuzzing_requests, lock, random_gen):
     """ Extends each sequence currently present in collection by any request
     from request collection whose dependencies can be resolved if appended at
     the end of the target sequence.
@@ -121,7 +121,7 @@ def extend(seq_collection, fuzzing_requests, lock):
     # one randomly selected sequence
     if Settings().fuzzing_mode == 'random-walk':
         if len(seq_collection) > 0:
-            rand_int = random.randint(prev_len, len(seq_collection) - 1)
+            rand_int = random_gen.randint(prev_len, len(seq_collection) - 1)
             return seq_collection[rand_int: rand_int + 1], extended_requests[rand_int: rand_int + 1]
         else:
             return [], []
@@ -575,6 +575,7 @@ def generate_sequences(fuzzing_requests, checkers, fuzzing_jobs=1, garbage_colle
 
     fuzzing_mode = Settings().fuzzing_mode
     max_len = Settings().max_sequence_length
+    random_gen = Random(Settings().random_seed)
 
     if fuzzing_jobs > 1:
         render = render_parallel
@@ -676,7 +677,9 @@ def generate_sequences(fuzzing_requests, checkers, fuzzing_jobs=1, garbage_colle
                     # go to the next generation
                     continue
             else:
-                seq_collection, extended_requests = extend(seq_collection, fuzzing_requests, global_lock)
+                seq_collection, extended_requests = extend(seq_collection,
+                                                           fuzzing_requests,
+                                                           global_lock, random_gen)
 
             print(f"{formatting.timestamp()}: Generation: {generation} ")
             logger.write_to_main(

diff --git a/restler/engine/core/requests.py b/restler/engine/core/requests.py
@@ -5,15 +5,15 @@
 from __future__ import print_function
 import time
 import types
-import random
-random.seed(12345)
 import itertools
 import functools, operator
 import collections
 import datetime
 import copy
 
 from restler_settings import Settings
+from random import Random
+
 import engine.core.request_utilities as request_utilities
 from engine.core.request_utilities import str_to_hex_def
 from engine.fuzzing_parameters.request_examples import RequestExamples
@@ -305,6 +305,8 @@ def __init__(self, definition=[], requestId=None):
         self._last_rendered_schema_request = None
         self._is_resource_generator = None
 
+        self._random = Random(Settings().random_seed)
+
         # Check for empty request before assigning ids
         if self._definition:
             self._set_hex_definitions(requestId)
@@ -1007,7 +1009,7 @@ def _raise_dict_err(type, tag):
                     values = [(values, quoted, writer_variable)]
 
             if Settings().fuzzing_mode == 'random-walk' and not preprocessing:
-                random.shuffle(values)
+                self._random.shuffle(values)
 
             if len(values) == 0:
                 _raise_dict_err(primitive_type, "empty value list")

diff --git a/restler/restler.py b/restler/restler.py
@@ -442,6 +442,10 @@ def signal_handler(sig, frame):
         }
         )
 
+    # Write the random seed to main in case the run exits in the middle and needs to be
+    # restarted with the same seed
+    logger.write_to_main(f"Random seed: {Settings().random_seed}")
+
     # Initialize the fuzzing monitor
     monitor = fuzzing_monitor.FuzzingMonitor()
 

diff --git a/restler/restler_settings.py b/restler/restler_settings.py
@@ -8,6 +8,7 @@
 import json
 import sys
 import re
+import time
 
 class TokenAuthMethod(Enum):
     """ Enum of token auth methods """
@@ -508,6 +509,14 @@ def convert_wildcards_to_regex(str_value):
         ## If set, poll for async resource creation before continuing
         self._wait_for_async_resource_creation = SettingsArg('wait_for_async_resource_creation', bool, True, user_args)
 
+        ## The random seed to use (may be overridden by checker-specific random seeds)
+        self._random_seed = SettingsArg('random_seed', int, 12345, user_args, minval=0)
+        ## Generate a new random seed instead of using the one specified
+        ## When specified, the seed will be used for all of the checkers as well.
+        self._generate_random_seed = SettingsArg('generate_random_seed', bool, False, user_args)
+        if self._generate_random_seed.val:
+            self._random_seed.val = time.time()
+
         self._connection_settings = ConnectionSettings(self._target_ip.val,
                                                        self._target_port.val,
                                                        not self._no_ssl.val,
@@ -667,6 +676,14 @@ def reconnect_on_every_request(self):
     def max_sequence_length(self):
         return self._max_sequence_length.val
 
+    @property
+    def random_seed(self):
+        return self._random_seed.val
+
+    @property
+    def generate_random_seed(self):
+        return self._generate_random_seed.val
+
     @property
     def no_tokens_in_logs(self):
         return self._no_tokens_in_logs.val

diff --git a/restler/unit_tests/test_basic_functionality_end_to_end.py b/restler/unit_tests/test_basic_functionality_end_to_end.py
@@ -1179,3 +1179,111 @@ def check_gc_stats(max_objects):
         run_test(None, False)
         check_gc_error(None)
         check_gc_stats(None)
+
+    def test_random_seed_settings(self):
+        """ This test is identical to test_abc_minimal_smoke_test, except that it modifies the random seed
+        settings.  The test checks that the same sequences are sent in 'test' mode, but different sequences
+        are sent in 'random-walk' mode, and tests that the seed was output to the testing summary.
+        """
+        def create_settings_file(settings):
+            new_settings_file_path = os.path.join(Test_File_Directory, "random_seed_settings.json")
+            try:
+                json_settings = json.dumps(settings, indent=4)
+                with open(new_settings_file_path, "w") as f:
+                    f.write(json_settings)
+                return new_settings_file_path
+            except Exception as e:
+                print(e)
+                return None
+
+        def test_with_settings(settings):
+            try:
+                new_settings_file_path = create_settings_file(settings)
+                self.run_abc_smoke_test(Test_File_Directory, "abc_test_grammar.py",
+                                        "directed-smoke-test", settings_file=new_settings_file_path)
+            finally:
+                ## Clean up temporary settings file
+                if os.path.exists(new_settings_file_path):
+                    os.remove(new_settings_file_path)
+
+            experiments_dir = self.get_experiments_dir()
+
+            # Make sure all requests were successfully rendered.  This is because the comparisons below do not
+            # take status codes into account
+
+            # Make sure the right number of requests was sent.
+            testing_summary_file_path = os.path.join(experiments_dir, "logs", "testing_summary.json")
+            DEFAULT_RANDOM_SEED = 12345
+            try:
+                with open(testing_summary_file_path, 'r') as file:
+                    testing_summary = json.loads(file.read())
+                    total_requests_sent = testing_summary["total_requests_sent"]["main_driver"]
+                    num_fully_valid = testing_summary["num_fully_valid"]
+                    self.assertEqual(num_fully_valid, 5)
+                    self.assertLessEqual(total_requests_sent, 14)
+
+                    # Make sure the random seed was output to the testing summary
+                    if 'random_seed' in settings:
+                        if 'generate_random_seed' in settings:
+                            self.assertNotEqual(testing_summary["settings"]["random_seed"], settings["random_seed"])
+                        else:
+                            self.assertEqual(testing_summary["settings"]["random_seed"], settings["random_seed"])
+                    else:
+                        if 'generate_random_seed' in settings:
+                            self.assertNotEqual(testing_summary["settings"]["random_seed"], DEFAULT_RANDOM_SEED)
+                        else:
+                            self.assertEqual(testing_summary["settings"]["random_seed"], DEFAULT_RANDOM_SEED)
+
+                default_parser = FuzzingLogParser(os.path.join(Test_File_Directory, "abc_smoke_test_testing_log.txt"))
+                test_parser = FuzzingLogParser(self.get_network_log_path(experiments_dir, logger.LOG_TYPE_TESTING))
+                self.assertTrue(default_parser.diff_log(test_parser))
+            except TestFailedException:
+                self.fail("Smoke test failed: Fuzzing")
+
+        def random_walk_test(settings, expected_equal):
+
+            try:
+                new_settings_file_path = create_settings_file(settings)
+                # First run
+                self.run_abc_smoke_test(Test_File_Directory, "abc_test_grammar.py",
+                                        "random-walk", settings_file=new_settings_file_path)
+                experiments_dir = self.get_experiments_dir()
+
+                parser_1 = FuzzingLogParser(self.get_network_log_path(experiments_dir, logger.LOG_TYPE_TESTING), max_seq=20)
+
+                # Second run
+                self.run_abc_smoke_test(Test_File_Directory, "abc_test_grammar.py",
+                                        "random-walk", settings_file=new_settings_file_path)
+                experiments_dir = self.get_experiments_dir()
+
+                parser_2 = FuzzingLogParser(self.get_network_log_path(experiments_dir, logger.LOG_TYPE_TESTING), max_seq=20)
+                diff_result = parser_1.diff_log(parser_2)
+                if expected_equal:
+                    self.assertTrue(diff_result)
+                else:
+                    self.assertFalse(diff_result)
+
+            finally:
+                ## Clean up temporary settings file
+                if os.path.exists(new_settings_file_path):
+                    os.remove(new_settings_file_path)
+
+        # Test with a random seed
+        test_with_settings({"random_seed": 1234})
+
+        # Test with a random seed and generate_random_seed
+        test_with_settings({"random_seed": 1234, "generate_random_seed": True})
+
+        # Test with no random seed
+        test_with_settings({})
+
+        # Test with generate_random_seed
+        test_with_settings({"generate_random_seed": True})
+
+        # Test two runs without a random seed specified.  The same random seed should be used,
+        # and the payloads are expected to be equal.
+        random_walk_test({ "time_budget": 0.01}, True)
+
+        # Test two runs with 'generate_random_seed' set to True.  Different random seeds should be used,
+        # and the payloads are expected to be different.
+        random_walk_test({"generate_random_seed": True, "time_budget": 0.01}, False)
diff --git a/restler/utils/logger.py b/restler/utils/logger.py
@@ -1027,7 +1027,9 @@ def print_generation_stats(req_collection, fuzzing_monitor, global_lock, final=F
         testing_summary['total_requests_sent'] = total_requests_sent
         testing_summary['bug_buckets'] = bug_buckets
         testing_summary['reproducible_bug_buckets'] = BugBuckets.Instance().repro_bug_buckets()
-
+        settings_summary = OrderedDict()
+        settings_summary['random_seed'] = Settings().random_seed
+        testing_summary['settings'] = settings_summary
         with open(os.path.join(LOGS_DIR, "testing_summary.json"), "w+", encoding='utf-8') as summary_json:
             json.dump(testing_summary, summary_json, indent=4)
 

diff --git a/src/driver/Types.fs b/src/driver/Types.fs
@@ -182,6 +182,7 @@ module Engine =
             total_requests_sent : Dictionary<string, int>
             bug_buckets : Dictionary<string, int>
             reproducible_bug_buckets : Dictionary<string, int>
+            settings : Dictionary<string, obj>
         }
 
 /// Helper module to produce compact messages in the console, but more