diff --git a/common.json b/common.json index 67c5ab3bd1e2..fdef930cc0fb 100644 --- a/common.json +++ b/common.json @@ -4,7 +4,7 @@ "Jsonnet files should not include this file directly but use ci/common.jsonnet instead." ], - "mx_version": "7.65.3", + "mx_version": "7.67.0", "COMMENT.jdks": "When adding or removing JDKs keep in sync with JDKs in ci/common.jsonnet", "jdks": { diff --git a/sdk/mx.sdk/mx_sdk_benchmark.py b/sdk/mx.sdk/mx_sdk_benchmark.py index aaaa28f28918..f71f54ee85ec 100644 --- a/sdk/mx.sdk/mx_sdk_benchmark.py +++ b/sdk/mx.sdk/mx_sdk_benchmark.py @@ -72,7 +72,7 @@ import mx_sdk_vm_impl import mx_util from mx_util import Stage, StageName, Layer -from mx_benchmark import DataPoints, DataPoint, BenchmarkSuite, Vm, SingleBenchmarkExecutionContext, ForkInfo +from mx_benchmark import DataPoints, DataPoint, BenchmarkSuite, bm_exec_context, ConstantContextValueManager, SingleBenchmarkManager from mx_sdk_vm_impl import svm_experimental_options _suite = mx.suite('sdk') @@ -1786,7 +1786,7 @@ def get_layer_aware_build_args(self) -> List[str]: def run_stage_image(self): executable_name_args = ['-o', self.config.final_image_name] - pgo_args = [f"--pgo={self.config.profile_path}"] + pgo_args = [f"--pgo={self.config.bm_suite.get_pgo_profile_for_image_build(self.config.profile_path)}"] if self.pgo_use_perf: # -g is already set in base_image_build_args if we're not using perf. When using perf, if debug symbols # are present they will interfere with sample decoding using source mappings. @@ -1946,8 +1946,8 @@ def _prepare_for_running(self, args, out, err, cwd, nonZeroIsFatal): self.stages_context = StagesContext(self, out, err, nonZeroIsFatal, os.path.abspath(cwd if cwd else os.getcwd())) file_name = f"staged-benchmark.{self.ext}" output_dir = self.bmSuite.get_image_output_dir( - self.bmSuite.benchmark_output_dir(self.bmSuite.execution_context.benchmark, args), - self.bmSuite.get_full_image_name(self.bmSuite.get_base_image_name(), self.bmSuite.execution_context.virtual_machine.config_name()) + self.bmSuite.benchmark_output_dir(bm_exec_context().get("benchmark"), args), + self.bmSuite.get_full_image_name(self.bmSuite.get_base_image_name(), bm_exec_context().get("vm").config_name()) ) self.staged_program_file_path = output_dir / file_name self.staged_program_file_path.parent.mkdir(parents=True, exist_ok=True) @@ -3178,7 +3178,7 @@ def subgroup(self): return "graal-compiler" def benchmarkName(self): - return self.execution_context.benchmark + return bm_exec_context().get("benchmark") def benchmarkList(self, bmSuiteArgs): exclude = [] @@ -3226,8 +3226,9 @@ def validateEnvironment(self): self.baristaProjectConfigurationPath() self.baristaHarnessPath() - def new_execution_context(self, vm: Optional[Vm], benchmarks: List[str], bmSuiteArgs: List[str], fork_info: Optional[ForkInfo] = None) -> SingleBenchmarkExecutionContext: - return SingleBenchmarkExecutionContext(self, vm, benchmarks, bmSuiteArgs, fork_info) + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: + with SingleBenchmarkManager(self): + return super().run(benchmarks, bmSuiteArgs) def createCommandLineArgs(self, benchmarks, bmSuiteArgs): # Pass the VM options, BaristaCommand will form the final command. 
@@ -3490,7 +3491,7 @@ def produceHarnessCommand(self, cmd, suite): jvm_vm_options = jvm_cmd[index_of_java_exe + 1:] # Verify that the run arguments don't already contain a "--mode" option - run_args = suite.runArgs(suite.execution_context.bmSuiteArgs) + self._energyTrackerExtraOptions(suite) + run_args = suite.runArgs(bm_exec_context().get("bm_suite_args")) + self._energyTrackerExtraOptions(suite) mode_pattern = r"^(?:-m|--mode)(=.*)?$" mode_match = self._regexFindInCommand(run_args, mode_pattern) if mode_match: @@ -4128,7 +4129,7 @@ def intercept_run(self, super_delegate: BenchmarkSuite, benchmarks, bm_suite_arg datapoints: List[DataPoint] = [] vm = self.get_vm_registry().get_vm_from_suite_args(bm_suite_args) - with self.new_execution_context(vm, benchmarks, bm_suite_args): + with ConstantContextValueManager("vm", vm): effective_stages, complete_stage_list = vm.prepare_stages(self, bm_suite_args) self.stages_info = StagesInfo(effective_stages, complete_stage_list, vm) @@ -4261,7 +4262,7 @@ def run(self, benchmarks, bm_suite_args: List[str]) -> DataPoints: fallback_reason = self.fallback_mode_reason(bm_suite_args) vm = self.get_vm_registry().get_vm_from_suite_args(bm_suite_args) - with self.new_execution_context(vm, benchmarks, bm_suite_args): + with ConstantContextValueManager("vm", vm): effective_stages, complete_stage_list = vm.prepare_stages(self, bm_suite_args) self.stages_info = StagesInfo(effective_stages, complete_stage_list, vm, bool(fallback_reason)) @@ -4502,6 +4503,13 @@ def get_image_output_dir(self, benchmark_output_dir: str, full_image_name: str) """ return Path(benchmark_output_dir).absolute() / "native-image-benchmarks" / full_image_name + def get_pgo_profile_for_image_build(self, default_pgo_profile: str) -> str: + vm_args = self.vmArgs(bm_exec_context().get("bm_suite_args")) + parsed_arg = parse_prefixed_arg("-Dnative-image.benchmark.pgo=", vm_args, "Native Image benchmark PGO profiles should only be specified once!") + if not parsed_arg: + return default_pgo_profile + return parsed_arg + def measureTimeToFirstResponse(bmSuite): protocolHost = bmSuite.serviceHost() diff --git a/sdk/mx.sdk/suite.py b/sdk/mx.sdk/suite.py index 6bc5fc87218a..4781d3be65ef 100644 --- a/sdk/mx.sdk/suite.py +++ b/sdk/mx.sdk/suite.py @@ -39,7 +39,7 @@ # SOFTWARE. 
# suite = { - "mxversion": "7.58.6", + "mxversion": "7.67.0", "name" : "sdk", "version" : "25.1.0", "release" : False, diff --git a/substratevm/mx.substratevm/mx_substratevm_benchmark.py b/substratevm/mx.substratevm/mx_substratevm_benchmark.py index f914b92d1a46..e36e9d8315f8 100644 --- a/substratevm/mx.substratevm/mx_substratevm_benchmark.py +++ b/substratevm/mx.substratevm/mx_substratevm_benchmark.py @@ -36,6 +36,7 @@ import mx import mx_benchmark import mx_sdk_benchmark +from mx_benchmark import bm_exec_context, SingleBenchmarkManager from mx_sdk_benchmark import SUCCESSFUL_STAGE_PATTERNS, parse_prefixed_args from mx_util import StageName, Layer @@ -291,12 +292,7 @@ def benchmarkList(self, bmSuiteArgs): def default_stages(self) -> List[str]: if self.benchmarkName() == "micronaut-pegasus": - if ( - self.execution_context and - self.execution_context.virtual_machine and - self.execution_context.virtual_machine.config_name() and - self.execution_context.virtual_machine.config_name().endswith("-ce") - ): + if bm_exec_context().has("vm") and bm_exec_context().get("vm").config_name().endswith("-ce"): # fails on CE due to --enable-sbom EE only option injected from upstream pom (GR-66891) return [] # The 'agent' stage is not supported, as currently we cannot run micronaut-pegasus on the JVM (GR-59793) @@ -394,7 +390,8 @@ def build_assertions(self, benchmark: str, is_gate: bool) -> List[str]: return super().build_assertions(benchmark, is_gate) def run(self, benchmarks, bmSuiteArgs) -> mx_benchmark.DataPoints: - return self.intercept_run(super(), benchmarks, bmSuiteArgs) + with SingleBenchmarkManager(self): + return self.intercept_run(super(), benchmarks, bmSuiteArgs) def ensure_image_is_at_desired_location(self, bmSuiteArgs): if self.stages_info.current_stage.is_image() and self.application_fixed_image_name() is not None: @@ -441,7 +438,7 @@ def _get_built_app_image(self, suite, stage): In the case of `instrument-run`, retrieves the image built during `instrument-image`. In the case of `run`, retrieves the image built during `image`. 
""" - vm = suite.execution_context.virtual_machine + vm = bm_exec_context().get("vm") if stage.stage_name == StageName.INSTRUMENT_RUN: return vm.config.instrumented_image_path else: @@ -470,6 +467,7 @@ def produceHarnessCommand(self, cmd, suite): raise TypeError(f"Expected an instance of {BaristaNativeImageBenchmarkSuite.__name__}, instead got an instance of {suite.__class__.__name__}") stage = suite.stages_info.current_stage + bm_suite_args = bm_exec_context().get("bm_suite_args") if stage.is_agent(): # BaristaCommand works for agent stage, since it's a JVM stage cmd = self.produce_JVM_harness_command(cmd, suite) @@ -477,8 +475,8 @@ def produceHarnessCommand(self, cmd, suite): cmd += self._short_load_testing_phases() # Add explicit agent stage args cmd += self._energyTrackerExtraOptions(suite) - cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-jvm-arg=", suite.execution_context.bmSuiteArgs) - cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-agent-run-arg=", suite.execution_context.bmSuiteArgs) + cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-jvm-arg=", bm_suite_args) + cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-agent-run-arg=", bm_suite_args) return cmd # Extract app image options and command prefix from the NativeImageVM command @@ -499,18 +497,18 @@ def produceHarnessCommand(self, cmd, suite): ni_barista_cmd = [suite.baristaHarnessPath(), "--mode", "native", "--app-executable", app_image] if barista_workload is not None: ni_barista_cmd.append(f"--config={barista_workload}") - ni_barista_cmd += suite.runArgs(suite.execution_context.bmSuiteArgs) + self._energyTrackerExtraOptions(suite) - ni_barista_cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-jvm-arg=", suite.execution_context.bmSuiteArgs) + ni_barista_cmd += suite.runArgs(bm_suite_args) + self._energyTrackerExtraOptions(suite) + ni_barista_cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-jvm-arg=", bm_suite_args) if stage.is_instrument(): # Make instrument run short ni_barista_cmd += self._short_load_testing_phases() - if suite.execution_context.benchmark == "play-scala-hello-world": + if bm_exec_context().get("benchmark") == "play-scala-hello-world": self._updateCommandOption(ni_barista_cmd, "--vm-options", "-v", "-Dpidfile.path=/dev/null") # Add explicit instrument stage args - ni_barista_cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-profile-run-arg=", suite.execution_context.bmSuiteArgs) or parse_prefixed_args("-Dnative-image.benchmark.extra-run-arg=", suite.execution_context.bmSuiteArgs) + ni_barista_cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-profile-run-arg=", bm_suite_args) or parse_prefixed_args("-Dnative-image.benchmark.extra-run-arg=", bm_suite_args) else: # Add explicit run stage args - ni_barista_cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-run-arg=", suite.execution_context.bmSuiteArgs) + ni_barista_cmd += parse_prefixed_args("-Dnative-image.benchmark.extra-run-arg=", bm_suite_args) if nivm_cmd_prefix: self._updateCommandOption(ni_barista_cmd, "--cmd-app-prefix", "-p", " ".join(nivm_cmd_prefix)) if nivm_app_options: diff --git a/substratevm/mx.substratevm/suite.py b/substratevm/mx.substratevm/suite.py index 1b05f5ce2d67..213463f0681a 100644 --- a/substratevm/mx.substratevm/suite.py +++ b/substratevm/mx.substratevm/suite.py @@ -1,6 +1,6 @@ # pylint: disable=line-too-long suite = { - "mxversion": "7.58.6", + "mxversion": "7.67.0", "name": "substratevm", "version" : "25.1.0", "release" : False, diff --git 
a/truffle/mx.truffle/mx_polybench/model.py b/truffle/mx.truffle/mx_polybench/model.py index e85cd26d73a7..d1eacedd3a95 100644 --- a/truffle/mx.truffle/mx_polybench/model.py +++ b/truffle/mx.truffle/mx_polybench/model.py @@ -44,8 +44,9 @@ import os import re import shutil -from argparse import ArgumentParser -from typing import Callable, Dict, FrozenSet, Iterable, List, NamedTuple, Optional, Set, Tuple, Union, Any +from argparse import ArgumentParser, Namespace +from pathlib import Path +from typing import Callable, Dict, FrozenSet, Iterable, List, NamedTuple, Optional, Set, Tuple, Union, Any, Generator import mx import mx_benchmark @@ -63,9 +64,12 @@ get_parser, DataPointsPostProcessor, ForkInfo, - BenchmarkExecutionContext, - Vm, - BenchmarkSuite, + BenchmarkExecutionConfiguration, + BenchmarkDispatcher, + BenchmarkDispatcherState, + bm_exec_context, + BoxContextValue, + ConstantContextValueManager, ) from mx_jardistribution import JARDistribution @@ -256,6 +260,687 @@ def _check_dist(dist_name: str, require_built: bool = True) -> Optional[str]: mx.abort(f"Unsupported distribution kind {type(dist)}") +class StabilizingPolybenchBenchmarkDispatcher(mx_benchmark.DefaultBenchmarkDispatcher): + """ + Custom dispatching class for non-native-image PolybenchBenchmarkSuite stable runs that facilitates scheduling based + on a `--stable-run-config` configuration file: + * Schedules the appropriate number of forks for each specified benchmark according to their configuration. + + The `--stable-run-config` configuration file should be a JSON object, where each key is a benchmark name. + Each entry must include a "run-forks" dictionary with a "count" property that specifies the number of forks + to schedule for that benchmark. + Additional properties may be present, but they are not relevant to this dispatcher class. + + Example: + + If the `--stable-run-config` configuration file specifies the following configuration (fields irrelevant + for scheduling have been omitted): + ``` + { + "interpreter/sieve.py": { + "run-forks": { "count": 1 } + }, + "interpreter/fibonacci.py": { + "run-forks": { "count": 3 } + }, + "interpreter/richards.py": { + "run-forks": { "count": 2 } + } + } + ``` + + This dispatcher will produce the following schedule (can be obtained by appending the `--dry-stable-run` option + to your `mx benchmark` command): + ``` + * Bench batch #1 + [#1] Fork #0: interpreter/richards.py + [#2] Fork #0: interpreter/fibonacci.py + [#3] Fork #0: interpreter/sieve.py + * Bench batch #2 + [#4] Fork #1: interpreter/richards.py + [#5] Fork #1: interpreter/fibonacci.py + * Bench batch #3 + [#6] Fork #2: interpreter/fibonacci.py + ``` + + * There are 3 batches, as that is how many the benchmark that requires the most run-forks (fibonacci.py) requires. + * The first batch includes all three benchmarks. + * Starting from the second batch, 'sieve.py' is excluded as it only requires 1 run-fork. + * In the third batch, 'richards.py' is excluded as it only requires 2 run-forks. + * For example, the log line "[#5] Fork #1: interpreter/fibonacci.py" indicates that: + * This is the 5th dispatch from the dispatcher - the 5th invocation of the `BenchmarkSuite.run` method. + * This dispatch will only execute the 'interpreter/fibonacci.py' benchmark. + * This is the 2nd fork (`metric.fork-number = 1`, but indexing starts from 0) of the 'interpreter/fibonacci.py' + benchmark. 
+ """ + + def __init__(self, state: BenchmarkDispatcherState, stable_run_config: str): + super().__init__(state) + self._stable_run_config_path: Path = Path(stable_run_config).absolute() + if not self._stable_run_config_path.exists(): + msg = f"Cannot initialize {self.__class__.__name__} instance with non-existing configuration file '{self._stable_run_config_path}'!" + raise ValueError(msg) + with open(self._stable_run_config_path) as f: + self._stable_run_config: dict = json.load(f) + + def validated_env_dispatch(self) -> Generator[BenchmarkExecutionConfiguration, Any, None]: + """ + Verifies the configuration, runs a sub-generator dry-run, and then finally yields from sub-generator. + + 1. Starts by parsing the benchmark list and verifying that each benchmark has an entry + in the stable run configuration file. + 2. Executes a dry-run of the sub-generator to: + * compute the number of dispatches. + * log the dispatching schedule to stdout. + 3. Yields from the sub-generator, which dispatches according to the schedule. + """ + if not isinstance(self.state.suite, PolybenchBenchmarkSuite): + msg = f"Expected a PolybenchBenchmarkSuite instance, instead got an instance of {self.state.suite.__class__.__name__}!" + raise ValueError(msg) + self._verify_no_conflicting_args_are_set() + benchmarks = self._parse_benchmark_list() + if len(benchmarks) == 0: + raise ValueError(f"No benchmarks selected!") + self._verify_stable_run_config(benchmarks) + # Dry-run of the sub-generator to get the number of dispatches (yields) and present the schedule to stdout + mx.log(f"{self.__class__.__name__} will dispatch the following schedule:") + dispatch_count = len(list(self.dispatch_with_fork_context(benchmarks, 0, True))) + if self.state.suite.polybench_bench_suite_args(self.state.bm_suite_args).dry_stable_run: + return + # Delegate to sub-generator + mx.log(f"{self.__class__.__name__} is starting dispatch...") + yield from self.dispatch_with_fork_context(benchmarks, dispatch_count, False) + + def dispatch_with_fork_context( + self, benchmarks: List[str], total_dispatch_count: int, dry_run: bool + ) -> Generator[BenchmarkExecutionConfiguration, Any, None]: + """Resets the fork number overrides and then yields according to the schedule.""" + fork_number_dict = self._init_fork_number_dict(benchmarks) + with ConstantContextValueManager(PolybenchBenchmarkSuite.FORK_OVERRIDE_MAP, fork_number_dict): + yield from self.dispatch_and_log(benchmarks, total_dispatch_count, fork_number_dict, dry_run) + + def dispatch_and_log( + self, benchmarks: List[str], total_dispatch_count: int, fork_number_dict: Dict[str, int], dry_run: bool + ) -> Generator[BenchmarkExecutionConfiguration, Any, None]: + """ + Yields according to the schedule: + * First, it iterates over the benchmark batches, determined by the highest requested run-fork count. + * Second, it iterates over each benchmark which requires to be run in the current benchmark batch. 
+ """ + dispatch_counter = 0 + number_of_batches = max([self._stable_run_config[bench]["run-forks"]["count"] for bench in benchmarks]) + for batch_index in range(number_of_batches): + if dry_run: + mx.log(f" * Bench batch #{batch_index + 1}") + benchmarks_for_batch = self._get_benchmarks_for_batch(benchmarks, batch_index) + for benchmark in benchmarks_for_batch: + if dry_run: + mx.log(f" [#{dispatch_counter + 1}] Fork #{fork_number_dict[benchmark]}: {benchmark}") + else: + mx.log(f"Execution of dispatch {dispatch_counter + 1}/{total_dispatch_count} running {benchmark}") + mx_benchmark_args = self.state.mx_benchmark_args + bm_suite_args = self.state.bm_suite_args + last_dispatch = dispatch_counter + 1 == total_dispatch_count + with ConstantContextValueManager("last_dispatch", last_dispatch): + fork_info = ForkInfo( + fork_number_dict[benchmark], self._stable_run_config[benchmark]["run-forks"]["count"] + ) + yield BenchmarkExecutionConfiguration([benchmark], mx_benchmark_args, bm_suite_args, fork_info) + dispatch_counter += 1 + fork_number_dict[benchmark] += 1 + + def _get_benchmarks_for_batch(self, benchmarks: List[str], batch_index: int): + return [bench for bench in benchmarks if self._stable_run_config[bench]["run-forks"]["count"] > batch_index] + + def _verify_no_conflicting_args_are_set(self): + mx_benchmark_args_dict = vars(self.state.mx_benchmark_args) + if mx_benchmark_args_dict.get("fork_count_file") is not None: + msg = f"Setting the 'mx benchmark' option 'fork_count_file' is not supported when using {self.__class__.__name__} as a dispatcher!" + raise ValueError(msg) + if mx_benchmark_args_dict.get("default_fork_count", 1) != 1: + msg = f"Setting the 'mx benchmark' option 'default_fork_count' is not supported when using {self.__class__.__name__} as a dispatcher!" + raise ValueError(msg) + + def _parse_benchmark_list(self) -> List[str]: + if any([sublist is None for sublist in self.state.bench_names_list]): + raise ValueError(f"The {self.__class__.__name__} dispatcher cannot dispatch without specified benchmarks!") + benchmarks = [bench for sublist in self.state.bench_names_list for bench in sublist] + seen = set() + unique_list = [bench for bench in benchmarks if not (bench in seen or seen.add(bench))] + return [bench for bench in unique_list if not self.skip_platform_unsupported_benchmark(bench)] + + def _verify_stable_run_config(self, benchmarks: List[str]): + levels = self._get_required_config_levels() + fields = ["count"] + for bench in benchmarks: + if bench not in self._stable_run_config: + msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing an entry for the '{bench}' benchmark!" + raise ValueError(msg) + bench_config = self._stable_run_config[bench] + for level in levels: + if level not in bench_config: + msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing the '{level}' key in the '{bench}' object!" + raise ValueError(msg) + level_config = bench_config[level] + for field in fields: + if field not in level_config: + msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing the '{field}' key in the '{bench}.{level}' object!" 
+ raise ValueError(msg) + + def _get_required_config_levels(self) -> List[str]: + return ["run-forks"] + + def _init_fork_number_dict(self, benchmarks) -> Dict[str, int]: + return {benchmark: 0 for benchmark in benchmarks} + + +class StabilizingPolybenchNativeImageBenchmarkDispatcher(StabilizingPolybenchBenchmarkDispatcher): + """ + Custom dispatching class for native-image PolybenchBenchmarkSuite stable runs that facilitates scheduling based + on a `--stable-run-config` configuration file: + * Schedules the appropriate number of forks for each specified benchmark according to their configuration. + * Reduces the number of language-launcher image builds, by reusing the same launcher across multiple benchmarks. + * Only runs agent and instrumentation stages once, if the VM configuration requires these stages. + + The `--stable-run-config` configuration file should be a JSON object, where each key is a benchmark name. + Each entry must include both "builds" and a "run-forks" dictionary, each with a "count" property that + specifies the number of builds or forks (respectively) to schedule for that benchmark. + Additional properties may be present, but they are not relevant to this dispatcher class. + + Example: + + If the `--stable-run-config` configuration file specifies the following configuration (fields irrelevant + for scheduling have been omitted): + ``` + { + "interpreter/sieve.py": { + "builds": { "count": 1 }, + "run-forks": { "count": 1 } + }, + "interpreter/fibonacci.py": { + "builds": { "count": 2 }, + "run-forks": { "count": 3 } + }, + "interpreter/richards.py": { + "builds": { "count": 3 }, + "run-forks": { "count": 2 } + } + } + ``` + + This dispatcher will produce the following schedule (can be obtained by appending the `--dry-stable-run` option + to your `mx benchmark` command): + ``` + * Build #1 + * Preparation batch (batch #1) + [#1] Fork #0: 'agent' stage on interpreter/sieve.py + [#2] Fork #0: 'agent' stage on interpreter/richards.py + [#3] Fork #0: 'agent' stage on interpreter/fibonacci.py + * Preparation batch (batch #2) + [#4] Fork [interpreter/sieve.py#0][interpreter/richards.py#0][interpreter/fibonacci.py#0]: 'instrument-image' stage + * Preparation batch (batch #3) + [#5] Fork #0: 'instrument-run' stage on interpreter/sieve.py + [#6] Fork #0: 'instrument-run' stage on interpreter/richards.py + [#7] Fork #0: 'instrument-run' stage on interpreter/fibonacci.py + * Preparation batch (batch #4) + [#8] Fork [interpreter/sieve.py#0][interpreter/richards.py#0][interpreter/fibonacci.py#0]: 'image' stage + * Bench batch #1 (batch #5) + [#9] Fork #0: 'run' stage on interpreter/sieve.py + [#10] Fork #0: 'run' stage on interpreter/richards.py + [#11] Fork #0: 'run' stage on interpreter/fibonacci.py + * Bench batch #2 (batch #6) + [#12] Fork #1: 'run' stage on interpreter/richards.py + [#13] Fork #1: 'run' stage on interpreter/fibonacci.py + * Bench batch #3 (batch #7) + [#14] Fork #2: 'run' stage on interpreter/fibonacci.py + * Build #2 + * Preparation batch (batch #1) + [#15] Fork [interpreter/richards.py#2][interpreter/fibonacci.py#3]: 'image' stage + * Bench batch #1 (batch #2) + [#16] Fork #2: 'run' stage on interpreter/richards.py + [#17] Fork #3: 'run' stage on interpreter/fibonacci.py + * Bench batch #2 (batch #3) + [#18] Fork #3: 'run' stage on interpreter/richards.py + [#19] Fork #4: 'run' stage on interpreter/fibonacci.py + * Bench batch #3 (batch #4) + [#20] Fork #5: 'run' stage on interpreter/fibonacci.py + * Build #3 + * Preparation batch (batch #1) + [#21] Fork 
[interpreter/richards.py#4]: 'image' stage + * Bench batch #1 (batch #2) + [#22] Fork #4: 'run' stage on interpreter/richards.py + * Bench batch #2 (batch #3) + [#23] Fork #5: 'run' stage on interpreter/richards.py + ``` + + * Three builds will be scheduled as the benchmark with the most builds requested (richards.py) requires three. + * Only the first build will include AGENT, INSTRUMENT-IMAGE, and INSTRUMENT-RUN stages in the preparation batch. + * Every subsequent build will only include an IMAGE stage in the preparation batch. + * The first build will include 3 batches, as all of them require at least one build. + * The first bench batch will include all 3 benchmarks, as all of them require at least one run-fork. + * The second bench batch will include richards.py and fibonacci.py, excluding sieve.py as the configuration for + this benchmark requires only one run-fork. + * The third bench batch will include only fibonacci.py, excluding both sieve.py and richards.py as their + configurations require 1 and 2 run-forks, respectively. + * Starting from the second build sieve.py will be excluded, as its configuration requires only one build. + * In the third build fibonacci.py will also be excluded, as its configuration requires two builds. + * This build will only contain two bench batches, as richards.py is the only remaining benchmark and its + configuration requires two run-forks. + * For example, the log line "[#15] Fork [interpreter/richards.py#2][interpreter/fibonacci.py#3]: 'image' stage" + indicates that: + * This is the 15th dispatch from the dispatcher - the 15th invocation of the `BenchmarkSuite.run` method. + * The data collected from this dispatch will be duplicated for the benchmarks 'interpreter/richards.py' and + 'interpreter/fibonacci.py'. Each of these will be labeled as belonging to different forks. The 'richards.py' + datapoints will be labeled as belonging to fork number 2 (`metric.fork-number = 2`), while the 'fibonacci.py' + datapoints will be labeled as belonging to fork number 3 (`metric.fork-number = 3`). + * This data duplication and relabeling is done with the intention of making the data easier to inspect in the + average use-case - inspecting a certain benchmark. Thanks to benchmark specific fork numbers, the data will + appear to be present for a continuous selection of forks, regardless of the observed benchmark. + * The same data can be duplicated and shared across benchmarks due to the fact that a language launcher image + is built - an image that is benchmark agnostic. For this reason image stages can be shared, while run stages + belong to a single benchmark. + * This dispatch will only execute the 'image' stage. + * The instrumentation profiles collected in the instrument-run stages are all passed to the image stage with the + `-Dnative-image.benchmark.pgo=` option. + """ + + LANGUAGE_LAUNCHER: str = "<>" + + def __init__(self, state: BenchmarkDispatcherState, stable_run_config: str): + super().__init__(state, stable_run_config) + self._dispatch_counter: int = 0 + + def dispatch_and_log( + self, benchmarks: List[str], total_dispatch_count: int, fork_number_dict: Dict[str, int], dry_run: bool + ) -> Generator[BenchmarkExecutionConfiguration, Any, None]: + """ + Yields according to the schedule: + * First, it iterates over the builds, determined by the highest requested build count. In each iteration, an + image will be built at the start, and then a number of benchmarking batches will be executed using the image. 
+ * Second, it iterates over the preparation and benchmark batches. The number of benchmark batches is determined + by the highest requested run-fork count from the benchmarks running on the current build. + This loop is implemented in the `dispatch_build` method. + * Third, it iterates over each benchmark which requires to be run in the current benchmark batch. + This loop is implemented in the `dispatch_batch` method. + """ + build_count = max([self._stable_run_config[bench]["builds"]["count"] for bench in benchmarks]) + self._dispatch_counter = 0 + with ConstantContextValueManager(PolybenchBenchmarkSuite.PGO_PROFILES, []): + for build_index in range(build_count): + yield from self.dispatch_build(benchmarks, total_dispatch_count, fork_number_dict, dry_run, build_index) + + def dispatch_build( + self, + benchmarks: List[str], + total_dispatch_count: int, + fork_number_dict: Dict[str, int], + dry_run: bool, + build_index: int, + ) -> Generator[BenchmarkExecutionConfiguration, Any, None]: + """See the `dispatch_and_log` doc comment.""" + if dry_run: + mx.log(f" * Build #{build_index + 1}") + build_stages = ["agent", "instrument-image", "instrument-run", "image"] if build_index == 0 else ["image"] + current_build_benchmarks = [ + bench for bench in benchmarks if self._stable_run_config[bench]["builds"]["count"] > build_index + ] + number_of_preparation_batches = len(build_stages) + bench_batches = [self._stable_run_config[bench]["run-forks"]["count"] for bench in current_build_benchmarks] + number_of_batches = number_of_preparation_batches + max(bench_batches) + with ConstantContextValueManager(PolybenchBenchmarkSuite.BUILD_BENCHMARKS, current_build_benchmarks): + for batch_index in range(number_of_batches): + yield from self.dispatch_batch( + current_build_benchmarks, + total_dispatch_count, + fork_number_dict, + dry_run, + batch_index, + build_stages, + number_of_preparation_batches, + ) + + def dispatch_batch( + self, + benchmarks: List[str], + total_dispatch_count: int, + fork_number_dict: Dict[str, int], + dry_run: bool, + batch_index: int, + build_stages: List[str], + number_of_preparation_batches: int, + ) -> Generator[BenchmarkExecutionConfiguration, Any, None]: + """See the `dispatch_and_log` doc comment.""" + stage = Stage.from_string(build_stages[batch_index] if batch_index < number_of_preparation_batches else "run") + new_vm_args = [f"-Dnative-image.benchmark.stages={stage}"] + if stage.is_final() and len(bm_exec_context().get_opt(PolybenchBenchmarkSuite.PGO_PROFILES, [])) > 0: + pgo_profiles = ",".join(map(str, bm_exec_context().get(PolybenchBenchmarkSuite.PGO_PROFILES))) + new_vm_args.append(f"-Dnative-image.benchmark.pgo={pgo_profiles}") + extended_bm_suite_args = self._extend_vm_args(self.state.suite, self.state.bm_suite_args, new_vm_args) + run_batch_index = batch_index - number_of_preparation_batches + benchmarks_for_batch = self._get_benchmarks_for_native_batch(benchmarks, stage, run_batch_index) + if dry_run: + if run_batch_index < 0: + mx.log(f" * Preparation batch (batch #{batch_index + 1})") + elif run_batch_index >= 0: + mx.log(f" * Bench batch #{run_batch_index + 1} (batch #{batch_index + 1})") + with ConstantContextValueManager(PolybenchBenchmarkSuite.FORK_FOR_IMAGE, run_batch_index): + for benchmark in benchmarks_for_batch: + if dry_run: + if stage.is_image(): + fork_numbers = [f"[{bench}#{fork_number_dict[bench]}]" for bench in benchmarks] + mx.log(f" [#{self._dispatch_counter + 1}] Fork {''.join(fork_numbers)}: '{stage}' stage") + else: + msg = f" 
[#{self._dispatch_counter + 1}] Fork #{fork_number_dict[benchmark]}: '{stage}' stage on {benchmark}" + mx.log(msg) + else: + msg = f"Execution of dispatch {self._dispatch_counter + 1}/{total_dispatch_count} running {stage} stage on {benchmark}" + mx.log(msg) + mx_bench_args = self.state.mx_benchmark_args + last_dispatch = self._dispatch_counter + 1 == total_dispatch_count + with ConstantContextValueManager("last_dispatch", last_dispatch): + total_fork_count = ( + self._stable_run_config[benchmark]["builds"]["count"] + * self._stable_run_config[benchmark]["run-forks"]["count"] + ) + fork_info = ForkInfo(fork_number_dict[benchmark], total_fork_count) + yield BenchmarkExecutionConfiguration([benchmark], mx_bench_args, extended_bm_suite_args, fork_info) + self._dispatch_counter += 1 + if run_batch_index >= 0: + fork_number_dict[benchmark] += 1 + if stage.is_image() and stage.is_final(): + fork_number_dict[self.LANGUAGE_LAUNCHER] += 1 + + def _get_benchmarks_for_native_batch(self, benchmarks: List[str], stage: Stage, run_batch_index: int) -> List[str]: + if stage.is_image(): + return [benchmarks[0]] + return self._get_benchmarks_for_batch(benchmarks, run_batch_index) + + def _verify_no_conflicting_args_are_set(self): + super()._verify_no_conflicting_args_are_set() + vm_args = self.state.suite.vmArgs(self.state.bm_suite_args) + if len(parse_prefixed_args("-Dnative-image.benchmark.stages=", vm_args)) > 0: + msg = f"Setting the VM option '-Dnative-image.benchmark.stages' is not supported when using {self.__class__.__name__} as a dispatcher!" + raise ValueError(msg) + + def _get_required_config_levels(self) -> List[str]: + return ["builds"] + super()._get_required_config_levels() + + def _extend_vm_args( + self, suite: "PolybenchBenchmarkSuite", bm_suite_args: List[str], new_vm_args: List[str] + ) -> List[str]: + vm_args, run_args = suite.vmAndRunArgs(bm_suite_args) + return vm_args + new_vm_args + ["--"] + run_args + + def _init_fork_number_dict(self, benchmarks) -> Dict[str, int]: + return super()._init_fork_number_dict(benchmarks + [self.LANGUAGE_LAUNCHER]) + + +class ImageStageDatapointDuplicatingPostProcessor(DataPointsPostProcessor): + """ + Ensures the datapoints from the image stage are duplicated so that there is a datapoint for: + * each benchmark: To facilitate easier access to image stage metrics to users inspecting a specific benchmark. + * the executable name (e.g. "python"): To indicate that the image does not have anything to do with the + currently running benchmarks - as the image produced is a language + launcher that will take the benchmark file as input. + Doing both of these might seem counterintuitive, but it is done with two different users in mind. + + Does not have any effect if not in native mode and thus no image stage datapoints are present. 
+ """ + + def __init__(self, suite: "PolybenchBenchmarkSuite"): + super().__init__() + self._suite = suite + + def process_datapoints(self, datapoints: DataPoints) -> DataPoints: + copies = [] + executable_name = bm_exec_context().get(PolybenchBenchmarkSuite.CURRENT_IMAGE).executable_name() + all_benchmarks = bm_exec_context().get("benchmarks") + benchmarks = bm_exec_context().get_opt(PolybenchBenchmarkSuite.BUILD_BENCHMARKS, all_benchmarks) + override_map = bm_exec_context().get_opt(PolybenchBenchmarkSuite.FORK_OVERRIDE_MAP) + for dp in datapoints: + stage = dp.get("native-image.stage") + if stage is not None and "image" in stage: + self.set_benchmark_specific_dimensions( + dp, + executable_name, + override_map, + StabilizingPolybenchNativeImageBenchmarkDispatcher.LANGUAGE_LAUNCHER, + ) + dp["extra.duplicated"] = "false" + for benchmark in benchmarks: + copy = dp.copy() + self.set_benchmark_specific_dimensions(copy, benchmark, override_map) + copy["extra.duplicated"] = "true" + copies.append(copy) + return datapoints + copies + + def set_benchmark_specific_dimensions( + self, dp: DataPoint, benchmark: str, override_map: Dict[str, int], override_key: Optional[str] = None + ): + dp["benchmark"] = benchmark + if override_map is None or dp.get("metric.fork-number") is None: + return + if override_key is None: + override_key = benchmark + if override_key not in override_map: + raise ValueError(f"No fork override provided for '{override_key}'!") + dp["metric.fork-number"] = override_map[override_key] + + +class FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor( + mx_benchmark.DataPointsAverageProducerWithOutlierRemoval +): + """ + Customizable post-processor that is intended to execute only once: after the run stage (last stage) of + the last dispatch, but considers all the datapoints (from previous stages and dispatches) + when removing outliers and computing the average. + + DEVELOPER NOTES: + * Should be scheduled to execute only once! + * Groups formed by the `key_fn` should contain only datapoints with the same "benchmark" dimension! + """ + + def __init__( + self, + suite: "PolybenchBenchmarkSuite", + selector_fn: Optional[Callable[[DataPoint], bool]], + key_fn: Optional[Callable[[DataPoint], Any]], + field: str, + update_fn: Optional[Callable[[DataPoint], DataPoint]], + aggregation_level: str, + final_consumer: bool, + ): + # The lower and upper percentiles will be set on a per-group basis in `calculate_aggregate_value` - as they + # can have different values for different benchmarks. + super().__init__(selector_fn, key_fn, field, update_fn, 0, 1) + self._suite = suite + self._aggregation_level = aggregation_level + self._final_consumer = final_consumer + + def select_datapoints(self, datapoints: DataPoints) -> DataPoints: + # Select datapoints from all forks and stages. The latest datapoints (the ones in the `datapoints` argument) + # have not yet been added to PolybenchBenchmarkSuite.DATAPOINTS by the `ContextStorePostProcessor`, so + # we add them here. + return super().select_datapoints(bm_exec_context().get(PolybenchBenchmarkSuite.DATAPOINTS) + datapoints) + + def process_datapoints(self, datapoints: DataPoints) -> DataPoints: + if self._final_consumer: + if bm_exec_context().get(PolybenchBenchmarkSuite.CONSUMED): + msg = "Failed to guarantee a single execution! The aggregate datapoints were already produced!" 
+ raise ValueError(msg) + bm_exec_context().update(PolybenchBenchmarkSuite.CONSUMED, True) + return super().process_datapoints(datapoints) + + def calculate_aggregate_value(self, datapoints: DataPoints) -> Any: + config = bm_exec_context().get(PolybenchBenchmarkSuite.STABLE_CONFIG) + benchmark = self.get_and_verify_unique_benchmark_dimension(datapoints) + self._lower_percentile = self._suite.resolve_config_field_or_default( + config, [benchmark, self._aggregation_level, "lower-percentile"], 0 + ) + self._upper_percentile = self._suite.resolve_config_field_or_default( + config, [benchmark, self._aggregation_level, "upper-percentile"], 1 + ) + return super().calculate_aggregate_value(datapoints) + + def get_and_verify_unique_benchmark_dimension(self, datapoints: DataPoints) -> str: + benchmark = datapoints[0]["benchmark"] + for dp in datapoints: + if dp["benchmark"] != benchmark: + raise ValueError("The datapoints group is expected to share the 'benchmark' dimension but does not!") + return benchmark + + def verify_and_process_id_score_function(self, datapoint: DataPoint): + score_function = datapoint.get("metric.score-function", "id") + if score_function != "id": + raise ValueError( + f"{self.__class__.__name__} can only post-process datapoints with a 'metric.score-function' of value 'id'! Encountered score function: '{score_function}'." + ) + datapoint["metric.score-value"] = datapoint["metric.value"] + + +class NonNativeImageBenchmarkSummaryPostProcessor(FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor): + """ + Post-processor that calculates the outlier-excluded average of the "avg-time" metric across dispatches + and produces a final "time" metric for a benchmark. + Should only be used when running a benchmark in server (non-native) mode. + """ + + def __init__(self, suite: "PolybenchBenchmarkSuite"): + selector_fn = lambda dp: dp["metric.name"] == "avg-time" and dp["metric.object"] == "fork" + key_fn = lambda dp: dp["benchmark"] + field = "metric.value" + + def update_fn(dp): + dp["metric.name"] = "time" + if "metric.object" in dp: + del dp["metric.object"] + if "metric.fork-number" in dp: + del dp["metric.fork-number"] + self.verify_and_process_id_score_function(dp) + return dp + + super().__init__(suite, selector_fn, key_fn, field, update_fn, "run-forks", True) + + +class NativeModeBuildSummaryPostProcessor(FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor): + """ + Post-processor that calculates the outlier-excluded average of the "avg-time" metric across run-only-forks + and produces the "avg-time" metric for an image build. + Should only be used when running a benchmark in native mode. 
+ """ + + def __init__(self, suite: "PolybenchBenchmarkSuite"): + selector_fn = lambda dp: dp["metric.name"] == "avg-time" and dp["metric.object"] == "fork" + key_fn = lambda dp: (dp["benchmark"], dp["native-image.stage"], dp["native-image.rebuild-number"]) + field = "metric.value" + + def update_fn(dp): + dp["metric.object"] = "build" + if "metric.fork-number" in dp: + del dp["metric.fork-number"] + if "native-image.image-fork-number" in dp: + del dp["native-image.image-fork-number"] + self.verify_and_process_id_score_function(dp) + return dp + + super().__init__(suite, selector_fn, key_fn, field, update_fn, "run-forks", False) + + +class NativeModeBenchmarkSummaryPostProcessor(FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor): + """ + Post-processor that calculates the outlier-excluded average of the "avg-time" metric across image builds + and produces a final "time" metric for a benchmark (separate "run" and "instrument-run" datapoints). + Should only be used when running a benchmark in native mode. + """ + + def __init__(self, suite: "PolybenchBenchmarkSuite"): + selector_fn = lambda dp: dp["metric.name"] == "avg-time" and dp["metric.object"] == "build" + key_fn = lambda dp: (dp["benchmark"], dp["native-image.stage"]) + field = "metric.value" + + def update_fn(dp): + dp["metric.name"] = "time" + if "metric.object" in dp: + del dp["metric.object"] + if "native-image.rebuild-number" in dp: + del dp["native-image.rebuild-number"] + self.verify_and_process_id_score_function(dp) + return dp + + super().__init__(suite, selector_fn, key_fn, field, update_fn, "builds", True) + + +class GraalSpecificFieldsRemoverPostProcessor(DataPointsPostProcessor): + """ + Removes all platform Graal specific fields from all the datapoints. + Used for cleaning up the bench results of a benchmark that runs on + a different platform (e.g. CPython). + The removed fields include: + * The "guest-vm" and "guest-vm-config" fields. + * All the "platform.*" fields. + """ + + def process_datapoints(self, datapoints: DataPoints) -> DataPoints: + return [{k: v for k, v in dp.items() if self._should_be_kept(k)} for dp in datapoints] + + def _should_be_kept(self, key) -> bool: + return key not in ["guest-vm", "guest-vm-config"] and not key.startswith("platform.") + + +class ContextStorePostProcessor(DataPointsPostProcessor): + """ + Post-processor that stores datapoints in the execution context for other post-processors that require access to + datapoints from all dispatches. Performs no datapoints modifications. + """ + + def process_datapoints(self, datapoints: DataPoints) -> DataPoints: + existing_datapoints = bm_exec_context().get(PolybenchBenchmarkSuite.DATAPOINTS) + bm_exec_context().update(PolybenchBenchmarkSuite.DATAPOINTS, existing_datapoints + datapoints) + return datapoints + + +class ContextResetPostProcessor(DataPointsPostProcessor): + """ + Resets fork-batch specific execution context fields for the next fork batch. Performs no datapoints modifications. + """ + + def __init__(self, suite: "PolybenchBenchmarkSuite"): + super().__init__() + self._suite = suite + + def process_datapoints(self, datapoints: DataPoints) -> DataPoints: + if ( + not bm_exec_context().get(PolybenchBenchmarkSuite.CONSUMED) + and not self._suite.polybench_bench_suite_args(bm_exec_context().get("bm_suite_args")).dry_stable_run + ): + msg = f"Failed to produce the aggregate benchmark datapoints! This should have happened in the final fork!" 
+ raise ValueError(msg) + bm_exec_context().update(PolybenchBenchmarkSuite.CONSUMED, False) + bm_exec_context().update(PolybenchBenchmarkSuite.DATAPOINTS, []) + bm_exec_context().update(PolybenchBenchmarkSuite.REBUILD_NUMBER, -1) + return datapoints + + +class CurrentImageManager(ConstantContextValueManager): + """Represents the currently used PolyBench image cache entry.""" + + def __init__( + self, suite: "PolybenchBenchmarkSuite", resolved_benchmark: ResolvedPolybenchBenchmark, bm_suite_args: List[str] + ): + languages = resolved_benchmark.suite.languages + impactful_vm_args = suite.vm_args_impacting_image_build(bm_suite_args) + entry = PolybenchImageCacheEntry.create(languages, impactful_vm_args) + super().__init__(PolybenchBenchmarkSuite.CURRENT_IMAGE, entry) + + def __enter__(self): + try: + super().__enter__() + except ValueError: + existing_entry = bm_exec_context().get(self._name).executable_name() + msg = f"Tried to set current image to {self._value.executable_name()}, but there is already a current image ({existing_entry})." + raise ValueError(msg) + + class PolybenchImageCacheEntry(NamedTuple): """ Represents the parameters of a cached image build. When possible, PolybenchBenchmarkSuite will @@ -282,53 +967,6 @@ def _hash_build_args(self) -> str: return hashlib.sha256(build_args_string.encode("utf-8")).hexdigest()[:8] -# Extract this into benchmark-side configuration files (GR-70587) -_polybench_bench_suite_config = { - "interpreter/fibonacci.py": { - "outlier-fork-removal": { - "builds": { - "count": 5, - "lower-percentile": 0, - "upper-percentile": 0.4, - }, - "run-forks": { - "count": 5, - "lower-percentile": 0, - "upper-percentile": 0.4, - }, - }, - }, - "interpreter/deltablue.py": { - "outlier-fork-removal": { - "builds": { - "count": 5, - "lower-percentile": 0, - "upper-percentile": 0.4, - }, - "run-forks": { - "count": 5, - "lower-percentile": 0, - "upper-percentile": 0.4, - }, - }, - }, - "interpreter/richards.py": { - "outlier-fork-removal": { - "builds": { - "count": 5, - "lower-percentile": 0, - "upper-percentile": 0.4, - }, - "run-forks": { - "count": 5, - "lower-percentile": 0, - "upper-percentile": 0.4, - }, - }, - }, -} - - class PolybenchBenchmarkSuite( mx_benchmark.JavaBenchmarkSuite, mx_benchmark.TemporaryWorkdirMixin, mx_sdk_benchmark.NativeImageBenchmarkMixin ): @@ -347,14 +985,21 @@ class PolybenchBenchmarkSuite( } REUSE_DISK_IMAGES = "POLYBENCH_REUSE_DISK_IMAGES" POLYBENCH_BENCH_SUITE_PARSER_NAME = "polybench_bench_suite_parser_name" + # Use "PolybenchBenchmarkSuite.*" execution context keys to avoid potential key collisions + CURRENT_IMAGE = "PolybenchBenchmarkSuite.current_image" + IMAGE_CACHE = "PolybenchBenchmarkSuite.image_cache" + FORK_OVERRIDE_MAP = "PolybenchBenchmarkSuite.fork_number_override_map" + REBUILD_NUMBER = "PolybenchBenchmarkSuite.rebuild_number" + DATAPOINTS = "PolybenchBenchmarkSuite.datapoints" + CONSUMED = "PolybenchBenchmarkSuite.consumed_datapoints" + STABLE_CONFIG = "PolybenchBenchmarkSuite.stable_run_config" + FORK_FOR_IMAGE = "PolybenchBenchmarkSuite.image_fork_number" + BUILD_BENCHMARKS = "PolybenchBenchmarkSuite.current_build_benchmarks" + PGO_PROFILES = "PolybenchBenchmarkSuite.collected_pgo_profiles" - def __init__(self): - super(PolybenchBenchmarkSuite, self).__init__() - self._image_cache: Set[PolybenchImageCacheEntry] = set() - self._current_image: Optional[PolybenchImageCacheEntry] = None - # Consider extracting these into an execution context object - the suite should be stateless (GR-70605) - 
self._forks_until_rebuild_counter: int = -1 - self._image_rebuild_index: int = -1 + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + bm_exec_context().add_context_value(PolybenchBenchmarkSuite.IMAGE_CACHE, BoxContextValue(set())) def group(self): return "Graal" @@ -366,7 +1011,7 @@ def name(self): return "polybench" def version(self): - return "0.2.0" + return "0.3.0" def _resolve_benchmarks(self) -> Dict[str, ResolvedPolybenchBenchmark]: if not hasattr(self, "_benchmarks"): @@ -385,13 +1030,9 @@ def filter_stages_with_cli_requested_stages(self, bm_suite_args: List[str], stag if len(parse_prefixed_args("-Dnative-image.benchmark.stages=", self.vmArgs(bm_suite_args))) > 0: return super().filter_stages_with_cli_requested_stages(bm_suite_args, stages) # Filter stages for optimized fork runs: we might want just a single instrument-image stage and multiple run stages per one image stage - preserve_only_run_stages = ( - self._decrement_and_check_rebuild_counter() - and self.execution_context.native_mode - and self._image_is_cached(bm_suite_args) - ) + preserve_only_run_stages = self._image_is_cached(bm_suite_args) remove_instrumentation_stages = ( - self._image_rebuild_index > 0 + bm_exec_context().get(PolybenchBenchmarkSuite.REBUILD_NUMBER) > 0 and not self.polybench_bench_suite_args(bm_suite_args).regenerate_instrumentation_profile ) if preserve_only_run_stages: @@ -412,101 +1053,69 @@ def checkSamplesInPgo(self): # Sampling does not support images that use runtime compilation. return False - def resolve_config_field_or_default(self, keys: List[str], default: Any) -> Any: + @staticmethod + def resolve_config_field_or_default(config: dict, keys: List[str], default: Any) -> Any: """Resolves a nested Polybench config dictionary value, or returns the default value if a key cannot be resolved.""" - curr = _polybench_bench_suite_config + if config is None: + return default + curr = config for key in keys: if not isinstance(curr, dict) or key not in curr: return default curr = curr[key] return curr - def default_fork_count(self, benchmarks: List[str], bm_suite_args: List[str]) -> int: - """ - Defaults to 1 via super `default_fork_count` implementation. - If the `--stable-run` Polybench bench suite flag is enabled and the appropriate configuration is available for - the current benchmark, then returns a fork count accommodating multiple image builds and multiple run-only-forks - (in order to provide more stable metrics). - """ - if ( - not self.polybench_bench_suite_args(bm_suite_args).stable_run - or len(benchmarks) != 1 - or self.resolve_config_field_or_default([benchmarks[0], "outlier-fork-removal"], None) is None - ): - return super().default_fork_count(benchmarks, bm_suite_args) - build_count = self.resolve_config_field_or_default( - [benchmarks[0], "outlier-fork-removal", "builds", "count"], 1 - ) - run_only_fork_count = self.resolve_config_field_or_default( - [benchmarks[0], "outlier-fork-removal", "run-forks", "count"], 1 - ) - # No execution context has been established at this point - if self.is_native_mode(bm_suite_args): - return build_count * run_only_fork_count - else: - return run_only_fork_count - - def _reset_rebuild_counter(self): - """ - Resets the language launcher image rebuild counter. The reset value depends on the `--stable-run` CLI option - and the `_polybench_bench_suite_config` configuration dict. 
- """ - if self.polybench_bench_suite_args(self.execution_context.bmSuiteArgs).stable_run: - self._forks_until_rebuild_counter = self.resolve_config_field_or_default( - [self.execution_context.benchmark, "outlier-fork-removal", "run-forks", "count"], float("inf") - ) - else: - # Never rebuild the language launcher image, re-use the image built in the first fork in subsequent forks - self._forks_until_rebuild_counter = float("inf") - - def _decrement_and_check_rebuild_counter(self) -> bool: - """ - Resets the rebuild counter if it has reached zero or wasn't yet initialized, and then decrements it. - Returns `False` if the counter was reset. - Used to determine whether an image stage is due in the current fork or if it is supposed to be a run-only-fork. - """ - can_reuse = True - if self._forks_until_rebuild_counter <= 0: - self._reset_rebuild_counter() - self._image_rebuild_index += 1 - can_reuse = False - self._forks_until_rebuild_counter -= 1 - return can_reuse - - class PolybenchExecutionContext(mx_benchmark.SingleBenchmarkExecutionContext): - """Runtime context of the `PolybenchBenchmarkSuite` bench suite.""" - - def __init__( - self, - suite: BenchmarkSuite, - vm: Optional[Vm], - benchmarks: List[str], - bmSuiteArgs: List[str], - fork_info: Optional[ForkInfo] = None, - ): - super().__init__(suite, vm, benchmarks, bmSuiteArgs, fork_info) - # It is important for the summary post-processors to be persisted across stages and forks - if isinstance(suite.execution_context, PolybenchBenchmarkSuite.PolybenchExecutionContext): - self._native_mode: bool = suite.execution_context.native_mode - self._post_processors: List[DataPointsPostProcessor] = suite.execution_context.post_processors + def get_dispatcher(self, state: BenchmarkDispatcherState) -> BenchmarkDispatcher: + """Returns one of the custom dispatchers if the '--stable-run-config' option is set, defaults to super otherwise.""" + stable_run_config = self.polybench_bench_suite_args(state.bm_suite_args).stable_run_config + if stable_run_config is not None: + if self.is_native_mode(state.bm_suite_args): + dispatcher_class = StabilizingPolybenchNativeImageBenchmarkDispatcher else: - self._native_mode: bool = suite.is_native_mode(bmSuiteArgs) - self._post_processors: List[DataPointsPostProcessor] = suite._get_post_processors( - self.benchmark, self._native_mode, self.bmSuiteArgs - ) - - @property - def native_mode(self): - return self._native_mode - - @property - def post_processors(self): - return self._post_processors - - def new_execution_context( - self, vm: Optional[Vm], benchmarks: List[str], bmSuiteArgs: List[str], fork_info: Optional[ForkInfo] = None - ) -> BenchmarkExecutionContext: - return PolybenchBenchmarkSuite.PolybenchExecutionContext(self, vm, benchmarks, bmSuiteArgs, fork_info) + dispatcher_class = StabilizingPolybenchBenchmarkDispatcher + msg = f"Using a {dispatcher_class.__name__} instance for benchmark dispatching due to the '--stable-run-config' option being set." 
+ mx.log(msg) + return dispatcher_class(state, stable_run_config) + return super().get_dispatcher(state) + + def before(self, bmSuiteArgs): + super().before(bmSuiteArgs) + bm_exec_context().add_context_value(PolybenchBenchmarkSuite.DATAPOINTS, BoxContextValue([])) + bm_exec_context().add_context_value(PolybenchBenchmarkSuite.CONSUMED, BoxContextValue(False)) + bm_exec_context().add_context_value(PolybenchBenchmarkSuite.REBUILD_NUMBER, BoxContextValue(-1)) + + def after(self, bmSuiteArgs): + bm_exec_context().remove(PolybenchBenchmarkSuite.DATAPOINTS) + bm_exec_context().remove(PolybenchBenchmarkSuite.CONSUMED) + bm_exec_context().remove(PolybenchBenchmarkSuite.REBUILD_NUMBER) + super().after(bmSuiteArgs) + + def run_stage(self, vm, stage: Stage, command, out, err, cwd, nonZeroIsFatal): + # Increment rebuild number before running the 'image' stage + if stage.is_image() and stage.is_final(): + bm_exec_context().update( + PolybenchBenchmarkSuite.REBUILD_NUMBER, + bm_exec_context().get(PolybenchBenchmarkSuite.REBUILD_NUMBER) + 1, + ) + exit_code = super().run_stage(vm, stage, command, out, err, cwd, nonZeroIsFatal) + # Copy the profile after running the 'instrument-run' stage + self._ensure_instrumentation_profile_name_is_benchmark_specific(vm, stage) + return exit_code + + def _ensure_instrumentation_profile_name_is_benchmark_specific( + self, vm: mx_sdk_benchmark.NativeImageVM, stage: Stage + ): + not_instrument_stage = stage.stage_name != StageName.INSTRUMENT_RUN + no_collection = not bm_exec_context().has(PolybenchBenchmarkSuite.PGO_PROFILES) + if not_instrument_stage or no_collection: + return + # Copy the profile to ensure it isn't overwritten by next benchmark + new_pgo_profile = vm.config.profile_path + benchmark_sanitized = bm_exec_context().get("benchmark").replace("/", "-").replace(".", "-") + bench_unique_profile_path = new_pgo_profile.parent / f"{benchmark_sanitized}.iprof" + shutil.copy(new_pgo_profile, bench_unique_profile_path) + # Store the profile for use in upcoming IMAGE stages + bm_exec_context().get(PolybenchBenchmarkSuite.PGO_PROFILES).append(bench_unique_profile_path) def run(self, benchmarks, bmSuiteArgs) -> DataPoints: # name used by NativeImageBenchmarkMixin @@ -520,14 +1129,21 @@ def run(self, benchmarks, bmSuiteArgs) -> DataPoints: mx.logv(f"Languages included on the classpath: {resolved_benchmark.suite.languages}") env_vars = PolybenchBenchmarkSuite._prepare_distributions(working_directory, resolved_benchmark) - with _extend_env(env_vars), self._set_image_context(resolved_benchmark, bmSuiteArgs): + with _extend_env(env_vars), CurrentImageManager( + self, resolved_benchmark, bmSuiteArgs + ), ConstantContextValueManager("benchmark", resolved_benchmark.name), ConstantContextValueManager( + "native_mode", self.is_native_mode(bmSuiteArgs) + ), ConstantContextValueManager( + PolybenchBenchmarkSuite.STABLE_CONFIG, self._resolve_stable_run_config() + ): datapoints = self.intercept_run(super(), benchmarks, bmSuiteArgs) - if self.execution_context.native_mode: - self._image_cache.add(self._current_image) + if bm_exec_context().get("native_mode"): + image_cache = bm_exec_context().get(PolybenchBenchmarkSuite.IMAGE_CACHE) + image_cache.add(bm_exec_context().get(PolybenchBenchmarkSuite.CURRENT_IMAGE)) return datapoints def use_stage_aware_benchmark_mixin_intercept_run(self): - if self.jvm(self.execution_context.bmSuiteArgs) == "cpython": + if self.jvm(bm_exec_context().get("bm_suite_args")) == "cpython": return True return False @@ -536,6 +1152,13 @@ def 
_resolve_current_benchmark(self, benchmarks) -> ResolvedPolybenchBenchmark: mx.abort(f"Must specify one benchmark at a time (given: {benchmarks})") return self._resolve_benchmarks()[benchmarks[0]] + def _resolve_stable_run_config(self): + config_path = self.polybench_bench_suite_args(bm_exec_context().get("bm_suite_args")).stable_run_config + if config_path is None: + return {} + with open(config_path) as f: + return json.load(f) + @staticmethod def _prepare_distributions( working_directory: str, resolved_benchmark: ResolvedPolybenchBenchmark @@ -564,30 +1187,28 @@ def _prepare_distributions( return env_vars - @contextlib.contextmanager - def _set_image_context(self, resolved_benchmark: ResolvedPolybenchBenchmark, bm_suite_args: List[str]): - """ - Defines a context for the "current" image. This field determines the executable name, which - is used by NI benchmarking infra to resolve the name/location of the built image. - """ - entry = PolybenchImageCacheEntry.create(resolved_benchmark.suite.languages, self.vmArgs(bm_suite_args)) - assert ( - not self._current_image - ), f"Tried to set current image to {entry.executable_name()}, but there is already a current image ({self._current_image.executable_name()})." - self._current_image = entry - yield - self._current_image = None + def vm_args_impacting_image_build(self, bm_suite_args: List[str]) -> List[str]: + """Returns the VM args excluding any args that do not impact the image.""" + vm_args = self.vmArgs(bm_suite_args) + impactful_vm_args = [] + for vm_arg in vm_args: + if vm_arg.startswith("-Dnative-image.benchmark.stages="): + continue + impactful_vm_args.append(vm_arg) + return impactful_vm_args def _base_image_name(self) -> Optional[str]: """Overrides the image name used to build/run the image.""" - if self.jvm(self.execution_context.bmSuiteArgs) == "cpython": - benchmark_sanitized = self.execution_context.benchmark.replace("/", "-").replace(".", "-") + if self.jvm(bm_exec_context().get("bm_suite_args")) == "cpython": + benchmark_sanitized = bm_exec_context().get("benchmark").replace("/", "-").replace(".", "-") return f"{benchmark_sanitized}-staged-benchmark" - assert self._current_image, "Image should have been set already" - return self._current_image.full_executable_name() + assert bm_exec_context().has(PolybenchBenchmarkSuite.CURRENT_IMAGE), "Image should have been set already" + return bm_exec_context().get(PolybenchBenchmarkSuite.CURRENT_IMAGE).full_executable_name() def _image_is_cached(self, bm_suite_args: List[str]) -> bool: - if self._current_image in self._image_cache: + current_image = bm_exec_context().get(PolybenchBenchmarkSuite.CURRENT_IMAGE) + image_cache = bm_exec_context().get(PolybenchBenchmarkSuite.IMAGE_CACHE) + if current_image in image_cache: return True if mx.get_env(PolybenchBenchmarkSuite.REUSE_DISK_IMAGES) in ["true", "True"]: @@ -604,10 +1225,6 @@ def _image_is_cached(self, bm_suite_args: List[str]) -> bool: return False - def _extend_vm_args(self, bm_suite_args: List[str], new_vm_args: List[str]) -> List[str]: - vm_args, run_args = self.vmAndRunArgs(bm_suite_args) - return vm_args + new_vm_args + ["--"] + run_args - def createCommandLineArgs(self, benchmarks, bmSuiteArgs): resolved_benchmark = self._resolve_current_benchmark(benchmarks) @@ -624,7 +1241,7 @@ def createCommandLineArgs(self, benchmarks, bmSuiteArgs): def parserNames(self) -> List[str]: return super().parserNames() + [PolybenchBenchmarkSuite.POLYBENCH_BENCH_SUITE_PARSER_NAME] - def polybench_bench_suite_args(self, bm_suite_args: List[str]): + 
def polybench_bench_suite_args(self, bm_suite_args: List[str]) -> Namespace: """Parses the "vm and suite" args for any known Polybench args and returns a namespace with Polybench arg values.""" vm_and_suite_args = self.vmAndRunArgs(bm_suite_args)[0] namespace, _ = get_parser(PolybenchBenchmarkSuite.POLYBENCH_BENCH_SUITE_PARSER_NAME).parse_known_args( @@ -643,12 +1260,13 @@ def runAndReturnStdOut(self, benchmarks, bmSuiteArgs): "guest-vm-config": guest_vm_config, } ) - if self.execution_context.native_mode: - dims.update( - { - "native-image.rebuild-number": self._image_rebuild_index, - } - ) + if bm_exec_context().get("native_mode"): + # max(0, _) to handle instrumentation stages and running on previously built images + rebuild_num = max(0, bm_exec_context().get(PolybenchBenchmarkSuite.REBUILD_NUMBER)) + dims["native-image.rebuild-number"] = rebuild_num + if bm_exec_context().has(PolybenchBenchmarkSuite.FORK_FOR_IMAGE): + fork_for_image = max(0, bm_exec_context().get(PolybenchBenchmarkSuite.FORK_FOR_IMAGE)) + dims["native-image.image-fork-number"] = fork_for_image return ret_code, out, dims def _infer_host_vm_config(self, bm_suite_args, dims): @@ -656,7 +1274,7 @@ def _infer_host_vm_config(self, bm_suite_args, dims): if edition not in ["ce", "ee"] or not dims.get("platform.prebuilt-vm", False): raise ValueError(f"Polybench should only run with a prebuilt GraalVM. Dimensions found: {dims}") - if self.execution_context.native_mode: + if bm_exec_context().get("native_mode"): # patch ce/ee suffix existing_config = dims["host-vm-config"] existing_edition = existing_config.split("-")[-1] @@ -675,7 +1293,7 @@ def _infer_host_vm_config(self, bm_suite_args, dims): def _infer_guest_vm_info(self, benchmarks, bm_suite_args) -> Tuple[str, str]: resolved_benchmark = self._resolve_current_benchmark(benchmarks) - # Eventually this must check for exact match for each language and map it to the corresponding guest-vm + # Eventually this must check for exact match for each language and map it to the corresponding guest-vm. # Here, we just infer it based on the presence of some language in a list. This must be made more robust # and more generic to handle the case when multiple languages are used. if "js" in resolved_benchmark.suite.languages: @@ -701,7 +1319,7 @@ def rules(self, output, benchmarks, bmSuiteArgs): if metric_name is None: return [] rules = [] - benchmark_name = benchmarks[0] + benchmark_name = bm_exec_context().get("benchmark") if metric_name == "time": # For metric "time", two metrics are reported: # - "warmup" (per-iteration data for "warmup" and "run" iterations) @@ -816,195 +1434,38 @@ def rules(self, output, benchmarks, bmSuiteArgs): ] return rules - class NativeModeBenchmarkRenamingPostProcessor(DataPointsPostProcessor): - """ - Rewrites the "benchmark" field of all image stage datapoints to the image name (e.g. "python"). - This is done to indicate that the image does not have anything to do with the currently running benchmark - the - image produced is a language launcher that will take the benchmark file as input. - Should only be used when running a benchmark in native mode. 
- """ - - def __init__(self, suite: "PolybenchBenchmarkSuite"): - super().__init__() - self._suite = suite - - def process_datapoints(self, datapoints: DataPoints) -> DataPoints: - for dp in datapoints: - stage = dp.get("native-image.stage") - if stage is not None and "image" in stage: - # associate any image build datapoints with the name of the image (rather than the benchmark) - dp["benchmark"] = self._suite._current_image.executable_name() - return datapoints - - class AfterAllForksAverageWithOutlierRemovalPostProcessor(mx_benchmark.DataPointsAverageProducerWithOutlierRemoval): - """ - Customizable post-processor that only executes after the run stage (last stage) of the last fork, but includes - all datapoints (from previous stages and forks) when removing outliers and computing the average. - """ - - def __init__( - self, - suite: "PolybenchBenchmarkSuite", - selector_fn: Optional[Callable[[DataPoint], bool]], - key_fn: Optional[Callable[[DataPoint], Any]], - field: str, - update_fn: Optional[Callable[[DataPoint], DataPoint]], - lower_percentile: float, - upper_percentile: float, - ): - super().__init__(selector_fn, key_fn, field, update_fn, lower_percentile, upper_percentile) - self._suite = suite - self._fork_stage_datapoints = {} - - def process_datapoints(self, datapoints: DataPoints) -> DataPoints: - fork_info = self._suite.execution_context.fork_info - # When running non-native benchmarks there is no concept of stages, there is only a single bench suite run. - # So we store a pretend "run" stage to indicate that all datapoints (for this fork) have already been produced. - current_stage = Stage.from_string("run") - try: - current_stage = self._suite.stages_info.current_stage - except AttributeError: - pass - # Preserve this fork-stage's datapoints for eventual post-processing. - self._fork_stage_datapoints[(fork_info.current_fork_index, current_stage)] = datapoints - if ( - fork_info.current_fork_index + 1 < fork_info.total_fork_count - or current_stage.stage_name != StageName.RUN - ): - # Delay post-processing until the 'run' stage of the last fork. - return datapoints - # All datapoints from this benchmark have been produced and are available for post-processing. - return super().process_datapoints(datapoints) - - def select_datapoints(self, datapoints: DataPoints) -> DataPoints: - # Select datapoints from all forks and stages. The latest datapoints (the ones in the `datapoints` argument) - # have already been added to `_fork_stage_datapoints` in `process_datapoints`. - all_datapoints = [] - for fork_stage_datapoints in self._fork_stage_datapoints.values(): - all_datapoints += fork_stage_datapoints - return super().select_datapoints(all_datapoints) - - def verify_and_process_id_score_function(self, datapoint: DataPoint): - score_function = datapoint.get("metric.score-function", "id") - if score_function != "id": - raise ValueError( - f"{self.__class__.__name__} can only post-process datapoints with a 'metric.score-function' of value 'id'! Encountered score function: '{score_function}'." - ) - datapoint["metric.score-value"] = datapoint["metric.value"] - - class NativeModeBuildSummaryPostProcessor(AfterAllForksAverageWithOutlierRemovalPostProcessor): - """ - Post-processor that calculates the outlier excluded average of the "avg-time" metric across run-only-forks - and produces the "avg-time" metric for an image build. - Should only be used when running a benchmark in native mode. 
- """ - - def __init__(self, suite: "PolybenchBenchmarkSuite", benchmark: str): - selector_fn = lambda dp: dp["metric.name"] == "avg-time" and dp["metric.object"] == "fork" - key_fn = lambda dp: (dp["benchmark"], dp["native-image.stage"], dp["native-image.rebuild-number"]) - field = "metric.value" - - def update_fn(dp): - dp["metric.object"] = "build" - del dp["metric.fork-number"] - self.verify_and_process_id_score_function(dp) - return dp - - lower_percentile = suite.resolve_config_field_or_default( - [benchmark, "outlier-fork-removal", "run-forks", "lower-percentile"], 0 - ) - upper_percentile = suite.resolve_config_field_or_default( - [benchmark, "outlier-fork-removal", "run-forks", "upper-percentile"], 1 - ) - super().__init__(suite, selector_fn, key_fn, field, update_fn, lower_percentile, upper_percentile) - - class NativeModeBenchmarkSummaryPostProcessor(AfterAllForksAverageWithOutlierRemovalPostProcessor): - """ - Post-processor that calculates the outlier excluded average of the "avg-time" metric across image builds - and produces a final "time" metric for a benchmark (separate "run" and "instrument-run" datapoints). - Should only be used when running a benchmark in native mode. - """ - - def __init__(self, suite: "PolybenchBenchmarkSuite", benchmark: str): - selector_fn = lambda dp: dp["metric.name"] == "avg-time" and dp["metric.object"] == "build" - key_fn = lambda dp: (dp["benchmark"], dp["native-image.stage"]) - field = "metric.value" - - def update_fn(dp): - dp["metric.name"] = "time" - del dp["metric.object"] - del dp["native-image.rebuild-number"] - self.verify_and_process_id_score_function(dp) - return dp - - lower_percentile = suite.resolve_config_field_or_default( - [benchmark, "outlier-fork-removal", "builds", "lower-percentile"], 0 - ) - upper_percentile = suite.resolve_config_field_or_default( - [benchmark, "outlier-fork-removal", "builds", "upper-percentile"], 1 - ) - super().__init__(suite, selector_fn, key_fn, field, update_fn, lower_percentile, upper_percentile) - - class ServerModeBenchmarkSummaryPostProcessor(AfterAllForksAverageWithOutlierRemovalPostProcessor): - """ - Post-processor that calculates the outlier excluded average of the "avg-time" metric across forks - and produces a final "time" metric for a benchmark. - Should only be used when running a benchmark in server (non-native) mode. - """ - - def __init__(self, suite: "PolybenchBenchmarkSuite", benchmark: str): - selector_fn = lambda dp: dp["metric.name"] == "avg-time" and dp["metric.object"] == "fork" - key_fn = lambda dp: dp["benchmark"] - field = "metric.value" - - def update_fn(dp): - dp["metric.name"] = "time" - del dp["metric.object"] - del dp["metric.fork-number"] - self.verify_and_process_id_score_function(dp) - return dp - - lower_percentile = suite.resolve_config_field_or_default( - [benchmark, "outlier-fork-removal", "run-forks", "lower-percentile"], 0 - ) - upper_percentile = suite.resolve_config_field_or_default( - [benchmark, "outlier-fork-removal", "run-forks", "upper-percentile"], 1 - ) - super().__init__(suite, selector_fn, key_fn, field, update_fn, lower_percentile, upper_percentile) - - class GraalSpecificFieldsRemoverPostProcessor(DataPointsPostProcessor): - """ - Removes all platform Graal specific fields from all the datapoints. - Used for cleaning up the bench results of a benchmark that runs on - a different platform (e.g. CPython). - The removed fields include: - * The "guest-vm" and "guest-vm-config" fields. - * All the "platform.*" fields. 
- """ - - def process_datapoints(self, datapoints: DataPoints) -> DataPoints: - return [{k: v for k, v in dp.items() if self._should_be_kept(k)} for dp in datapoints] - - def _should_be_kept(self, key) -> bool: - return key not in ["guest-vm", "guest-vm-config"] and not key.startswith("platform.") - def post_processors(self) -> List[DataPointsPostProcessor]: - return self.execution_context.post_processors - - def _get_post_processors(self, benchmark: str, native_mode: bool, bm_suite_args: List[str]): post_processors = [] - if self.jvm(bm_suite_args) == "cpython": - post_processors += [PolybenchBenchmarkSuite.GraalSpecificFieldsRemoverPostProcessor()] - if native_mode: - post_processors += [ - PolybenchBenchmarkSuite.NativeModeBenchmarkRenamingPostProcessor(self), - PolybenchBenchmarkSuite.NativeModeBuildSummaryPostProcessor(self, benchmark), - PolybenchBenchmarkSuite.NativeModeBenchmarkSummaryPostProcessor(self, benchmark), - ] + + # Modify the datapoints already produced in this run + if self.jvm(bm_exec_context().get("bm_suite_args")) == "cpython": + post_processors.append(GraalSpecificFieldsRemoverPostProcessor()) + if bm_exec_context().get("native_mode"): + post_processors.append(ImageStageDatapointDuplicatingPostProcessor(self)) + + # When running non-native benchmarks there is no concept of stages, there is only a single bench suite run. + # So we store a pretend "run" stage to indicate that all datapoints (for this fork) have already been produced. + current_stage = Stage.from_string("run") + try: + current_stage = self.stages_info.current_stage + except AttributeError: + pass + last_stage = current_stage.stage_name == StageName.RUN + # In the final stage of the final dispatch: calculate and add aggregate datapoints + if bm_exec_context().get("last_dispatch") and last_stage: + if bm_exec_context().get("native_mode"): + post_processors += [ + NativeModeBuildSummaryPostProcessor(self), + NativeModeBenchmarkSummaryPostProcessor(self), + ] + else: + post_processors.append(NonNativeImageBenchmarkSummaryPostProcessor(self)) + post_processors.append(ContextResetPostProcessor(self)) else: - post_processors += [ - PolybenchBenchmarkSuite.ServerModeBenchmarkSummaryPostProcessor(self, benchmark), - ] + # Store this run's datapoints in the PolybenchBenchmarkSuite.DATAPOINTS execution context key + # so they are available for final-dispatch aggregation. + post_processors.append(ContextStorePostProcessor()) + return post_processors @staticmethod @@ -1027,16 +1488,20 @@ def _get_metric_name(bench_output) -> Optional[str]: ArgumentParser(add_help=False), "Options for the Polybench benchmark suite:" ) _polybench_bench_suite_parser.parser.add_argument( - "--stable-run", - action="store_true", + "--stable-run-config", help=( - "Run a longer, more stable version of the benchmark, if configuration is available. " + "Run a longer, more stable version of the benchmark with the specified configuration. " "The stability of the benchmark is improved by building the language launcher multiple times and running " "multiple benchmark forks on each language launcher image. Outliers are removed and metrics are produced " "as an aggregate of the remaining runs. The number of repeated builds and forks, as well as the outlier " - "exclusion percentiles are defined per-benchmark." + "exclusion percentiles are defined per-benchmark in the configuration file." ), ) +_polybench_bench_suite_parser.parser.add_argument( + "--dry-stable-run", + action="store_true", + help=("Print the dispatching schedule and exit. 
Only has an effect when '--stable-run-config' is set."), +) _polybench_bench_suite_parser.parser.add_argument( "--regenerate-instrumentation-profile", action="store_true", diff --git a/truffle/mx.truffle/suite.py b/truffle/mx.truffle/suite.py index 9936319637bd..e8801cf6a075 100644 --- a/truffle/mx.truffle/suite.py +++ b/truffle/mx.truffle/suite.py @@ -39,7 +39,7 @@ # SOFTWARE. # suite = { - "mxversion": "7.64.0", + "mxversion": "7.67.0", "name" : "truffle", "version" : "25.1.0", "release" : False, diff --git a/vm/mx.vm/suite.py b/vm/mx.vm/suite.py index 8feb1ec35396..748f94be0691 100644 --- a/vm/mx.vm/suite.py +++ b/vm/mx.vm/suite.py @@ -1,7 +1,7 @@ suite = { "name": "vm", "version" : "25.1.0", - "mxversion": "7.55.2", + "mxversion": "7.67.0", "release" : False, "groupId" : "org.graalvm",
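
Most of the changes above migrate per-run state from the suite's execution_context object into a shared benchmark execution context: mutable values are registered in before() as BoxContextValue entries and removed again in after(), while constants such as the benchmark name and the native-mode flag are scoped with ConstantContextValueManager. The sketch below is a simplified, self-contained stand-in for that lifecycle; it is not the mx_benchmark implementation, and it elides the BoxContextValue wrapper. It only illustrates the register/read/update/remove pattern the diff relies on.

# Simplified stand-ins for the context helpers used in this diff (illustrative only;
# the names mirror the diff, the behavior is assumed).
from contextlib import contextmanager

_CONTEXT = {}  # key -> current value; the real context is managed by mx_benchmark


def bm_exec_context():
    # The real bm_exec_context() returns a richer object; this toy exposes the same
    # operations the diff uses: add_context_value, get, update, has, remove.
    class _Ctx:
        def add_context_value(self, key, value):
            _CONTEXT[key] = value

        def get(self, key):
            return _CONTEXT[key]

        def update(self, key, value):
            _CONTEXT[key] = value

        def has(self, key):
            return key in _CONTEXT

        def remove(self, key):
            _CONTEXT.pop(key, None)

    return _Ctx()


@contextmanager
def constant_context_value(key, value):
    # Mirrors the role of ConstantContextValueManager: the value only exists inside the with-block.
    bm_exec_context().add_context_value(key, value)
    try:
        yield
    finally:
        bm_exec_context().remove(key)


REBUILD_NUMBER = "rebuild-number"  # stand-in for PolybenchBenchmarkSuite.REBUILD_NUMBER


def before():
    # Register mutable per-run state before the benchmark starts.
    bm_exec_context().add_context_value(REBUILD_NUMBER, -1)


def run_stage_image():
    # Increment the rebuild counter before a final image stage, as run_stage() does above.
    bm_exec_context().update(REBUILD_NUMBER, bm_exec_context().get(REBUILD_NUMBER) + 1)


def after():
    # Clean up the mutable state once the benchmark is done.
    bm_exec_context().remove(REBUILD_NUMBER)


if __name__ == "__main__":
    before()
    with constant_context_value("benchmark", "example-benchmark"):
        run_stage_image()
        print(bm_exec_context().get("benchmark"), bm_exec_context().get(REBUILD_NUMBER))
    after()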
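
The '--stable-run-config' option introduced above takes a path to a JSON file that _resolve_stable_run_config() loads and that the suite queries per benchmark. The exact schema is not spelled out in this diff; the sketch below infers a plausible shape from the config-field paths that appear here ([<benchmark>, "outlier-fork-removal", "run-forks", "count"] plus the lower-/upper-percentile lookups, whose defaults are 0 and 1). The benchmark key and all numeric values are placeholders.

# Hypothetical stable-run configuration, written out as JSON.
# Shape inferred from the resolve_config_field_or_default(...) lookups in this diff;
# the benchmark key and every value below are illustrative placeholders.
import json

stable_run_config = {
    "example-benchmark": {
        "outlier-fork-removal": {
            "run-forks": {
                "count": 5,               # run forks per launcher image rebuild
                "lower-percentile": 0.1,  # forks below this percentile are discarded
                "upper-percentile": 0.9,  # forks above this percentile are discarded
            },
            "builds": {
                "lower-percentile": 0.0,  # the in-code defaults are 0 and 1
                "upper-percentile": 1.0,
            },
        },
    },
}

with open("stable-run-config.json", "w") as f:
    json.dump(stable_run_config, f, indent=2)

# The resulting file path would then be passed to the suite via its
# '--stable-run-config' option (see the parser definition above).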