macrocosm-os · dbobrenko · Jun 26, 2025 · Jun 26, 2025 · Jun 26, 2025 · Jun 26, 2025
diff --git a/prompting/weight_setting/weight_setter.py b/prompting/weight_setting/weight_setter.py
@@ -23,7 +23,7 @@
 
 
 async def set_weights(
-    weights: np.ndarray,
+    weights: npt.NDArray[np.float32],
     subtensor: bt.Subtensor | None = None,
     metagraph: bt.Metagraph | None = None,
     weight_syncer: WeightSynchronizer | None = None,
@@ -115,6 +115,21 @@ async def start(
         await self._load_rewards()
         return await super().start(name=name)
 
+    async def _compute_avg_reward(self) -> npt.NDArray[np.float32]:
+        """Average each UID's reward over all `reward_history` snapshots; absent UIDs count as zero."""
+        num_uids = int(shared_settings.METAGRAPH.n.item())
+        accum = np.zeros(num_uids, dtype=np.float32)
+        if not isinstance(self.reward_history, deque) or len(self.reward_history) == 0:
+            logger.warning(f"Empty rewards history, setting zero weights: {self.reward_history}")
+            return accum
+
+        for snapshot in self.reward_history:
+            for uid_str, info in snapshot.items():
+                uid = int(uid_str)
+                if 0 <= uid < num_uids:  # Out-of-range UIDs would raise or wrap to another miner.
+                    accum[uid] += float(info["reward"])
+        return accum / len(self.reward_history)
+
     async def _save_rewards(self, rewards: npt.NDArray[np.float32]):
         """Persist the latest epoch rewards.
 
@@ -255,14 +270,16 @@ async def run_step(self):
                 return
 
             await self._save_rewards(final_rewards)
-            final_rewards[final_rewards < 0] = 0
-            final_rewards /= np.sum(final_rewards) + 1e-10
+            averaged_rewards = await self._compute_avg_reward()
+            averaged_rewards[averaged_rewards < 0] = 0
+            averaged_rewards /= np.sum(averaged_rewards) + 1e-10
         except BaseException as ex:
             logger.exception(f"{ex}")
+            return
 
         # Set weights on chain.
         await set_weights(
-            final_rewards,
+            averaged_rewards,
             subtensor=shared_settings.SUBTENSOR,
             metagraph=shared_settings.metagraph_force_sync(),
             weight_syncer=self.weight_syncer,

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "prompting"
-version = "2.19.8"
+version = "2.19.9"
 description = "Subnetwork 1 runs on Bittensor and is maintained by Macrocosmos. It's an effort to create decentralised AI"
 authors = ["Kalei Brady, Dmytro Bobrenko, Felix Quinque, Steffen Cruz, Richard Wardle"]
 readme = "README.md"

diff --git a/tests/prompting/weight_setting/test_weight_setter.py b/tests/prompting/weight_setting/test_weight_setter.py
@@ -1,5 +1,6 @@
 # ruff: noqa: E402
 import asyncio
+from collections import deque
 from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
@@ -90,7 +91,7 @@ def test_steepness():
     assert result[0] < 0, "Negative reward should remain negative"
 
 
-def test_run_step_with_reward_events():
+def test_run_step_with_reward_events(tmp_path: Path):
     with (
         patch("shared.uids.get_uids") as mock_get_uids,
         patch("prompting.weight_setting.weight_setter.TaskRegistry") as MockTaskRegistry,
@@ -126,7 +127,7 @@ def __init__(self, task, uids, rewards, weight):
 
         # Set up the mock mutable_globals.
 
-        weight_setter = WeightSetter(reward_history_path=Path("test_validator_rewards.jsonl"))
+        weight_setter = WeightSetter(reward_history_path=tmp_path / "test_validator_rewards.jsonl")
         reward_events = [
             [
                 WeightedRewardEvent(
@@ -165,6 +166,37 @@ def __init__(self, task, uids, rewards, weight):
         mock_logger.warning.assert_not_called()
 
 
+def _make_snapshot(values: list[float]) -> dict[int, dict[str, float]]:
+    return dict(enumerate({"reward": reward} for reward in values))  # Keyed by list position.
+
+
+@pytest.mark.asyncio
+async def test_avg_reward_non_empty(tmp_path: Path) -> None:
+    """Averaging an ascending and a descending snapshot should give 255 / 2 at every index."""
+    history_file = tmp_path / "test_validator_rewards.jsonl"
+    setter = WeightSetter(reward_history_path=history_file)
+    setter.reward_history_len = 10
+    ascending = list(range(256))
+    # Position i holds i in one snapshot and 255 - i in the other, so each pair sums to 255.
+    snapshots = [_make_snapshot(ascending), _make_snapshot(ascending[::-1])]
+    setter.reward_history = deque(snapshots, maxlen=10)
+
+    averaged = await setter._compute_avg_reward()
+
+    assert averaged.dtype == np.float32
+    assert np.allclose(averaged, np.full(256, 255 / 2, dtype=np.float32), atol=1e-6)
+
+
+@pytest.mark.asyncio
+async def test_avg_reward_empty(monkeypatch: MonkeyPatch, tmp_path: Path) -> None:
+    """With no snapshots recorded, the average is expected to be 256 zeros."""
+    setter = WeightSetter(reward_history_path=tmp_path / "test_validator_rewards.jsonl")
+    setter.reward_history_len = 10
+    setter.reward_history = deque(maxlen=10)  # Left empty on purpose: nothing is appended.
+    averaged = await setter._compute_avg_reward()
+    assert np.array_equal(averaged, np.zeros(256, dtype=np.float32))
+
+
 @pytest.mark.asyncio
 async def test_set_weights(monkeypatch: MonkeyPatch):
     """`set_weights` calls Subtensor.set_weights with processed vectors."""

diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py
@@ -237,7 +237,7 @@ async def chat_completion(
     uids: Optional[list[int]] = None,
     num_miners: int = 5,
     uid_tracker: UidTracker | None = None,
-    add_reliable_miners: int = 1,
+    add_reliable_miners: int = 3,
 ) -> tuple | StreamingResponse:
     # TODO: Add docstring.
     """Handle chat completion with multiple miners in parallel."""