[Feature] Enable parameter reset in loss module #2017

Status: Draft. Wants to merge 1 commit into main.
examples/td3/td3.py (3 changes: 2 additions & 1 deletion)

@@ -123,7 +123,8 @@ def main(cfg: "DictConfig"):  # noqa: F821
             q_losses,
         ) = ([], [])
         for _ in range(num_updates):
-
+            if update_counter % 5 == 0:
+                loss_module.reset_parameters()
             # Update actor every delayed_updates
             update_counter += 1
             update_actor = update_counter % delayed_updates == 0
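In isolation, the schedule this hunk introduces looks like the minimal, runnable sketch below (a plain nn.Linear stands in for the TD3 loss module; the interval of 5 mirrors the value hardcoded in this draft).

import torch.nn as nn

net = nn.Linear(4, 1)  # stand-in for loss_module
num_updates, update_counter = 20, 0
for _ in range(num_updates):
    # Re-initialize every 5th update, as the diff above does.
    if update_counter % 5 == 0:
        net.reset_parameters()  # stand-in for loss_module.reset_parameters()
    update_counter += 1
    # ... the actual TD3 losses and optimizer steps follow here ...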
torchrl/objectives/common.py (32 changes: 31 additions & 1 deletion)

@@ -9,9 +9,10 @@
 import warnings
 from copy import deepcopy
 from dataclasses import dataclass
-from typing import Iterator, List, Optional, Tuple
+from typing import Callable, Iterator, List, Optional, Tuple

 import torch
+import torch.nn.init as init
 from tensordict import is_tensor_collection, TensorDict, TensorDictBase

 from tensordict.nn import TensorDictModule, TensorDictModuleBase, TensorDictParams
@@ -363,6 +364,35 @@ def reset(self) -> None:
         # mainly used for PPO with KL target
         pass

+    def reset_parameters(
+        self,
+        module_names: Optional[List[Parameter]] = None,
Contributor suggested change:
-        module_names: Optional[List[Parameter]] = None,
+        module_names: List[Parameter] | None = None,
+        init_func: Optional[Callable] = None,
Contributor suggested change:
-        init_func: Optional[Callable] = None,
+        init_func: Callable[[torch.Tensor], None] | None = None,

+    ) -> None:
+        """Reset the parameters of the specified modules.
+        Args:
+            module_names (Optional[List[Parameter]]): A list of module names to reset the parameters for.
Contributor suggested change:
-            module_names (Optional[List[Parameter]]): A list of module names to reset the parameters for.
+            module_names (list of nn.Parameter, optional): A list of module names to reset the parameters for.

+                If None, all modules with names ending in "_params" will be reset.
+            init_func (Optional[Callable]): A function to initialize the parameters.
Contributor suggested change:
-            init_func (Optional[Callable]): A function to initialize the parameters.
+            init_func (Callable[[torch.Tensor], None]): A function to initialize the parameters.

+                If None, the parameters will be initialized with uniform random values between -1 and 1.
Contributor:
Seems very unlikely that anyone would want to use that init IMO. Shouldn't we use the init method from the corresponding nn.Module if there is?
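One possible reading of that suggestion, sketched outside this PR (illustrative only, not the author's implementation): recurse into the network and re-run each built-in layer's own default initializer where one exists.

import torch.nn as nn

def _reset_with_module_defaults(module: nn.Module) -> None:
    # Built-in layers such as nn.Linear and nn.Conv2d define
    # reset_parameters(); layers without one (e.g. nn.ReLU) are skipped.
    for m in module.modules():
        if hasattr(m, "reset_parameters"):
            m.reset_parameters()

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
_reset_with_module_defaults(net)  # re-runs each layer's default init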

"""
if module_names is None:
params_2_reset = [
name for name in self._modules.keys() if name.endswith("_params")
]
else:
params_2_reset = [name + "_params" for name in module_names]

+        def _reset_params(param):
Contributor:
Having one single reset function will be hard to handle, we need a way to tie the reset function and the module.
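A hypothetical shape for what this comment asks for (not part of the PR; assumes each "*_params" entry behaves like an nn.Module exposing .parameters()): accept a mapping from module name to init function instead of one shared callable.

import torch.nn as nn
import torch.nn.init as init
from typing import Callable, Dict

def reset_parameters_per_module(
    loss_module: nn.Module,
    init_funcs: Dict[str, Callable],
) -> None:
    # Hypothetical variant: look up an init function per module name,
    # falling back to the draft's uniform init when none is registered.
    for name, child in loss_module.named_children():
        if not name.endswith("_params"):
            continue
        fn = init_funcs.get(name[: -len("_params")],
                            lambda t: init.uniform_(t, -1, 1))
        for p in child.parameters():
            fn(p.data)

# Usage, e.g. a custom init for the actor only:
# reset_parameters_per_module(loss, {"actor_network": lambda t: init.normal_(t, std=0.01)})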

+            if init_func is not None:
+                init_func(param.data)
+            else:
+                init.uniform_(param.data, -1, 1)
+
+        for name in params_2_reset:
+            getattr(self, name).apply(_reset_params)
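For readers outside TorchRL, the selection-and-reset logic above can be reproduced standalone. In the sketch below, the SimpleLoss class and its two "*_params" children are hypothetical; only the "_params" suffix convention and the uniform fallback mirror the diff.

import torch.nn as nn
import torch.nn.init as init
from typing import Callable, List, Optional

class SimpleLoss(nn.Module):
    """Toy stand-in for a loss module holding '<name>_params' submodules."""

    def __init__(self):
        super().__init__()
        self.actor_network_params = nn.Linear(4, 4)
        self.qvalue_network_params = nn.Linear(4, 1)

    def reset_parameters(
        self,
        module_names: Optional[List[str]] = None,
        init_func: Optional[Callable] = None,
    ) -> None:
        # Same selection rule as the diff: default to every submodule
        # whose name ends in "_params".
        if module_names is None:
            names = [n for n in self._modules if n.endswith("_params")]
        else:
            names = [n + "_params" for n in module_names]
        for n in names:
            for p in getattr(self, n).parameters():
                if init_func is not None:
                    init_func(p.data)
                else:
                    init.uniform_(p.data, -1, 1)  # the draft's fallback

loss = SimpleLoss()
loss.reset_parameters(["actor_network"], lambda t: init.normal_(t, std=0.01))
loss.reset_parameters()  # uniform fallback on every "*_params" child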

     @property
     def value_estimator(self) -> ValueEstimatorBase:
         """The value function blends in the reward and value estimate(s) from upcoming state(s)/state-action pair(s) into a target value estimate for the value network."""