Try to upload best model to WandB during training

sgoodfriend · sgoodfriend · commit 9ba0ab50894e · 2023-04-10T17:27:33.000-07:00
diff --git a/rl_algo_impls/runner/train.py b/rl_algo_impls/runner/train.py
@@ -49,7 +49,7 @@ def train(args: TrainArgs):
     print(hyperparams)
     config = Config(args, hyperparams, os.getcwd())
 
-    wandb_enabled = args.wandb_project_name
+    wandb_enabled = bool(args.wandb_project_name)
     if wandb_enabled:
         wandb.tensorboard.patch(
             root_logdir=config.tensorboard_summary_path, pytorch=True
@@ -108,6 +108,7 @@ def train(args: TrainArgs):
         else None,
         best_video_dir=config.best_videos_dir,
         additional_keys_to_log=config.additional_keys_to_log,
+        wandb_enabled=wandb_enabled,
     )
     callbacks: List[Callback] = [eval_callback]
     if config.hyperparams.microrts_reward_decay_callback:
@@ -151,13 +152,8 @@ def train(args: TrainArgs):
 
     if wandb_enabled:
         shutil.make_archive(
-            os.path.join(wandb.run.dir, config.model_dir_name()),
+            os.path.join(wandb.run.dir, config.model_dir_name()),  # type: ignore
             "zip",
             config.model_dir_path(),
         )
-        shutil.make_archive(
-            os.path.join(wandb.run.dir, config.model_dir_name(best=True)),
-            "zip",
-            config.model_dir_path(best=True),
-        )
         wandb.finish()
diff --git a/rl_algo_impls/shared/callbacks/eval_callback.py b/rl_algo_impls/shared/callbacks/eval_callback.py
@@ -1,5 +1,6 @@
 import itertools
 import os
+import shutil
 from time import perf_counter
 from typing import Dict, List, Optional, Union
 
@@ -132,6 +133,7 @@ def __init__(
         ignore_first_episode: bool = False,
         additional_keys_to_log: Optional[List[str]] = None,
         score_function: str = "mean-std",
+        wandb_enabled: bool = False,
     ) -> None:
         super().__init__()
         self.policy = policy
@@ -157,6 +159,7 @@ def __init__(
         self.ignore_first_episode = ignore_first_episode
         self.additional_keys_to_log = additional_keys_to_log
         self.score_function = score_function
+        self.wandb_enabled = wandb_enabled
 
     def on_step(self, timesteps_elapsed: int = 1) -> bool:
         super().on_step(timesteps_elapsed)
@@ -196,6 +199,15 @@ def evaluate(
                 assert self.best_model_path
                 self.policy.save(self.best_model_path)
                 print("Saved best model")
+                if self.wandb_enabled:
+                    import wandb
+
+                    best_model_name = os.path.split(self.best_model_path)[-1]
+                    shutil.make_archive(
+                        os.path.join(wandb.run.dir, best_model_name),  # type: ignore
+                        "zip",
+                        self.best_model_path,
+                    )
             self.best.write_to_tensorboard(
                 self.tb_writer, "best_eval", self.timesteps_elapsed
             )