neuralmagic · bfineran · Jun 13, 2022 · Jun 10, 2022 · Jun 10, 2022
diff --git a/src/sparseml/pytorch/image_classification/utils/trainer.py b/src/sparseml/pytorch/image_classification/utils/trainer.py
@@ -33,6 +33,7 @@
     default_device,
     is_parallel_model,
 )
+from sparsezoo import Zoo
 
 
 _LOGGER = logging.getLogger(__file__)
@@ -327,6 +328,10 @@ def _run_train_epoch(
         )
 
     def _setup_checkpoint_manager(self):
+        if self.checkpoint_path and self.checkpoint_path.startswith("zoo"):
+            self.checkpoint_path = Zoo.load_model_from_stub(
+                self.checkpoint_path
+            ).download_framework_files(extensions=[".pth"])[0]
         checkpoint_state = torch.load(self.checkpoint_path)
         checkpoint_manager = None
         checkpoint_recipe = checkpoint_state.get("recipe")

diff --git a/src/sparseml/transformers/sparsification/trainer.py b/src/sparseml/transformers/sparsification/trainer.py
@@ -254,9 +254,10 @@ def create_optimizer(self):
             if torch.distributed.is_initialized()
             else self.args._n_gpu
         )
+        n_device = n_gpu if n_gpu > 0 else 1
         total_batch_size = (
             self.args.per_device_train_batch_size
-            * n_gpu
+            * n_device
             * self.args.gradient_accumulation_steps
         )
         self.manager_steps_per_epoch = math.ceil(