microsoft · trajepl · Oct 23, 2023 · Oct 20, 2023 · Oct 20, 2023 · Oct 23, 2023
diff --git a/olive/evaluator/olive_evaluator.py b/olive/evaluator/olive_evaluator.py
@@ -698,9 +698,11 @@ def _inference(
         targets = torch.cat(targets, dim=0)
         logits = torch.cat(logits, dim=0)
         # move model to cpu
-        # don't want model to be kept on gpu since model persists and takes up gpu memory
         if device:
             session.to("cpu")
+        # moving to cpu alone does not release cached gpu memory; call cuda.empty_cache() to release it
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         return OliveModelOutput(preds=preds, logits=logits), targets
 
     def _evaluate_accuracy(
@@ -770,7 +772,9 @@ def _evaluate_latency(
         # move model to cpu
         if device:
             session.to("cpu")
-
+        # moving to cpu alone does not release cached gpu memory; call cuda.empty_cache() to release it
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         return OliveEvaluator.compute_latency(metric, latencies)
 
 

diff --git a/olive/passes/onnx/conversion.py b/olive/passes/onnx/conversion.py
@@ -192,6 +192,8 @@ def _convert_model_on_device(
         # Reset to CPU so the resource consumed on GPU could be free.
         if device != "cpu":
             pytorch_model.to("cpu")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         # save the model to the output path and return the model
         return model_proto_to_olive_model(onnx_model, output_model_path, config)
 

diff --git a/olive/passes/pytorch/qlora.py b/olive/passes/pytorch/qlora.py
@@ -269,6 +269,9 @@ def _run_for_config(
 
         # remove loaded model
         new_model.model = None
+        del pytorch_model
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         # remove the device map since we don't want "auto" device map
         new_model.hf_config.model_loading_args.device_map = None
         # remove model_overwrites from model_attributes