Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions olive/evaluator/olive_evaluator.py
Comment thread
jambayk marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -698,9 +698,11 @@ def _inference(
targets = torch.cat(targets, dim=0)
logits = torch.cat(logits, dim=0)
# move model to cpu
# don't want model to be kept on gpu since model persists and takes up gpu memory
if device:
session.to("cpu")
# moving the model to cpu alone does not release gpu memory; call cuda.empty_cache() to release it
if torch.cuda.is_available():
torch.cuda.empty_cache()
return OliveModelOutput(preds=preds, logits=logits), targets

def _evaluate_accuracy(
Expand Down Expand Up @@ -770,7 +772,9 @@ def _evaluate_latency(
# move model to cpu
if device:
session.to("cpu")

# moving the model to cpu alone does not release gpu memory; call cuda.empty_cache() to release it
if torch.cuda.is_available():
torch.cuda.empty_cache()
return OliveEvaluator.compute_latency(metric, latencies)


Expand Down
2 changes: 2 additions & 0 deletions olive/passes/onnx/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ def _convert_model_on_device(
# Reset to CPU so that the resources consumed on the GPU can be freed.
if device != "cpu":
pytorch_model.to("cpu")
if torch.cuda.is_available():
torch.cuda.empty_cache()
# save the model to the output path and return the model
return model_proto_to_olive_model(onnx_model, output_model_path, config)

Expand Down
3 changes: 3 additions & 0 deletions olive/passes/pytorch/qlora.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ def _run_for_config(

# remove loaded model
new_model.model = None
del pytorch_model
if torch.cuda.is_available():
torch.cuda.empty_cache()
# remove the device map since we don't want "auto" device map
new_model.hf_config.model_loading_args.device_map = None
# remove model_overwrites from model_attributes
Expand Down