
Commit b8dacfe
Merge branch 'master' into fix-hpz-with-zero-elt
loadams committed Jun 17, 2024
2 parents: 025ddb0 + 7331630
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 4 additions & 1 deletion accelerator/xpu_accelerator.py
@@ -159,7 +159,10 @@ def range_pop(self):
         return
 
     def lazy_call(self, callback):
-        return torch.xpu.lazy_init._lazy_call(callback)
+        if hasattr(torch.xpu, "_lazy_call"):
+            return torch.xpu._lazy_call(callback)
+        else:
+            return torch.xpu.lazy_init._lazy_call(callback)
 
     def communication_backend_name(self):
         return self._communication_backend_name
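The lazy_call change is a version-compatibility shim: depending on the PyTorch build, the private hook lives either at torch.xpu._lazy_call or at torch.xpu.lazy_init._lazy_call, so the accelerator now probes for the newer location and falls back to the older one. Below is a minimal sketch of that probe-and-fall-back pattern; the resolve_lazy_call helper and the SimpleNamespace stand-in for torch.xpu are hypothetical, included only so the example runs without XPU hardware or a particular PyTorch version.

    import types

    def resolve_lazy_call(xpu_module):
        """Return whichever _lazy_call entry point the given module exposes."""
        if hasattr(xpu_module, "_lazy_call"):
            return xpu_module._lazy_call
        return xpu_module.lazy_init._lazy_call

    # Stand-in for an older torch.xpu layout where the hook sits on lazy_init.
    fake_old_xpu = types.SimpleNamespace(
        lazy_init=types.SimpleNamespace(_lazy_call=lambda cb: cb()))

    resolve_lazy_call(fake_old_xpu)(lambda: print("callback ran via lazy_init fallback"))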
2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/engine.py
@@ -742,7 +742,7 @@ def _exec_forward_pass(self, buffer_id):
                 raise ValueError("expecting a tensor or a tuple of tensors")
             part = PartitionedTensor(tensor=first_output, group=self.grid.get_slice_parallel_group())
             # Clear the large output data, but save the computation graph
-            first_output.data = torch.zeros(1)
+            first_output.data = torch.zeros(1, device=first_output.data.device)
             self.pipe_buffers['output_tensors'][buffer_id] = first_output
             # Inject the partitioned tensor into the output before sending
             outputs = (part.to_meta(), part.data(), *outputs_tail)
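The engine.py change fixes a device mismatch: torch.zeros(1) allocates on the CPU, so rebinding first_output.data to it silently moved the saved output tensor off the accelerator the pipeline stage ran on, whereas device=first_output.data.device keeps the tiny placeholder on the original device. A short sketch of the effect, assuming a CUDA-style accelerator is present (it falls back to CPU so the snippet still runs anywhere):

    import torch

    # Pick whatever accelerator is around; fall back to CPU so the sketch still runs.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    first_output = torch.randn(4, 4, device=device, requires_grad=True)

    # torch.zeros(1) lives on the CPU, so the old assignment would silently move
    # the tensor off the accelerator:
    #   first_output.data = torch.zeros(1)
    # Allocating the placeholder on the same device preserves the tensor's device.
    first_output.data = torch.zeros(1, device=first_output.data.device)
    print(first_output.device)  # still the device the output was computed on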
