Skip to content

Commit

Permalink
Fix benchmarks/distributed/ddp/benchmark.py (#51095)
Browse files Browse the repository at this point in the history
Summary:
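Fixes the issue reported in #50679 by replacing the hand-rolled tensor-based serialization in `allgather_object` with the built-in object-based collective `dist.all_gather_object`. The user has verified that this patch works.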

Test with:
RANK=0 python3 pytorch-dist-benchmark.py --world-size 2 --master-addr 127.0.0.1 --master-port 23456
RANK=1 python3 pytorch-dist-benchmark.py --world-size 2 --master-addr 127.0.0.1 --master-port 23456

Pull Request resolved: #51095

Reviewed By: SciPioneer

Differential Revision: D26070275

Pulled By: rohan-varma

fbshipit-source-id: 59abcaac9e395bcdd8a018bf6ba07521d94b2fdf
  • Loading branch information
rohan-varma authored and facebook-github-bot committed Jan 29, 2021
1 parent 1b089c1 commit 5021582
Showing 1 changed file with 3 additions and 18 deletions.
21 changes: 3 additions & 18 deletions benchmarks/distributed/ddp/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#

import argparse
import io
import itertools
import json
import os
Expand All @@ -27,23 +26,9 @@


def allgather_object(obj):
    """Gather ``obj`` from every rank of the default process group.

    Delegates to the built-in ``dist.all_gather_object`` collective instead
    of hand-serializing ``obj`` into a byte tensor, padding it to the longest
    payload across ranks, and deserializing each gathered tensor.

    Args:
        obj: Object contributed by the calling rank.

    Returns:
        A list with ``dist.get_world_size()`` entries, filled in place by
        ``dist.all_gather_object``.
    """
    # NOTE(review): torch.distributed documents this collective as
    # pickle-based, so ``obj`` presumably has to be picklable and should only
    # be exchanged between trusted ranks -- confirm against the PyTorch docs.
    # Placeholder slots, one per rank; the collective overwrites each of them.
    gathered = [None] * dist.get_world_size()
    dist.all_gather_object(gathered, obj)
    return gathered

def allgather_run(cmd):
proc = subprocess.run(shlex.split(cmd), capture_output=True)
Expand Down

0 comments on commit 5021582

Please sign in to comment.