In [19]:
import torch
from torch.nn import DataParallel, Linear
import torchvision
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
import numpy as np
import  matplotlib.pyplot as plt
from matplotlib.pyplot import imshow


In [20]:
# Export the DistributedDataParallel_script notebook to a plain Python script;
# the cells below launch that script with `python` under different settings.
!jupyter nbconvert --to script DistributedDataParallel_script.ipynb
# Show the first lines of the generated script as a quick check of the export.
!head DistributedDataParallel_script.py

[NbConvertApp] Converting notebook DistributedDataParallel_script.ipynb to script
[NbConvertApp] Writing 11504 bytes to DistributedDataParallel_script.py
#!/usr/bin/env python
# coding: utf-8

# In[5]:


import torch
from torch.nn import DataParallel, Linear
import torchvision
from torch.utils.data.dataset import Dataset


#### Single GPU training

In [21]:
# Baseline run: --issingle selects the script's non-distributed "Single GPU training"
# path, with --datapath pointing at ../MNIST_data. %time reports how long the
# shell command took.
%time !python DistributedDataParallel_script.py --issingle --datapath '../MNIST_data'

SystemLog: args =  Namespace(backend='gloo', batch=16, datapath='../MNIST_data', epochs=1, global_rank=None, isaml=False, issingle=True, local_rank=None, lr=0.001, size=None)
SystemLog: Single GPU training
SystemLog: loading data to ../MNIST_data
Dataset MNIST
    Number of datapoints: 60000
    Root location: ../MNIST_data
    Split: Train
60000 10000
(784,) 5
SystemLog: using device cuda
100%|██████████████████████████████████████| 3750/3750 [00:12<00:00, 290.48it/s]
SystemLog: loss at epoch 0 is 0.012111485004425049
SystemLog: shape = torch.Size([128, 784]), mean of values = -0.003952726721763611
SystemLog: shape = torch.Size([128]), mean of values = 0.016963692381978035
SystemLog: shape = torch.Size([64, 128]), mean of values = 0.009603319689631462
SystemLog: shape = torch.Size([64]), mean of values = 0.019543424248695374
SystemLog: shape = torch.Size([10, 64]), mean of values = -0.031197739765048027
SystemLog: shape = torch.Size([10]), mean of values = -0.012650328688323498
100%|█

#### Single-GPU distributed training (world size 1, `gloo` then `nccl` backend)

In [22]:
# Distributed code path with a single process: rank 0 of world size 1 on the gloo backend.
# --datapath is not passed here; the recorded output shows the script using
# ./MNIST_data/0/ and downloading MNIST into it.
%time !python DistributedDataParallel_script.py --local_rank 0 --global_rank 0 --size 1 --backend 'gloo'

SystemLog: args =  Namespace(backend='gloo', batch=16, datapath=None, epochs=1, global_rank=0, isaml=False, issingle=False, local_rank=0, lr=0.001, size=1)
SystemLog: starting distibuted training Namespace(backend='gloo', batch=16, datapath=None, epochs=1, global_rank=0, isaml=False, issingle=False, local_rank=0, lr=0.001, size=1)
SystemLog: inside train_distrib rank 0 and world_size 1
SystemLog: loading data to ./MNIST_data/0/
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST_data/0/MNIST/raw/train-images-idx3-ubyte.gz
9920512it [00:00, 15150541.15it/s]                                              
Extracting ./MNIST_data/0/MNIST/raw/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST_data/0/MNIST/raw/train-labels-idx1-ubyte.gz
32768it [00:00, 211978.40it/s]                                                  
Extracting ./MNIST_data/0/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.le

In [106]:
# Same single-process distributed run (rank 0, world size 1), but on the nccl backend.
# NOTE(review): the recorded output below has no `SystemLog:` prefix and no `datapath`
# field in the Namespace, and the execution count jumps from 22 to 106, so it appears
# to come from an older revision of the script. Re-run after Restart & Run All to refresh.
%time !python DistributedDataParallel_script.py --local_rank 0 --global_rank 0 --size 1 --backend 'nccl'

args =  Namespace(backend='nccl', batch=16, epochs=1, global_rank=0, isaml=False, issingle=False, local_rank=0, lr=0.001, size=1)
starting distibuted training Namespace(backend='nccl', batch=16, epochs=1, global_rank=0, isaml=False, issingle=False, local_rank=0, lr=0.001, size=1)
inside train_distrib rank 0 and world_size 1
loading data
Dataset MNIST
    Number of datapoints: 60000
    Root location: ./MNIST_data/0/
    Split: Train
60000 10000
(784,) 5
setting up
MASTER_ADDR = localhost
MASTER_PORT = 12355
using device 0
100%|██████████████████████████████████████| 3750/3750 [00:14<00:00, 267.42it/s]
loss at epoch 0 is 0.0966588482260704
shape = torch.Size([128, 784]), mean of values = -0.0026552591007202864
shape = torch.Size([128]), mean of values = 0.020683109760284424
shape = torch.Size([64, 128]), mean of values = 0.008393129333853722
shape = torch.Size([64]), mean of values = 0.025294966995716095
shape = torch.Size([10, 64]), mean of values = -0.028980279341340065
shape = torch.

### Multi-process distributed training (world size 2)

In [108]:
# Launches rank 0 of a 2-process job (--size 2) with --batch 8. No --backend is passed;
# the logged args show backend='gloo'.
# NOTE(review): only rank 0 is started from this cell. The second rank is presumably
# launched separately (e.g. from another terminal) with --global_rank 1; confirm and
# document how, since the notebook does not show it.
# NOTE(review): the recorded output lacks the `SystemLog:` prefix used by the earlier
# cells, so it looks like it predates the current script. Re-run to refresh.
%time !python DistributedDataParallel_script.py --local_rank 0 --global_rank 0 --size 2 --batch 8

args =  Namespace(backend='gloo', batch=8, epochs=1, global_rank=0, isaml=False, issingle=False, local_rank=0, lr=0.001, size=2)
starting distibuted training Namespace(backend='gloo', batch=8, epochs=1, global_rank=0, isaml=False, issingle=False, local_rank=0, lr=0.001, size=2)
inside train_distrib rank 0 and world_size 2
loading data
Dataset MNIST
    Number of datapoints: 60000
    Root location: ./MNIST_data/0/
    Split: Train
60000 10000
(784,) 5
setting up
MASTER_ADDR = localhost
MASTER_PORT = 12355
using device 0
100%|██████████████████████████████████████| 3750/3750 [00:15<00:00, 247.44it/s]
loss at epoch 0 is 0.17599113285541534
shape = torch.Size([128, 784]), mean of values = -0.0026919872034341097
shape = torch.Size([128]), mean of values = 0.021903974935412407
shape = torch.Size([64, 128]), mean of values = 0.00897801574319601
shape = torch.Size([64]), mean of values = 0.027450013905763626
shape = torch.Size([10, 64]), mean of values = -0.028402984142303467
shape = torch.Si