<a href="https://colab.research.google.com/github/prakash-bisht/Pytorch_Basic/blob/master/pytorch6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DATALOADERS

In [None]:
# Toy data: ten inputs (0..9) and ten targets (10..19).
xs = [*range(10)]
ys = [*range(10, 20)]
print('xs values: ', xs)
print('ys values: ', ys)

xs values:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
ys values:  [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]


In [None]:
# Pair every input with its target; the resulting list supports indexing and len().
dataset = [(x, y) for x, y in zip(xs, ys)]
dataset[0]  # -> the tuple (xs[0], ys[0])

(0, 10)

In [None]:
class MyDataset:
    """Minimal indexable dataset that pairs ``xs[i]`` with ``ys[i]``.

    Args:
        xs: Indexable, sized collection of inputs.
        ys: Indexable, sized collection of targets; must be as long as ``xs``.

    Raises:
        ValueError: If ``xs`` and ``ys`` do not have the same length.
    """

    def __init__(self, xs, ys):
        # Fail fast: __len__ reports len(xs), so a shorter ys would raise
        # IndexError part-way through iteration and a longer ys would be
        # silently truncated.
        if len(xs) != len(ys):
            raise ValueError(
                f"xs and ys must have the same length, got {len(xs)} and {len(ys)}"
            )
        self.xs = xs
        self.ys = ys

    def __getitem__(self, i):
        """Return the ``(x, y)`` pair stored at index ``i``."""
        return self.xs[i], self.ys[i]

    def __len__(self):
        """Return the number of ``(x, y)`` pairs."""
        return len(self.xs)

In [None]:
# Swap the plain list for the custom class; indexing it works the same way.
dataset = MyDataset(xs=xs, ys=ys)
dataset[2]  # -> the tuple (xs[2], ys[2])

(2, 12)

In [None]:
from torch.utils.data import DataLoader
# No batch_size is passed here, so every iteration yields a single sample,
# with x and y each wrapped in a 1-element tensor.
for x, y in DataLoader(dataset):
    print(x,y)

tensor([0]) tensor([10])
tensor([1]) tensor([11])
tensor([2]) tensor([12])
tensor([3]) tensor([13])
tensor([4]) tensor([14])
tensor([5]) tensor([15])
tensor([6]) tensor([16])
tensor([7]) tensor([17])
tensor([8]) tensor([18])
tensor([9]) tensor([19])


In [None]:
# batch_size=2 groups consecutive samples, so x and y each hold two values
# per iteration.
for x, y in DataLoader(dataset, batch_size=2):
    print(x,y)

tensor([0, 1]) tensor([10, 11])
tensor([2, 3]) tensor([12, 13])
tensor([4, 5]) tensor([14, 15])
tensor([6, 7]) tensor([16, 17])
tensor([8, 9]) tensor([18, 19])


In [None]:
# shuffle=True visits the samples in random order; each x is still paired
# with its own y inside a batch.
for x, y in DataLoader(dataset, batch_size=2, shuffle=True):
    print(x,y)

tensor([1, 7]) tensor([11, 17])
tensor([9, 6]) tensor([19, 16])
tensor([8, 4]) tensor([18, 14])
tensor([3, 5]) tensor([13, 15])
tensor([2, 0]) tensor([12, 10])


In [None]:
# Grab the sampler the DataLoader builds when neither shuffle nor sampler
# is specified.
default_sampler = DataLoader(dataset).sampler
for i in default_sampler:
    # iterating over the SequentialSampler
    # (it yields integer indices into the dataset, not the samples)
    print(i)

0
1
2
3
4
5
6
7
8
9


In [None]:
# Check which sampler class the DataLoader picked by default.
type(default_sampler)

torch.utils.data.sampler.SequentialSampler

In [None]:
from torch.utils.data.sampler import SequentialSampler
# Build the sampler directly instead of pulling it off a DataLoader;
# iterating it yields the same in-order index sequence.
sampler = SequentialSampler(dataset)
for x in sampler:
    print(x)

0
1
2
3
4
5
6
7
8
9


In [None]:
# With shuffle=True the DataLoader's sampler yields the dataset indices in
# random order.
random_sampler = DataLoader(dataset, shuffle=True).sampler
for index in random_sampler:
    print(index)

2
5
9
7
6
0
1
4
3
8


In [None]:
# Check which sampler class shuffle=True selected.
type(random_sampler)

torch.utils.data.sampler.RandomSampler

In [None]:
from torch.utils.data.sampler import RandomSampler
# Construct the random sampler by hand; iterating it yields every dataset
# index once, in random order.
random_sampler = RandomSampler(dataset)
for x in random_sampler:
    print(x)

9
5
1
8
6
3
2
4
7
0


In [None]:
# Hand a ready-made sampler to the DataLoader through the sampler= argument,
# then iterate the loader's sampler to see the index order it will use.
dl = DataLoader(dataset, sampler=random_sampler)
for i in dl.sampler:
    print(i)

3
8
7
0
1
2
6
4
9
5


In [None]:
# Suppose we want batches drawn from the first half of the dataset to stay separate from batches drawn from the second half. That is where batch samplers come in.

In [None]:
# The batch sampler groups the sampler's indices into lists of batch_size;
# the final list is shorter when the dataset size is not a multiple of it.
batch_size = 3
default_batch_sampler = DataLoader(dataset, batch_size=batch_size).batch_sampler
for i, batch_indices in enumerate(default_batch_sampler):
    print(f'Batch #{i} indices: ', batch_indices)

Batch #0 indices:  [0, 1, 2]
Batch #1 indices:  [3, 4, 5]
Batch #2 indices:  [6, 7, 8]
Batch #3 indices:  [9]


In [None]:
# Check which class the DataLoader uses to form batches of indices.
type(default_batch_sampler)

torch.utils.data.sampler.BatchSampler

In [None]:
from torch.utils.data.sampler import BatchSampler

In [None]:
# Print the class docstring to see the constructor arguments and usage examples.
print(BatchSampler.__doc__)

Wraps another sampler to yield a mini-batch of indices.

    Args:
        sampler (Sampler or Iterable): Base sampler. Can be any iterable object
        batch_size (int): Size of mini-batch.
        drop_last (bool): If ``True``, the sampler will drop the last batch if
            its size would be less than ``batch_size``

    Example:
        >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
        >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    


# DATA PARALLELISM

In [None]:
######################################################################
# Imports and parameters
# ----------------------
#
# Import PyTorch modules and define parameters.
#

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Parameters and DataLoaders
# Features per sample / outputs produced by the linear layer.
input_size, output_size = 5, 2

# Samples per batch / total samples in the dummy dataset.
batch_size, data_size = 30, 100

In [None]:

######################################################################
# Device
#
# Use the first CUDA device when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

######################################################################
# Dummy DataSet
# -------------
#
# Make a dummy (random) dataset. You just need to implement
# ``__getitem__`` and ``__len__``.
#

class RandomDataset(Dataset):
    """Dataset of ``length`` random vectors, each holding ``size`` values."""

    def __init__(self, size, length):
        # All samples are drawn once, up front, as a (length, size) tensor.
        self.data = torch.randn(length, size)
        self.len = length

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.len

    def __getitem__(self, index):
        """Return the row of ``data`` selected by ``index``."""
        sample = self.data[index]
        return sample

# Shuffled loader over the dummy dataset, served batch_size samples at a time.
rand_loader = DataLoader(
    RandomDataset(size=input_size, length=data_size),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
######################################################################
# Simple Model
# ------------
#
# For the demo, our model just gets an input, performs a linear operation, and
# gives an output. However, you can use ``DataParallel`` on any model (CNN, RNN,
# Capsule Net etc.)
#
# We've placed a print statement inside the model to monitor the size of input
# and output tensors.
# Please pay attention to what is printed at batch rank 0.
#

class Model(nn.Module):
    """Single linear layer that reports the tensor sizes it sees on every call."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        """Apply the linear layer to ``input`` and print both tensor sizes.

        Args:
            input: Tensor whose last dimension equals ``input_size``.

        Returns:
            The output of ``self.fc`` for ``input``.
        """
        result = self.fc(input)
        # Printed from inside forward so the sizes seen by the model itself
        # can be compared with the sizes seen by the caller.
        print("\tIn Model: input size", input.size(),
              "output size", result.size())
        return result

In [None]:
######################################################################
# Create Model and DataParallel
# -----------------------------
#
# This is the core part of the tutorial. First, we need to make a model instance
# and check if we have multiple GPUs. If we have multiple GPUs, we can wrap
# our model using ``nn.DataParallel``. Then we can put our model on GPUs by
# ``model.to(device)``
#

model = Model(input_size, output_size)
# Wrap the model in DataParallel only when more than one GPU is visible.
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

# Move the (possibly wrapped) model to the selected device.
model.to(device)

In [None]:
######################################################################
# Run the Model
# -------------
#
# Now we can see the sizes of input and output tensors.
#

# Feed every batch through the model and report the sizes seen from outside it.
for data in rand_loader:
    # Named `inputs` rather than `input` so the built-in input() is not
    # shadowed for the rest of the kernel session.
    inputs = data.to(device)
    output = model(inputs)
    print("Outside: input size", inputs.size(),
          "output_size", output.size())

In [None]:
######################################################################
# Results
# -------
#
# If you have no GPU or one GPU, when we batch 30 inputs and 30 outputs, the model gets 30 and outputs 30 as
# expected. But if you have multiple GPUs, then you can get results like this.

In [None]:
# 2 GPUs
# ~~~~~~
#
# If you have 2 GPUs, you will see:
#
# .. code:: bash
#
#     # on 2 GPUs
#     Let's use 2 GPUs!
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([5, 5]) output size torch.Size([5, 2])
#         In Model: input size torch.Size([5, 5]) output size torch.Size([5, 2])
#     Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])

In [None]:
# 3 GPUs
# ~~~~~~
#
# If you have 3 GPUs, you will see:
#
# .. code:: bash
#
#     Let's use 3 GPUs!
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#     Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])