In [1]:
# Two parallel lists: inputs 0-9 and their targets 10-19.
xs = [*range(10)]
ys = [*range(10, 20)]
print('xs values: ', xs)
print('ys values: ', ys)

# Pair the lists element-wise. A plain list of tuples already supports
# indexing and len(), which is all this cell relies on.
dataset = [*zip(xs, ys)]
dataset[0]  # evaluates to the tuple (xs[0], ys[0]); not echoed, it is not the cell's last expression

len(dataset)

xs values:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
ys values:  [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]


10

In [2]:
#collapse-show

class MyDataset:
    """Pairs two indexable containers so that item ``i`` is ``(xs[i], ys[i])``.

    Args:
        xs: Indexable container of inputs; its length defines the dataset length.
        ys: Indexable container of targets, indexed with the same key as ``xs``.
    """

    def __init__(self, xs, ys):
        self.xs, self.ys = xs, ys

    def __getitem__(self, i):
        """Return the ``(x, y)`` tuple obtained by indexing both containers with ``i``."""
        sample = (self.xs[i], self.ys[i])
        return sample

    def __len__(self):
        """Return the number of samples, measured on ``xs`` only."""
        return len(self.xs)
# Rebind `dataset` to the class-based version built from the same xs and ys.
dataset = MyDataset(xs, ys)
dataset[2] # evaluates to the tuple (xs[2], ys[2]); not echoed, only the cell's last expression is displayed

# Goes through MyDataset.__len__; the recorded output for this cell is 10.
len(dataset)

10

In [3]:

from torch.utils.data import DataLoader

# DataLoader with no extra arguments: in the recorded output every iteration
# yields a single sample, in dataset order, as one-element tensors.
for x, y in DataLoader(dataset):
    print(x,y)

tensor([0]) tensor([10])
tensor([1]) tensor([11])
tensor([2]) tensor([12])
tensor([3]) tensor([13])
tensor([4]) tensor([14])
tensor([5]) tensor([15])
tensor([6]) tensor([16])
tensor([7]) tensor([17])
tensor([8]) tensor([18])
tensor([9]) tensor([19])


In [4]:
# batch_size=2: consecutive samples are grouped in pairs (see recorded output),
# so x and y each arrive as two-element tensors.
for x, y in DataLoader(dataset, batch_size=2):
    print(x,y)

tensor([0, 1]) tensor([10, 11])
tensor([2, 3]) tensor([12, 13])
tensor([4, 5]) tensor([14, 15])
tensor([6, 7]) tensor([16, 17])
tensor([8, 9]) tensor([18, 19])


In [5]:

# shuffle=True: samples are drawn in random order before batching. In the
# recorded output each y is still its x plus 10, i.e. pairs stay aligned.
for x, y in DataLoader(dataset, batch_size=2, shuffle=True):
    print(x,y)

tensor([1, 9]) tensor([11, 19])
tensor([8, 3]) tensor([18, 13])
tensor([0, 4]) tensor([10, 14])
tensor([6, 2]) tensor([16, 12])
tensor([5, 7]) tensor([15, 17])


In [6]:
# Keep a handle on the sampler a DataLoader builds when neither `sampler`
# nor `shuffle` is given, so it can be inspected in the next cells.
default_sampler = DataLoader(dataset).sampler

In [7]:

# A sampler yields dataset indices, not samples: the recorded output is 0..9 in order.
for i in default_sampler:
    # iterating over the SequentialSampler
    print(i)

0
1
2
3
4
5
6
7
8
9


In [8]:

# Confirm which sampler class the DataLoader picked (recorded output: SequentialSampler).
type(default_sampler)

torch.utils.data.sampler.SequentialSampler

In [9]:
from torch.utils.data.sampler import SequentialSampler

# Build the same kind of sampler by hand instead of pulling it off a DataLoader.
sampler = SequentialSampler(dataset)

# Recorded output matches the DataLoader's default sampler: indices 0..9 in order.
for x in sampler:
    print(x)

0
1
2
3
4
5
6
7
8
9


In [10]:
# With shuffle=True the DataLoader exposes a different sampler; iterating it
# prints the indices 0..9 in a shuffled order (see recorded output).
random_sampler = DataLoader(dataset, shuffle=True).sampler
for index in random_sampler:
    print(index)

2
3
1
7
6
0
5
4
8
9


In [11]:
# Confirm the sampler class used for shuffle=True (recorded output: RandomSampler).
type(random_sampler)

torch.utils.data.sampler.RandomSampler

In [12]:

from torch.utils.data.sampler import RandomSampler

# Build the shuffling sampler by hand; this rebinds `random_sampler` from the earlier cell.
random_sampler = RandomSampler(dataset)

# Recorded output is again a permutation of 0..9, in a different order than before.
for x in random_sampler:
    print(x)

7
8
1
9
5
2
6
4
3
0


In [13]:
# Pass an explicit sampler to the DataLoader instead of using shuffle=True,
# then iterate the sampler the DataLoader exposes. The recorded output is a
# fresh permutation of 0..9.
dl = DataLoader(dataset, sampler=random_sampler)
for i in dl.sampler:
    print(i)

0
9
6
8
4
7
3
2
5
1


In [14]:

#collapse-hide
import random
from torch.utils.data.sampler import Sampler

class IndependentHalvesSampler(Sampler):
    """Sampler that shuffles each half of the dataset separately.

    Every pass yields all first-half indices (in random order) followed by all
    second-half indices (in random order), so an index never leaves its half.

    Args:
        dataset: Any object supporting ``len()``; only its length is used.
    """

    def __init__(self, dataset):
        total = len(dataset)
        midpoint = int(total / 2)
        # For an odd length the extra index lands in the second half.
        self.first_half_indices = [*range(midpoint)]
        self.second_half_indices = [*range(midpoint, total)]

    def __iter__(self):
        # Shuffle in place, first half before second; the stored lists keep
        # their new order after the iterator is returned.
        for half in (self.first_half_indices, self.second_half_indices):
            random.shuffle(half)
        return iter([*self.first_half_indices, *self.second_half_indices])

    def __len__(self):
        return sum(map(len, (self.first_half_indices, self.second_half_indices)))

In [15]:
# Instantiate the custom sampler and show the two index pools it keeps.
# Before any iteration they are still in ascending order (see recorded output).
our_sampler = IndependentHalvesSampler(dataset)
print('First half indices: ', our_sampler.first_half_indices)
print('Second half indices:', our_sampler.second_half_indices)

First half indices:  [0, 1, 2, 3, 4]
Second half indices: [5, 6, 7, 8, 9]


In [16]:

# Iterating shuffles each half and yields the first half, then the second:
# the recorded output lists 0-4 in some order followed by 5-9 in some order.
for i in our_sampler:
    print(i)

3
4
0
1
2
8
5
7
6
9


In [17]:

# Plug the custom sampler into a DataLoader. Samples now arrive one at a time
# in the sampler's order: first-half items, then second-half items.
dl = DataLoader(dataset, sampler=our_sampler)
for xb, yb in dl:
    print(xb, yb)

tensor([0]) tensor([10])
tensor([2]) tensor([12])
tensor([3]) tensor([13])
tensor([4]) tensor([14])
tensor([1]) tensor([11])
tensor([6]) tensor([16])
tensor([9]) tensor([19])
tensor([5]) tensor([15])
tensor([8]) tensor([18])
tensor([7]) tensor([17])


In [19]:
# Combine the index-level sampler with a batch size that does not divide a half.
# In the recorded output the first batch contains indices from both halves,
# i.e. batching here does not respect the half boundary.
batch_size=7
dl = DataLoader(dataset, batch_size=batch_size, sampler=our_sampler)
for xb, yb in dl:
    print(xb, yb)

tensor([0, 1, 3, 2, 4, 9, 8]) tensor([10, 11, 13, 12, 14, 19, 18])
tensor([6, 7, 5]) tensor([16, 17, 15])


In [20]:

# Inspect the batch sampler a DataLoader creates from `batch_size` alone.
# It yields lists of indices; in the recorded output the last list is shorter
# because 10 is not a multiple of 3.
batch_size = 3
default_batch_sampler = DataLoader(dataset, batch_size=batch_size).batch_sampler
for i, batch_indices in enumerate(default_batch_sampler):
    print(f'Batch #{i} indices: ', batch_indices)

Batch #0 indices:  [0, 1, 2]
Batch #1 indices:  [3, 4, 5]
Batch #2 indices:  [6, 7, 8]
Batch #3 indices:  [9]


In [21]:
# Confirm the class of the automatically created batch sampler (recorded output: BatchSampler).
type(default_batch_sampler)

torch.utils.data.sampler.BatchSampler

In [22]:

from torch.utils.data.sampler import BatchSampler

In [23]:
# Print the class docstring to see the constructor arguments and usage examples.
print(BatchSampler.__doc__)

Wraps another sampler to yield a mini-batch of indices.

    Args:
        sampler (Sampler or Iterable): Base sampler. Can be any iterable object
        batch_size (int): Size of mini-batch.
        drop_last (bool): If ``True``, the sampler will drop the last batch if
            its size would be less than ``batch_size``

    Example:
        >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
        >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    


In [24]:
# Wrap the custom index sampler in a BatchSampler by hand. The recorded output
# includes a batch holding one index from each half, so this wrapping alone
# does not keep halves apart.
batch_sampler = BatchSampler(our_sampler, batch_size=2, drop_last=False)
for i, batch_indices in enumerate(batch_sampler):
    print(f'Batch #{i} indices: ', batch_indices)

Batch #0 indices:  [3, 2]
Batch #1 indices:  [0, 4]
Batch #2 indices:  [1, 5]
Batch #3 indices:  [7, 6]
Batch #4 indices:  [8, 9]


In [25]:
import torch

In [26]:
def chunk(indices, chunk_size):
    """Cut ``indices`` into consecutive pieces of ``chunk_size`` elements.

    Args:
        indices: Sequence of integers; converted to a tensor before splitting.
        chunk_size (int): Number of elements per piece.

    Returns:
        The sequence of tensors produced by ``torch.split``. The last piece is
        shorter when ``len(indices)`` is not a multiple of ``chunk_size``.
    """
    index_tensor = torch.tensor(indices)
    pieces = torch.split(index_tensor, chunk_size)
    return pieces

class EachHalfTogetherBatchSampler(Sampler):
    """Batch sampler whose batches never mix the two halves of the dataset.

    Indices ``0 .. len(dataset) // 2 - 1`` form the first half and the
    remaining indices form the second half. On every pass each half is
    shuffled, cut into batches of at most ``batch_size`` indices, and the
    resulting batches are yielded in random order.

    Args:
        dataset: Any object supporting ``len()``; only its length is used.
        batch_size (int): Maximum number of indices per batch.
    """

    def __init__(self, dataset, batch_size):
        halfway_point = len(dataset) // 2
        self.first_half_indices = list(range(halfway_point))
        self.second_half_indices = list(range(halfway_point, len(dataset)))
        self.batch_size = batch_size

    def __iter__(self):
        """Return an iterator over lists of indices, one list per batch."""
        random.shuffle(self.first_half_indices)
        random.shuffle(self.second_half_indices)
        first_half_batches = chunk(self.first_half_indices, self.batch_size)
        second_half_batches = chunk(self.second_half_indices, self.batch_size)
        combined = [batch.tolist() for batch in (*first_half_batches, *second_half_batches)]
        # An empty half (e.g. a one-item dataset) may come back from `chunk`
        # as a single empty piece; drop it so no zero-length batch is yielded.
        combined = [batch for batch in combined if batch]
        random.shuffle(combined)
        return iter(combined)

    def __len__(self):
        """Return the number of batches produced by one pass of ``__iter__``.

        Each half is chunked on its own, so each half contributes
        ``ceil(len(half) / batch_size)`` batches. Dividing the total length by
        the batch size would under-count whenever a half leaves a remainder
        (10 items with batch_size=2 give 3 + 3 = 6 batches, not 5).
        """
        size = self.batch_size
        halves = (self.first_half_indices, self.second_half_indices)
        # (n + size - 1) // size is integer ceil-division; it is 0 for an empty half.
        return sum((len(half) + size - 1) // size for half in halves)


In [27]:
# Iterate the custom batch sampler directly: every item is a list of indices.
# In the recorded output each list stays inside one half (0-4 or 5-9), and
# each half ends with a single-index batch because 5 is odd.
batch_size = 2
each_half_together_batch_sampler = EachHalfTogetherBatchSampler(dataset, batch_size)
for x in each_half_together_batch_sampler:
    print(x)

[0, 1]
[8, 5]
[4, 3]
[7]
[6, 9]
[2]


In [28]:
# Hand the batch sampler to the DataLoader via `batch_sampler=`; no batch_size
# or shuffle is passed, so the sampler alone decides what goes in each batch.
# The recorded output shows six batches, none mixing the two halves.
for i, (xb,yb) in enumerate(DataLoader(dataset, batch_sampler=each_half_together_batch_sampler)):
    print(f'Batch #{i}. x{i}:', xb)
    print(f'          y{i}:', yb)

Batch #0. x0: tensor([9, 8])
          y0: tensor([19, 18])
Batch #1. x1: tensor([0])
          y1: tensor([10])
Batch #2. x2: tensor([4, 3])
          y2: tensor([14, 13])
Batch #3. x3: tensor([5])
          y3: tensor([15])
Batch #4. x4: tensor([6, 7])
          y4: tensor([16, 17])
Batch #5. x5: tensor([2, 1])
          y5: tensor([12, 11])
