In [8]:
# Toy data: ten inputs (0..9) and ten matching targets (10..19).
xs = [*range(10)]
ys = [*range(10, 20)]
print('xs values: ', xs)
print('ys values: ', ys)

# The simplest possible "dataset": a plain list of (x, y) tuples.
dataset = [(x, y) for x, y in zip(xs, ys)]
dataset[0]  # evaluates to the tuple (xs[0], ys[0])

# Last expression of the cell, so the notebook displays it.
len(dataset)

xs values:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
ys values:  [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]


10

In [12]:
#collapse-show

class MyDataset:
    """Minimal map-style dataset that pairs each input with its target.

    Only ``__getitem__`` and ``__len__`` are implemented, so items are
    fetched by integer index and the size is known up front.

    Args:
        xs: indexable, sized collection of inputs.
        ys: indexable, sized collection of targets, same length as ``xs``.

    Raises:
        ValueError: if ``xs`` and ``ys`` differ in length.
    """

    def __init__(self, xs, ys):
        # Fail fast: with mismatched lengths __len__ would report len(xs)
        # while __getitem__ either raises IndexError late or silently
        # ignores the surplus targets.
        if len(xs) != len(ys):
            raise ValueError(
                f"xs and ys must have the same length, got {len(xs)} and {len(ys)}"
            )
        self.xs = xs
        self.ys = ys

    def __getitem__(self, i):
        """Return the ``(x, y)`` pair stored at index ``i``."""
        return self.xs[i], self.ys[i]

    def __len__(self):
        """Return the number of ``(x, y)`` pairs."""
        return len(self.xs)
# Swap the list of tuples for the class-based dataset.
dataset = MyDataset(xs=xs, ys=ys)
# Indexing goes through MyDataset.__getitem__ and yields (xs[2], ys[2]).
dataset[2]

# len() goes through MyDataset.__len__; shown as the cell's result.
len(dataset)

10

In [13]:

from torch.utils.data import DataLoader

# No batch_size/shuffle given: the printed output shows one item per
# batch, in dataset order.
loader = DataLoader(dataset)
for xb, yb in loader:
    print(xb, yb)

tensor([0]) tensor([10])
tensor([1]) tensor([11])
tensor([2]) tensor([12])
tensor([3]) tensor([13])
tensor([4]) tensor([14])
tensor([5]) tensor([15])
tensor([6]) tensor([16])
tensor([7]) tensor([17])
tensor([8]) tensor([18])
tensor([9]) tensor([19])


In [14]:
# Group consecutive items into batches of two.
batched_loader = DataLoader(dataset, batch_size=2)
for xb, yb in batched_loader:
    print(xb, yb)

tensor([0, 1]) tensor([10, 11])
tensor([2, 3]) tensor([12, 13])
tensor([4, 5]) tensor([14, 15])
tensor([6, 7]) tensor([16, 17])
tensor([8, 9]) tensor([18, 19])


In [15]:

# Same batch size, but indices are drawn in random order; no seed is
# set, so the order changes from run to run.
shuffled_loader = DataLoader(dataset, batch_size=2, shuffle=True)
for xb, yb in shuffled_loader:
    print(xb, yb)

tensor([7, 9]) tensor([17, 19])
tensor([5, 2]) tensor([15, 12])
tensor([8, 4]) tensor([18, 14])
tensor([1, 3]) tensor([11, 13])
tensor([0, 6]) tensor([10, 16])


In [16]:
# Grab the sampler a DataLoader builds when none is passed explicitly.
default_loader = DataLoader(dataset)
default_sampler = default_loader.sampler

In [17]:

# Iterating over the SequentialSampler yields dataset indices, not items.
for idx in default_sampler:
    print(idx)

0
1
2
3
4
5
6
7
8
9


In [18]:

# Inspect which sampler class the DataLoader created by default.
type(default_sampler)

torch.utils.data.sampler.SequentialSampler

In [19]:
from torch.utils.data.sampler import SequentialSampler

# Build the same kind of sampler by hand instead of taking it from a
# DataLoader.
sampler = SequentialSampler(dataset)

for idx in sampler:
    print(idx)

0
1
2
3
4
5
6
7
8
9


In [20]:
# Take the sampler from a DataLoader created with shuffle=True.
shuffling_loader = DataLoader(dataset, shuffle=True)
random_sampler = shuffling_loader.sampler
for idx in random_sampler:
    print(idx)

9
6
2
5
1
4
3
8
0
7


In [21]:
# Inspect which sampler class the DataLoader uses when shuffle=True.
type(random_sampler)

torch.utils.data.sampler.RandomSampler

In [22]:

from torch.utils.data.sampler import RandomSampler

# Construct the random sampler directly; every pass over it draws a new
# ordering of the dataset indices.
random_sampler = RandomSampler(dataset)

for idx in random_sampler:
    print(idx)

5
8
6
1
2
7
0
4
3
9


In [23]:
# Pass the sampler in explicitly instead of using shuffle=True, then
# iterate over the loader's sampler attribute to see the index order.
dl = DataLoader(dataset, sampler=random_sampler)
for idx in dl.sampler:
    print(idx)

0
6
9
3
4
5
2
1
7
8


In [24]:

#collapse-hide
import random
from torch.utils.data.sampler import Sampler

class IndependentHalvesSampler(Sampler):
    """Sampler that shuffles each half of the dataset independently.

    Every pass yields all indices of the first half in random order,
    followed by all indices of the second half in random order, so no
    index ever crosses the halfway point. For an odd-length dataset the
    second half holds the extra element.

    Randomness comes from Python's global ``random`` module; call
    ``random.seed(...)`` beforehand for a reproducible ordering.

    Args:
        dataset: any object supporting ``len()``.

    Attributes:
        first_half_indices: sorted indices ``[0, halfway)``.
        second_half_indices: sorted indices ``[halfway, len(dataset))``.
    """

    def __init__(self, dataset):
        total = len(dataset)
        # Integer floor division; avoids the float round trip of int(n / 2).
        halfway_point = total // 2
        self.first_half_indices = list(range(halfway_point))
        self.second_half_indices = list(range(halfway_point, total))

    def __iter__(self):
        """Return an iterator over a freshly shuffled index order."""
        # Shuffle copies so the stored index lists are never mutated:
        # inspecting the attributes gives the same answer before and after
        # iterating.
        first = list(self.first_half_indices)
        second = list(self.second_half_indices)
        random.shuffle(first)
        random.shuffle(second)
        return iter(first + second)

    def __len__(self):
        """Return the total number of indices produced per pass."""
        return len(self.first_half_indices) + len(self.second_half_indices)

In [25]:
# Instantiate the custom sampler and show the index range of each half.
our_sampler = IndependentHalvesSampler(dataset)
for label, half in (
    ('First half indices: ', our_sampler.first_half_indices),
    ('Second half indices:', our_sampler.second_half_indices),
):
    print(label, half)

First half indices:  [0, 1, 2, 3, 4]
Second half indices: [5, 6, 7, 8, 9]


In [26]:

# One pass over the sampler: first-half indices, then second-half ones.
for idx in our_sampler:
    print(idx)

1
3
0
4
2
5
6
8
9
7


In [27]:

# Drive a DataLoader with the custom sampler so that batches follow the
# index order it produces.
dl = DataLoader(dataset, sampler=our_sampler)
for inputs, targets in dl:
    print(inputs, targets)

tensor([2]) tensor([12])
tensor([0]) tensor([10])
tensor([1]) tensor([11])
tensor([4]) tensor([14])
tensor([3]) tensor([13])
tensor([6]) tensor([16])
tensor([7]) tensor([17])
tensor([5]) tensor([15])
tensor([9]) tensor([19])
tensor([8]) tensor([18])
