In [8]:
import torch
from torch.utils.data import Dataset, DataLoader

### create ToyDataset

In [2]:
# Toy dataset: five training points and two test points, each with two
# features and a binary class label (0 or 1).
X_train = torch.tensor(
    [
        [-1.2, 3.1],   # label 0
        [-0.9, 2.9],   # label 0
        [-0.5, 2.6],   # label 0
        [2.3, -1.1],   # label 1
        [2.7, -1.5],   # label 1
    ]
)
X_test = torch.tensor(
    [
        [-0.8, 2.8],   # label 0
        [2.6, -1.6],   # label 1
    ]
)

# Labels, aligned row by row with the feature tensors above.
y_train = torch.tensor([0, 0, 0, 1, 1])
y_test = torch.tensor([0, 1])

In [6]:
# create custom dataset class
class ToyDataset(Dataset):
    """Map-style dataset that pairs each feature record with its label.

    Args:
        X: Indexable, sized collection of feature records (in this notebook,
            a 2-D tensor with one row per sample).
        y: Tensor of labels, one entry per record in ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of records.
    """

    def __init__(self, X, y):
        # These could be file paths, file objects, database connectors, etc.
        # Here X and y are the in-memory toy tensors created earlier.
        #
        # Fail fast on a size mismatch: __len__ is driven by the labels only,
        # so mismatched inputs would otherwise either silently ignore extra
        # feature rows or raise an IndexError in the middle of an epoch.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must contain the same number of records, "
                f"got {len(X)} and {len(y)}"
            )
        self.features = X
        self.labels = y

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y

    def __len__(self):
        """Return the number of records, taken from the label tensor."""
        return self.labels.shape[0]

In [5]:
# wrap the train / test tensors in ToyDataset instances for use with DataLoader
train_ds = ToyDataset(X=X_train, y=y_train)
test_ds = ToyDataset(X=X_test, y=y_test)

In [7]:
# sanity check: ToyDataset.__len__ should report the number of training records
print(len(train_ds))

5


### create DataLoader

In [9]:
# seed PyTorch's global random number generator so the batch order produced by
# the shuffle=True loaders below is repeatable from a fresh kernel
torch.manual_seed(123)

<torch._C.Generator at 0x120cec290>

In [10]:
# training loader: mini-batches of 2, reshuffled on every pass, loaded in the
# main process (num_workers=0)
train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=0)

# test loader: same batch size, but samples keep their dataset order
test_loader = DataLoader(test_ds, batch_size=2, shuffle=False, num_workers=0)

In [11]:
# iterate over train_loader: one full pass over the shuffled training set.
# 5 samples with batch_size=2 give two full batches plus a final batch that
# holds a single sample.
for idx, (x,y) in enumerate(train_loader):
    print(f"Batch {idx+1}: ", x, y)

Batch 1:  tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])
Batch 2:  tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 3:  tensor([[ 2.7000, -1.5000]]) tensor([1])


In [13]:
# iterate over test_loader: shuffle=False, so samples come back in dataset
# order; both test samples fit into a single batch of size 2
for idx, (x,y) in enumerate(test_loader):
    print(f"Batch {idx+1}: ", x, y)

Batch 1:  tensor([[-0.8000,  2.8000],
        [ 2.6000, -1.6000]]) tensor([0, 1])


In [15]:
# same settings as the shuffled training loader, except that drop_last=True
# discards the final, incomplete batch in each epoch
train_loader_drop_last = DataLoader(
    train_ds, batch_size=2, shuffle=True, num_workers=0, drop_last=True
)

In [16]:
# with drop_last=True the leftover single-sample batch is skipped, so the
# 5 training samples yield only two full batches of size 2
for idx, (x,y) in enumerate(train_loader_drop_last):
    print(f"Batch {idx+1}: ", x, y)

Batch 1:  tensor([[-0.5000,  2.6000],
        [-0.9000,  2.9000]]) tensor([0, 0])
Batch 2:  tensor([[-1.2000,  3.1000],
        [ 2.3000, -1.1000]]) tensor([0, 1])
