Demo: handling data in PyTorch with Dataset and DataLoader

In [14]:
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torchvision import transforms, utils

# Dataset is an abstract class: a custom dataset must subclass it and override __getitem__ (and usually __len__)
class LocalDataset(Dataset):
    """In-memory toy dataset of random tensors, used to demonstrate DataLoader.

    The whole dataset is a single tensor of shape ``(num_samples, *sample_shape)``
    filled by ``torch.randn``; item ``i`` is the slice ``data[i]``.

    Args:
        num_samples: Number of items held by the dataset. Defaults to 8.
        sample_shape: Shape of each individual item. Defaults to ``(3, 3)``.
    """

    def __init__(self, num_samples: int = 8, sample_shape: tuple[int, ...] = (3, 3)):
        # The first dimension indexes the items; the rest is the per-item shape.
        self.data = torch.randn(num_samples, *sample_shape)

    def __len__(self) -> int:
        """Return the total number of items (size of the first dimension)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        return self.data[idx]

dataset = LocalDataset()
# RandomSampler already yields the indices in random order, so no `shuffle`
# argument is passed (an explicit sampler is mutually exclusive with shuffle=True).
loader = DataLoader(dataset, batch_size=4, sampler=RandomSampler(dataset))

# Correct way: iterate over loader (not loader.dataset) to get batches
print("\n=== Getting batches from DataLoader ===")
for batch_idx, batch_data in enumerate(loader):
    print(f"Batch {batch_idx}:")
    print(f"  Batch shape: {batch_data.shape}")  # (batch_size, 3, 3) = (4, 3, 3)
    print(f"  Batch data:\n{batch_data}\n")

# Alternative ways to get batches:
print("\n=== Alternative methods ===")

# Method 1: Pull batches one at a time by calling next() on an explicit iterator.
# next() raises StopIteration once every batch of this pass has been consumed.
loader_iter = iter(loader)
while True:
    try:
        single_batch = next(loader_iter)
    except StopIteration:
        break
    print(f"Single batch shape: {single_batch.shape}")

# Method 2: Get all batches as a list (use with caution for large datasets)
all_batches: list[torch.Tensor] = list(loader)
print(f"Total number of batches: {len(all_batches)}")

# Method 3: Get specific batch by index (guarded, since the list may be empty)
if all_batches:
    first_batch = all_batches[0]
    print(f"First batch shape: {first_batch.shape}")




=== Getting batches from DataLoader ===
Batch 0:
  Batch shape: torch.Size([4, 3, 3])
  Batch data:
tensor([[[-0.0441, -0.9239, -0.0489],
         [-1.1315, -1.0919,  0.2455],
         [ 1.4200,  0.1657,  2.0345]],

        [[-0.5055,  0.1512, -0.6610],
         [-0.9404,  0.8036,  1.2253],
         [ 0.4247,  2.4565, -0.3346]],

        [[ 0.2774,  1.7879, -1.1178],
         [ 0.2190, -0.6137, -0.9667],
         [-0.3175, -0.5048, -1.3240]],

        [[-1.8990,  0.8562, -1.6622],
         [ 0.1026,  0.8704,  1.4029],
         [ 1.1143,  0.8416,  1.1801]]])

Batch 1:
  Batch shape: torch.Size([4, 3, 3])
  Batch data:
tensor([[[-0.2032,  0.9272, -0.3205],
         [-2.5817,  0.6640,  1.7036],
         [-0.4727,  0.4048, -1.0679]],

        [[-0.5656, -0.9288, -0.1686],
         [ 0.0034,  1.2907, -2.1456],
         [ 0.6567,  0.1978,  0.6702]],

        [[-0.6937, -0.0995, -1.4037],
         [ 0.0629,  0.3643,  0.9093],
         [ 0.4192, -0.9161, -0.0240]],

        [[ 1.6903,  0.2205

How to use transforms.Compose to preprocess raw data

In [None]:

import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torchvision import transforms, utils

# Compose chains the transforms and applies them in the order listed.
transform = transforms.Compose([
    # transforms.ToTensor(),  # only needed for PIL image / ndarray input; raw_data is already a tensor
    # Resize the last two dimensions (height, width) to 4x4
    transforms.Resize((4, 4)),
    # Per-channel (x - mean) / std. Normalize is documented to take one value per
    # channel; mean 0 and std 1 leave the values unchanged, so only the resize is visible.
    transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]),
])

# One random 3-channel 2x2 "image" with a leading batch dimension: (1, 3, 2, 2)
raw_data = torch.randn(1, 3, 2, 2)
print(raw_data)
print(raw_data.shape)
processed_data = transform(raw_data)

print(processed_data)
print(processed_data.shape)

tensor([[[[-0.8062, -0.4413],
          [-0.7722, -0.2913]],

         [[-1.1599,  0.3747],
          [-0.3961, -1.7566]],

         [[ 2.4474, -0.4904],
          [ 0.2706,  1.0028]]]])
torch.Size([1, 3, 2, 2])
tensor([[[[-0.8062, -0.7150, -0.5325, -0.4413],
          [-0.7977, -0.6992, -0.5022, -0.4038],
          [-0.7807, -0.6677, -0.4418, -0.3288],
          [-0.7722, -0.6520, -0.4115, -0.2913]],

         [[-1.1599, -0.7763, -0.0090,  0.3747],
          [-0.9690, -0.7663, -0.3609, -0.1582],
          [-0.5871, -0.7462, -1.0646, -1.2238],
          [-0.3961, -0.7362, -1.4165, -1.7566]],

         [[ 2.4474,  1.7129,  0.2440, -0.4904],
          [ 1.9032,  1.3981,  0.3880, -0.1171],
          [ 0.8148,  0.7685,  0.6758,  0.6295],
          [ 0.2706,  0.4536,  0.8198,  1.0028]]]])
torch.Size([1, 3, 4, 4])
