In [5]:
import albumentations as A
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Shared preprocessing constants: both pipelines below must resize and
# normalize identically, so the values are defined once.
IMAGE_SIZE = 224  # side length in pixels of the square model input
IMAGENET_MEAN = (0.485, 0.456, 0.406)  # per-channel (RGB) ImageNet mean
IMAGENET_STD = (0.229, 0.224, 0.225)   # per-channel (RGB) ImageNet std

# Validation preprocessing pipeline (torchvision, deterministic).
# Called positionally on a PIL image: valid_preprocess(img) -> tensor.
valid_preprocess = transforms.Compose([
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),  # Uniform image size
    transforms.ToTensor(),  # Convert to PyTorch tensor
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Training augmentation pipeline (albumentations, randomized).
# NOTE: unlike the torchvision pipeline above, an albumentations Compose must
# be called with keyword arguments on a NumPy array and returns a dict:
#     tensor = train_augmentation(image=np_image)["image"]
train_augmentation = A.Compose([
    # `always_apply` is deprecated in recent albumentations releases; Resize
    # already defaults to p=1, which is spelled out here to keep the intent.
    A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, p=1.0),  # Ensure 224x224 size
    A.HorizontalFlip(p=0.5),  # 50% chance of horizontal flip
    A.Rotate(limit=(-20, 20), p=0.5),  # 50% chance of a rotation within ±20 degrees
    A.RandomBrightnessContrast(
        brightness_limit=0.2,
        contrast_limit=0.2,
        p=0.2,  # Applied to 20% of samples
    ),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),  # Same statistics as validation
    ToTensorV2(),  # Convert to tensor for PyTorch
])

In [None]:
class AlbumentationsTransform:
    """Adapt an albumentations pipeline to torchvision's ``transform(img)`` protocol.

    ``datasets.ImageFolder`` invokes its transform positionally with a PIL
    image, whereas an albumentations ``Compose`` only accepts keyword
    arguments (``image=<ndarray>``) and returns a dict. Passing the pipeline
    to ``ImageFolder`` directly therefore fails as soon as the first sample
    is fetched. This wrapper bridges the two calling conventions.

    A class is used instead of a lambda so the transform can be pickled for
    DataLoader worker processes.
    """

    def __init__(self, pipeline):
        # pipeline: an albumentations Compose whose result dict has an "image" key.
        self.pipeline = pipeline

    def __call__(self, pil_image):
        # np.array() copies the PIL pixel data into a writable ndarray, which is
        # the input type albumentations operates on.
        augmented = self.pipeline(image=np.array(pil_image))
        return augmented["image"]


BATCH_SIZE = 32   # samples per batch for both loaders
NUM_WORKERS = 2   # worker processes per loader for parallel data loading

# Load training and validation datasets
training_data = datasets.ImageFolder(
    root='data/train',
    # Augmentation pipeline for training, wrapped so ImageFolder can call it
    transform=AlbumentationsTransform(train_augmentation),
)
validation_data = datasets.ImageFolder(
    root='data/valid',
    transform=valid_preprocess,  # torchvision pipeline; callable on PIL images as-is
)

# Create data loaders for batching
train_dataloader = DataLoader(
    dataset=training_data,
    batch_size=BATCH_SIZE,
    shuffle=True,  # Randomize training batches
    num_workers=NUM_WORKERS,
)
val_dataloader = DataLoader(
    dataset=validation_data,
    batch_size=BATCH_SIZE,
    shuffle=False,  # No shuffling for validation
    num_workers=NUM_WORKERS,
)

# Determine number of unique classes (one per sub-directory of data/train)
total_classes = len(training_data.classes)
print(f"Number of classes: {total_classes}")

Number of classes: 38
