In [1]:
#link to tutorial: https://docs.pytorch.org/tutorials/beginner/basics/transforms_tutorial.html

In [2]:
# Data doesn't always come in the final processed form required for ML
# algorithms. We use transforms to manipulate the data and make it suitable
# for training. All torchvision datasets have two parameters -- [transform]
# to modify the features and [target_transform] to modify the labels -- that
# accept callables containing the transformation logic. The
# torchvision.transforms module offers several commonly used transforms
# out of the box.

# The FashionMNIST features are in PIL Image format, and the labels are
# integers. For training, we need the features as normalized tensors
# and the labels as one-hot encoded tensors. To make these transformations,
# we use ToTensor and Lambda.

In [5]:
import torch 
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

# Number of distinct labels in the dataset; this is the length of every
# one-hot vector produced below.
NUM_CLASSES = 10


def one_hot_label(y):
    """Turn an integer class label into a one-hot encoded float tensor.

    Args:
        y: integer label, used as the index that receives the 1.

    Returns:
        A float tensor of length NUM_CLASSES that is zero everywhere except
        for a 1 at position ``y``.
    """
    blank = torch.zeros(NUM_CLASSES, dtype=torch.float)
    position = torch.tensor(y)
    # scatter_ writes value=1 along dim 0 at the index given by the label.
    return blank.scatter_(0, position, value=1)


ds = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),  # features: image -> float tensor scaled to [0., 1.]
    target_transform=Lambda(one_hot_label),  # labels: int -> one-hot tensor
)

In [6]:
# ToTensor converts a PIL image or NumPy ndarray into a FloatTensor and
# scales the image's pixel intensity values to the range [0., 1.].

In [8]:
# Lambda transforms apply any user-defined lambda function. Here we define
# a function to turn the integer label into a one-hot encoded tensor.
# It first creates a zero tensor of size 10 (the number of labels in
# the dataset) and then calls scatter_, which assigns value=1 at the index
# given by the label y.

In [10]:
# Starting point of the one-hot encoding: a length-10 tensor filled with
# zeros, one slot per label; scatter_ later sets a single slot to 1.
torch.zeros(10)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])