# Dataset Transforms

In [None]:
import torch
import torchvision

# Fetch MNIST into ./data (downloading if requested) and convert each image
# to a tensor via the built-in ToTensor transform.
dataset = torchvision.datasets.MNIST(
    root='./data',
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

Transforms can be applied to PIL Images, tensors, ndarrays, or custom data when creating the dataset.

In [13]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

# Custom dataset class for the wine CSV; it inherits from torch's Dataset.
class WineDataset(Dataset):
    """Wine samples read from a CSV file, with an optional per-sample transform.

    The file is expected to have one header row followed by numeric rows whose
    first column is the label and whose remaining columns are the features.
    Data is kept as float32 numpy arrays; conversion to tensors is left to the
    transform.
    """

    def __init__(self, transform=None, csv_path='./wine.csv'):
        """Load the CSV into memory.

        Args:
            transform: optional callable applied to each ``(features, label)``
                pair returned by ``__getitem__``.
            csv_path: location of the comma-separated data file. Defaults to
                ``'./wine.csv'``, the path this class originally hard-coded.
        """
        # skiprows=1 drops the header line. ndmin=2 keeps the array 2-D even
        # when the file holds a single data row; without it loadtxt returns a
        # 1-D array and the column slicing below would fail.
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32,
                        skiprows=1, ndmin=2)
        self.n_samples = xy.shape[0]

        # Deliberately left as numpy arrays (no tensor conversion here).
        self.x = xy[:, 1:]   # every column after the first: the features
        self.y = xy[:, [0]]  # first column is the label; shape (n_samples, 1)

        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``, transformed if a transform was given."""
        sample = self.x[index], self.y[index]

        if self.transform:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples (rows) loaded from the file."""
        return self.n_samples

Defining the `__call__` method on a class lets its instances be called like functions.

```
class Example:
	def __init__(self):
		print("Instance Created")
	
	# Defining __call__ method
	def __call__(self):
		print("Instance is called via special method")

# Instance created
e = Example()

# __call__ method will be called
e()
```

In [17]:
# Custom transform: turns a (features, labels) pair of numpy arrays into tensors.
class ToTensor:
  def __call__(self, sample):
    """Convert both numpy arrays in ``sample`` to torch tensors.

    Uses ``torch.from_numpy`` for each element and returns the results as a
    ``(features, labels)`` tuple.
    """
    features_np, labels_np = sample
    features_t = torch.from_numpy(features_np)
    labels_t = torch.from_numpy(labels_np)
    return features_t, labels_t

# Build the wine dataset with the tensor-converting transform and inspect
# the first sample.
dataset = WineDataset(transform=ToTensor())
features, labels = first_data = dataset[0]
print(type(features), type(labels))
print(features)

<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])


In [19]:
# Multiplies all features by the factor passed into the object; labels are
# returned unchanged.
class MulTransform:
  """Transform that scales the features of a ``(features, labels)`` sample."""

  def __init__(self, factor):
    """Store the multiplier applied to the features on every call."""
    self.factor = factor

  def __call__(self, sample):
    """Return ``(features * factor, labels)`` without modifying the input.

    The multiplication is done out of place on purpose. An in-place ``*=``
    would write through to whatever memory ``features`` shares; a tensor made
    with ``torch.from_numpy`` shares the source array's memory, so the stored
    data would be rescaled again on every access.
    """
    features, labels = sample
    scaled = features * self.factor
    return scaled, labels

In [21]:
# Chain the two transforms (tensor conversion, then multiplication by 2) and
# inspect the first sample of a dataset that uses the combined transform.
composed = torchvision.transforms.Compose([
    ToTensor(),
    MulTransform(2),
])
dataset1 = WineDataset(transform=composed)

features, labels = first_data = dataset1[0]
print(type(features), type(labels))
print(features)

<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03])
