# PyTorch Tutorial 02: Working with Datasets

In [1]:
import torch
import torchvision
import numpy as np
import math

In [5]:
class WineDataset(torch.utils.data.Dataset):
    """Map-style dataset that loads a comma-separated numeric file into tensors.

    The whole file is parsed once in ``__init__`` as float32. Column 0 of each
    row is stored as the target ``y`` and the remaining columns as the
    features ``x``.

    Args:
        path: Location of the comma-separated data file. Defaults to the
            relative path used by the tutorial.

    Instance attributes:
        x: Feature tensor with one row per sample.
        y: Target tensor with one row per sample and a single column.
        n_samples: Number of rows in ``x``.
        n_features: Number of columns in ``x``.
    """

    def __init__(self, path: str = '../data/wine/wine.data') -> None:
        # ndmin=2 keeps a single-row file two-dimensional; without it
        # np.loadtxt returns a 1-D array and the column slicing below fails.
        raw_data = np.loadtxt(path, delimiter=',', dtype=np.float32, ndmin=2)
        self.x = torch.from_numpy(raw_data[:, 1:])
        # Indexing with [0] (a list) keeps the column axis, so y is
        # (n_samples, 1) rather than a flat vector.
        self.y = torch.from_numpy(raw_data[:, [0]])
        self.n_samples = self.x.shape[0]
        self.n_features = self.x.shape[1]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

## Using the dataset

In [6]:
# Build the dataset and inspect its first sample; indexing goes through
# WineDataset.__getitem__, which returns a (features, target) pair.
dataset = WineDataset()
x_sample, y_sample = dataset[0]
print (f"x:{ x_sample}, y: {y_sample}")

x:tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]), y: tensor([1.])


## Dataloader

In [9]:
# Wrap the dataset in a DataLoader that serves shuffled batches of 16 samples,
# loaded by 2 worker processes. Because shuffle=True, the batch printed below
# differs from run to run.
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=16, shuffle=True, num_workers=2)
# Pull a single batch by hand instead of looping over the whole loader.
dataiter = iter(dataloader)
data = next(dataiter)
# Each batch unpacks into the features and targets returned by __getitem__.
x, y = data
print (x, y)

tensor([[1.3740e+01, 1.6700e+00, 2.2500e+00, 1.6400e+01, 1.1800e+02, 2.6000e+00,
         2.9000e+00, 2.1000e-01, 1.6200e+00, 5.8500e+00, 9.2000e-01, 3.2000e+00,
         1.0600e+03],
        [1.1660e+01, 1.8800e+00, 1.9200e+00, 1.6000e+01, 9.7000e+01, 1.6100e+00,
         1.5700e+00, 3.4000e-01, 1.1500e+00, 3.8000e+00, 1.2300e+00, 2.1400e+00,
         4.2800e+02],
        [1.4340e+01, 1.6800e+00, 2.7000e+00, 2.5000e+01, 9.8000e+01, 2.8000e+00,
         1.3100e+00, 5.3000e-01, 2.7000e+00, 1.3000e+01, 5.7000e-01, 1.9600e+00,
         6.6000e+02],
        [1.1610e+01, 1.3500e+00, 2.7000e+00, 2.0000e+01, 9.4000e+01, 2.7400e+00,
         2.9200e+00, 2.9000e-01, 2.4900e+00, 2.6500e+00, 9.6000e-01, 3.2600e+00,
         6.8000e+02],
        [1.4130e+01, 4.1000e+00, 2.7400e+00, 2.4500e+01, 9.6000e+01, 2.0500e+00,
         7.6000e-01, 5.6000e-01, 1.3500e+00, 9.2000e+00, 6.1000e-01, 1.6000e+00,
         5.6000e+02],
        [1.3050e+01, 5.8000e+00, 2.1300e+00, 2.1500e+01, 8.6000e+01, 2.6200e+00,

## Transforms

In [29]:
class WineDataset(torch.utils.data.Dataset):
    """Map-style dataset over a comma-separated numeric file, with transforms.

    The file is parsed once in ``__init__`` as float32 and kept as NumPy
    arrays. Column 0 of each row is the target ``y`` and the remaining
    columns are the features ``x``. Every sample fetched through
    ``__getitem__`` is passed through the configured transforms in order.

    Args:
        transforms: Optional list of callables. Each one receives the current
            ``(x, y)`` sample and returns the sample handed to the next one.
            ``None`` (the default) means no transforms.
        path: Location of the comma-separated data file. Defaults to the
            relative path used by the tutorial.
    """

    def __init__(self, transforms=None, path: str = '../data/wine/wine.data') -> None:
        # ndmin=2 keeps a single-row file two-dimensional; without it
        # np.loadtxt returns a 1-D array and the column slicing below fails.
        raw_data = np.loadtxt(path, delimiter=',', dtype=np.float32, ndmin=2)
        self.x = raw_data[:, 1:]
        # Indexing with [0] (a list) keeps the column axis: shape (n_samples, 1).
        self.y = raw_data[:, [0]]
        self.n_samples = self.x.shape[0]
        self.n_features = self.x.shape[1]
        # A None sentinel replaces the former mutable default ``[]``, which was
        # one list object shared by every instance built without an argument.
        self.transforms = [] if transforms is None else transforms

    def __getitem__(self, index):
        """Return the sample at ``index`` after applying every transform in order."""
        sample = (self.x[index], self.y[index])
        for transform in self.transforms:
            sample = transform(sample)
        # NOTE(review): this print shows the transformed sample for the
        # tutorial; it fires on every access, so it is noisy under a DataLoader.
        print(sample)
        return sample

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

## Writing some custom transforms

In [34]:
class ToTensorTransformer:
    """Callable transform that converts both halves of a sample to tensors."""

    def __call__(self, sample):
        """Convert a two-element sample of NumPy arrays into torch tensors.

        Args:
            sample: Pair ``(x, y)`` whose elements are accepted by
                ``torch.from_numpy``.

        Returns:
            Tuple ``(x_tensor, y_tensor)``.
        """
        features, target = sample
        converted = (torch.from_numpy(features), torch.from_numpy(target))
        return converted

class MultiplierTransformer:
    """Callable transform that scales the first element of a sample.

    Args:
        factor: Value the first element of each sample is multiplied by.
    """

    def __init__(self, factor: float):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(x * factor, y)`` for a two-element sample ``(x, y)``.

        The second element is passed through unchanged.
        """
        features, target = sample
        return features * self.factor, target

In [36]:
# Transforms run in list order: the sample is first converted to tensors,
# then its features are multiplied by 10 (the target is left untouched).
dataset = WineDataset(transforms=[ToTensorTransformer(), MultiplierTransformer(10)])
# Fetching a sample also triggers the print inside WineDataset.__getitem__,
# which is why the tuple appears in the output before the formatted line.
x_sample, y_sample = dataset[0]
print (f"x:{ x_sample}, y: {y_sample}")

(tensor([1.4230e+02, 1.7100e+01, 2.4300e+01, 1.5600e+02, 1.2700e+03, 2.8000e+01,
        3.0600e+01, 2.8000e+00, 2.2900e+01, 5.6400e+01, 1.0400e+01, 3.9200e+01,
        1.0650e+04]), tensor([1.]))
x:tensor([1.4230e+02, 1.7100e+01, 2.4300e+01, 1.5600e+02, 1.2700e+03, 2.8000e+01,
        3.0600e+01, 2.8000e+00, 2.2900e+01, 5.6400e+01, 1.0400e+01, 3.9200e+01,
        1.0650e+04]), y: tensor([1.])
