## Data Transforms

(Based on a tutorial by Python Engineer on YouTube.)
We will use a built-in dataset in this tutorial.

In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [2]:
class WineDataset(Dataset):
    """Dataset backed by a comma-separated file of wine measurements.

    The first row of the file is skipped as a header. The first column is
    used as the class label and every remaining column as a feature. Data is
    kept as float32 NumPy arrays; converting to tensors is left to the
    optional ``transform``.

    Args:
        transform: Optional callable that receives each ``(features, label)``
            tuple and returns the value handed back by ``__getitem__``.
        csv_path: Path of the CSV file to load. Defaults to ``'wine.csv'``
            (resolved against the current working directory).
    """

    def __init__(self, transform=None, csv_path='wine.csv'):
        # delimiter: column separator; dtype: element type of the result;
        # skiprows=1: ignore the header row.
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row (loadtxt would otherwise return a 1-D array and the
        # column slicing below would fail).
        xy = np.loadtxt(csv_path, delimiter=",", dtype=np.float32,
                        skiprows=1, ndmin=2)
        self.n_samples = xy.shape[0]

        # Split into features and labels. They intentionally stay NumPy
        # arrays here: tensor conversion is the job of a transform.
        self.x = xy[:, 1:]
        # Indexing with the list [0] keeps the column axis, so the labels
        # have shape (n_samples, 1) rather than (n_samples,).
        self.y = xy[:, [0]]

        # Stored so __getitem__ can apply it to every sample.
        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index`` (supports ``dataset[0]``).

        Without a transform the result is a ``(features, label)`` tuple of
        NumPy arrays. With an integer index these are views into the
        dataset's own storage, so a transform should not modify them in place.
        """
        sample = self.x[index], self.y[index]

        # Compare against None explicitly: a transform object that happens to
        # be falsy (e.g. it defines __len__ returning 0) must still be applied.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples (supports ``len(dataset)``)."""
        return self.n_samples

In [3]:
# now we can create some custom transforms
class ToTensor:
    """Callable transform turning a pair of NumPy arrays into a pair of tensors."""

    def __call__(self, sample):
        """Convert a ``(features, labels)`` sample to ``torch.Tensor`` objects.

        Args:
            sample: Two-element iterable of NumPy arrays.

        Returns:
            Tuple ``(features_tensor, labels_tensor)`` built with
            ``torch.from_numpy``.
        """
        # Split the sample into its two parts.
        features, labels = sample

        features_tensor = torch.from_numpy(features)
        labels_tensor = torch.from_numpy(labels)
        return features_tensor, labels_tensor

In [4]:
# Baseline: build the dataset without a transform to see what raw samples look like.
dataset = WineDataset()            # no transform is applied to the samples

first_data = dataset[0]            # first sample, a (features, labels) tuple
features, labels = first_data      # unpack the tuple into its two parts
print(type(features),type(labels)) # both are NumPy arrays

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [15]:
# Same dataset, but every sample now goes through ToTensor before being returned.
dataset = WineDataset(transform=ToTensor()) # pass the transform instance
 
first_data = dataset[0]                     # first sample after the transform
features, labels = first_data               # unpack features and labels
print(features)                             # feature values of the first sample
print(type(features),type(labels))          # both are torch tensors now

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>


In [12]:
#the second transform example
class MulTransform:
    """Callable transform that scales the features of a sample by a constant.

    Args:
        factor: Multiplier applied to the feature part of every sample.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, targets)`` for an ``(inputs, targets)`` sample.

        The caller's ``inputs`` object is left untouched; ``targets`` is
        passed through unchanged.
        """
        inputs, targets = sample
        # Multiply out of place. An in-place `inputs *= factor` writes into
        # the caller's buffer, and tensors created by torch.from_numpy share
        # memory with their source array, so the dataset's stored values
        # would be rescaled again on every access.
        scaled = inputs * self.factor

        return scaled, targets  # tuple

In [18]:
# Chain several transforms: Compose takes a list of callables and runs them in list order,
# so each sample is first converted by ToTensor and then scaled by MulTransform(2).
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)]) # the argument must be a list of transforms
dataset = WineDataset(transform=composed)

first_data = dataset[0]             # first sample after the composed transform
features, labels = first_data       # unpack features and labels
print(features)  # every feature value has been multiplied by 2
print(type(features),type(labels))  # still torch tensors

tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>


In [9]:
# IPython help lookup (trailing `?`): shows the documentation of torchvision.transforms.Compose.
# This is IPython-only syntax and will not run as plain Python.
torchvision.transforms.Compose?