# 8. Dataset and DataLoader

L'idea è quella di dividere il dataset in batch. In PyTorch questo si fa con le classi `Dataset` e `DataLoader`.

## 8.1. basic information

In [2]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [3]:
class WineDataset(Dataset):
    """In-memory dataset read from a comma-separated file with one header row.

    Column 0 of every row is used as the label and the remaining columns as
    the features. Both are stored as ``float32`` tensors.

    Args:
        path: Location of the CSV file. Defaults to ``'wine.csv'``, the file
            name this class previously hard-coded.
    """

    def __init__(self, path='wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row, so the column slicing below always works.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])   # n_samples, n_features
        self.y = torch.from_numpy(xy[:, [0]])  # n_samples, 1
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of data rows loaded from the file."""
        return self.n_samples

In [4]:
# Build the dataset once; the constructor reads the whole CSV into memory.
dataset = WineDataset()

In [5]:
# Indexing goes through WineDataset.__getitem__, which yields one
# (features, label) pair.
first_data = dataset[0]
features, labels = first_data
print(features,labels)


tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [9]:
# Wrap the dataset in a DataLoader that serves shuffled mini-batches of
# `batch_size` samples.
batch_size = 4
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [10]:
num_epochs = 2
total_samples = len(dataset)
# Round up: when the sample count is not a multiple of batch_size, the last
# batch of an epoch is smaller but still counts as one iteration.
n_iterations = math.ceil(total_samples/batch_size)
print(total_samples,n_iterations)


178 45


In [12]:

# Dummy training loop: visit every mini-batch of every epoch and report
# progress on every 20th step (i = 0, 20, 40, ...).
for epoch in range(num_epochs):
    for i, batch in enumerate(dataloader):
        inputs, labels = batch
        if i % 20:
            continue
        print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')

epoch 1/2, step 1/45, inputs torch.Size([4, 13])
epoch 1/2, step 21/45, inputs torch.Size([4, 13])
epoch 1/2, step 41/45, inputs torch.Size([4, 13])
epoch 2/2, step 1/45, inputs torch.Size([4, 13])
epoch 2/2, step 21/45, inputs torch.Size([4, 13])
epoch 2/2, step 41/45, inputs torch.Size([4, 13])


## 8.2. Transform on dataset

In [None]:
# Disabled example: wrapped in a string literal so the MNIST constructor is
# not actually called. It shows a built-in torchvision dataset receiving a
# `transform` argument.
# NOTE(review): no `download=True` is passed, so this presumably needs the data
# to already exist under ./data - confirm before enabling.
'''
dataset = torchvision.datasets.MNIST(
    root='./data',
    transform=torchvision.transforms.ToTensor(),
)
'''

In [None]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [15]:
class WineDataset(Dataset):
    """Wine dataset that stores NumPy arrays and applies an optional transform.

    Column 0 of every CSV row is the label and the remaining columns are the
    features. Data is kept as ``float32`` NumPy arrays; any conversion (for
    example to tensors) is left to ``transform``.

    Args:
        transform: Optional callable that receives and returns an
            ``(inputs, labels)`` tuple. It is applied on every item access.
        path: Location of the CSV file. Defaults to ``'wine.csv'``, the file
            name this class previously hard-coded.
    """

    def __init__(self, transform=None, path='wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row, so the column slicing below always works.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = xy[:, 1:]
        self.y = xy[:, [0]]  # n_samples, 1
        self.n_samples = xy.shape[0]

        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) sample stored at ``index``."""
        # Hand out copies: basic indexing returns views into the stored
        # arrays, so an in-place transform would otherwise modify the dataset.
        sample = self.x[index].copy(), self.y[index].copy()
        # Compare against None instead of relying on truthiness: a callable
        # that defines __len__ or __bool__ may be falsy and would be skipped.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return the number of data rows loaded from the file."""
        return self.n_samples

In [19]:
class ToTensor:
    """Callable transform turning a pair of NumPy arrays into a pair of tensors."""

    def __call__(self, sample):
        # Unpack first so anything other than a two-element sample is rejected.
        features, targets = sample
        return tuple(map(torch.from_numpy, (features, targets)))
    
class MulTransform:
    """Callable transform that scales the inputs of a sample by a constant.

    Args:
        factor: Multiplier applied to the inputs; labels are returned as is.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, labels)`` without modifying ``inputs``."""
        inputs, labels = sample
        # Multiply out of place: `inputs` may share memory with the dataset's
        # backing array (e.g. a tensor built with torch.from_numpy), in which
        # case `*=` would rescale the stored data again on every access.
        return inputs * self.factor, labels

In [16]:
# The dataset now stores NumPy arrays; ToTensor converts each sample to
# tensors at access time.
dataset = WineDataset(transform=ToTensor())

In [17]:
# The transform runs inside __getitem__, so the sample arrives as tensors.
first_data = dataset[0]
features, labels = first_data
print(features,labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [20]:
# Compose chains the transforms in list order: convert to tensors first,
# then multiply the features by 2.
composed = torchvision.transforms.Compose([ToTensor(),MulTransform(2)])
dataset = WineDataset(transform=composed)

first_data = dataset[0]
features, labels = first_data
print(features,labels)

tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03]) tensor([1.])


# 9. Softmax and Cross Entropy

## 9.1. basic information

$$S(y_i) = \frac{e^{y_i}}{\sum_j e^{y_j}}$$

In [21]:
import torch
import torch.nn as nn
import numpy as np

In [22]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Args:
        x: Array-like of scores. For a 2-D input each column is normalised
            independently, because the reduction runs over axis 0.

    Returns:
        An array with the same shape as ``x`` whose entries along axis 0 are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; an empty input yields an empty result.
        return np.exp(x)
    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing to inf (and the ratio from becoming NaN).
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

# Three raw scores; softmax turns them into probabilities that sum to 1.
x = np.array([2.0,1.0,0.1])

outputs = softmax(x)

print('softmax numpy:',outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [23]:
# Same scores as the NumPy example, using the built-in torch implementation.
x = torch.tensor([2.0,1.0,0.1])

# dim=0 normalises along the only axis of this 1-D tensor.
outputs = torch.softmax(x,dim=0)

print('softmax torch:',outputs)

softmax torch: tensor([0.6590, 0.2424, 0.0986])


Cross Entropy

$$H(Y,\hat{Y}) = -\frac{1}{N} \sum_{n=1}^{N} \sum_i Y_{n,i} \log \hat{Y}_{n,i}$$

In [24]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Return the cross-entropy between a target and a predicted distribution.

    Args:
        actual: Target distribution (e.g. a one-hot vector).
        predicted: Predicted probabilities, same shape as ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.

    Returns:
        ``-sum(actual * log(predicted))``, not divided by the number of
        samples.
    """
    # Bound the probabilities away from zero: log(0) is -inf, which turns the
    # loss into inf, or into NaN when multiplied by a zero target entry.
    predicted = np.maximum(predicted, eps)
    return -np.sum(actual * np.log(predicted))

# One-hot target: the true class is class 0.
Y = np.array([1,0,0])

# Most of the probability mass is on the true class.
Y_pred_good = np.array([0.7,0.2,0.1])

# Most of the probability mass is on a wrong class.
Y_pred_bad = np.array([0.1,0.3,0.6])

l1 = cross_entropy(Y,Y_pred_good)

l2 = cross_entropy(Y,Y_pred_bad)

print(f'Loss1 numpy: {l1:.4f}')

print(f'Loss2 numpy: {l2:.4f}')



Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


`nn.CrossEntropyLoss()` applica già la softmax al suo interno, quindi non va aggiunta all'uscita del modello: la loss riceve direttamente i logit.

Inoltre, Y deve contenere l'indice della classe corretta: non serve l'one-hot encoding.

In [26]:

loss = nn.CrossEntropyLoss()

# Target is the class index (class 0), not a one-hot vector.
Y = torch.tensor([0])

# nsamples x nclasses = 1 x 3
# Raw scores (logits): no softmax is applied before passing them to the loss.
Y_pred_good = torch.tensor([[2.0,1.0,0.1]])
Y_pred_bad = torch.tensor([[0.5,2.0,0.3]])

l1 = loss(Y_pred_good,Y)
l2 = loss(Y_pred_bad,Y)

print(f'PyTorch Loss1: {l1.item():.4f}')

print(f'PyTorch Loss2: {l2.item():.4f}')


 

PyTorch Loss1: 0.4170
PyTorch Loss2: 1.8406


In [27]:
# torch.max along dim 1 returns (values, indices); the indices are the
# predicted class of each sample.
_, predictions1 = torch.max(Y_pred_good,1)
_, predictions2 = torch.max(Y_pred_bad,1)

print(predictions1,predictions2)


tensor([0]) tensor([1])


La loss in pytorch permette di ricevere in input più di un sample alla volta

In [29]:
loss = nn.CrossEntropyLoss()

# One class index per sample.
Y = torch.tensor([2,0,1])

# nsamples x nclasses = 3 x 3
# Good predictions: the largest score of each row sits at the target class.
Y_pred_good = torch.tensor([[2.0,1.0,10.0], [2.0,1.0,0.1],[2.0,10.0,0.1]])
# Bad predictions: every row favours class 1, so only the last sample is right.
Y_pred_bad = torch.tensor([[0.5,2.0,0.3],[0.5,2.0,0.3],[0.5,2.0,0.3]])

l1 = loss(Y_pred_good,Y)
l2 = loss(Y_pred_bad,Y)

print(f'PyTorch Loss1: {l1.item():.4f}')

print(f'PyTorch Loss2: {l2.item():.4f}')

PyTorch Loss1: 0.1393
PyTorch Loss2: 1.4073


## 9.2. Neural network

This NN can be used for classification

In [None]:
class NeuralNet(nn.Module):
    """Multi-class classifier: linear layer, ReLU, linear layer.

    The forward pass returns raw class scores, one per class.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        hidden = self.relu(self.l1(x))
        # No softmax here: it is included in the loss.
        return self.l2(hidden)
    
# 28*28 inputs, presumably flattened 28x28 images - TODO confirm.
model = NeuralNet(input_size=28*28,hidden_size=5,num_classes=3)
# CrossEntropyLoss takes the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()

For binary classification, we can use a single output unit followed by a sigmoid function.

In [None]:
class NeuralNetBinary(nn.Module):
    """Binary classifier: linear layer, ReLU, linear layer with one output, sigmoid.

    Args:
        input_size: Number of input features.
        hidden_size: Number of units in the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        # Zero-argument super(): the previous call named NeuralNet, which is
        # not a base class of NeuralNetBinary and made construction fail.
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one value in [0, 1] per input row."""
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        # Sigmoid for binary classification
        out = torch.sigmoid(out)
        return out


# The model already ends with a sigmoid, so it is paired with BCELoss, which
# expects probabilities (CrossEntropyLoss expects raw multi-class scores).
model = NeuralNetBinary(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()

# 10. Activation functions