<a href="https://colab.research.google.com/github/meetgandhi123/PyTorch-Basic-Concepts/blob/main/04_Dataset_and_DataLoader_Batch_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Previously we used PyTorch's built-in datasets; in this part we load a dataset from a CSV file.

In [3]:
import torch 
import torchvision
from torch.utils.data import DataLoader, Dataset
import numpy as np
import math

class WineDataset(Dataset):
    """Wine data read from a CSV file and held in memory as tensors.

    The first line of the file is skipped as a header. Column 0 is used as the
    label and every remaining column as a feature.

    Args:
        csv_path: Location of the CSV file. Defaults to the path this notebook
            has always read from, so ``WineDataset()`` keeps working unchanged.
    """

    def __init__(self, csv_path='/content/wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it the column slicing below raises IndexError.
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])   # features: every column but the first
        self.y = torch.from_numpy(xy[:, [0]])  # labels: list index keeps them 2-D, (n_samples, 1)
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of data rows loaded from the file."""
        return self.n_samples

dataset = WineDataset()
# Inspecting a single sample (left as a comment; example output underneath):
# first_data = dataset[0]
# features , label = first_data
# print(features , label)
#     tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
#         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
#         1.0650e+03]) tensor([1.])

dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)

# Draw one batch and print it. The built-in next() is used instead of the
# iterator's old .next() method, which current PyTorch releases do not provide.
# Because shuffle=True the rows differ from run to run; with batch_size=4 the
# printed features form a 4 x 13 tensor and the labels a 4 x 1 tensor.
dataiter = iter(dataloader)
data = next(dataiter)
feature, label = data
print(feature, label)


tensor([[1.2470e+01, 1.5200e+00, 2.2000e+00, 1.9000e+01, 1.6200e+02, 2.5000e+00,
         2.2700e+00, 3.2000e-01, 3.2800e+00, 2.6000e+00, 1.1600e+00, 2.6300e+00,
         9.3700e+02],
        [1.2080e+01, 1.1300e+00, 2.5100e+00, 2.4000e+01, 7.8000e+01, 2.0000e+00,
         1.5800e+00, 4.0000e-01, 1.4000e+00, 2.2000e+00, 1.3100e+00, 2.7200e+00,
         6.3000e+02],
        [1.4340e+01, 1.6800e+00, 2.7000e+00, 2.5000e+01, 9.8000e+01, 2.8000e+00,
         1.3100e+00, 5.3000e-01, 2.7000e+00, 1.3000e+01, 5.7000e-01, 1.9600e+00,
         6.6000e+02],
        [1.3830e+01, 1.5700e+00, 2.6200e+00, 2.0000e+01, 1.1500e+02, 2.9500e+00,
         3.4000e+00, 4.0000e-01, 1.7200e+00, 6.6000e+00, 1.1300e+00, 2.5700e+00,
         1.1300e+03]]) tensor([[2.],
        [2.],
        [3.],
        [1.]])


### Adding transforms to a custom dataset.

In [6]:
import torch 
import torchvision
from torch.utils.data import DataLoader, Dataset
import numpy as np
import math

class WineDataset(Dataset):
    """Wine data read from a CSV file, with an optional per-sample transform.

    The first line of the file is skipped as a header. Column 0 is used as the
    label and every remaining column as a feature. Data is kept as NumPy
    arrays; converting to tensors is left to ``transform``.

    Args:
        transform: Optional callable applied to each ``(features, label)``
            tuple before it is returned from ``__getitem__``.
        csv_path: Location of the CSV file. Defaults to the path this notebook
            has always read from, so existing calls keep working unchanged.
    """

    def __init__(self, transform=None, csv_path='/content/wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it the column slicing below raises IndexError.
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = xy[:, 1:]    # features: every column but the first
        self.y = xy[:, [0]]   # labels: list index keeps them 2-D, (n_samples, 1)
        self.n_samples = xy.shape[0]
        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index``, passed through ``transform`` if one is set."""
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return the number of data rows loaded from the file."""
        return self.n_samples

class ToTensor:
    """Transform turning a ``(features, target)`` pair of NumPy arrays into tensors."""

    def __call__(self, sample):
        """Wrap both members of ``sample`` with ``torch.from_numpy``.

        Args:
            sample: Two-element iterable ``(features, target)`` of NumPy arrays.

        Returns:
            Tuple ``(features_tensor, target_tensor)``.
        """
        features, target = sample
        features_tensor = torch.from_numpy(features)
        target_tensor = torch.from_numpy(target)
        return features_tensor, target_tensor

class MulTransform:
    """Transform that multiplies the features of a ``(features, target)`` pair."""

    def __init__(self, number):
        """Store ``number``, the factor applied to the features on every call."""
        self.number = number

    def __call__(self, sample):
        """Return ``(features * number, target)``; the target is passed through untouched."""
        features, target = sample
        # Out-of-place multiply: the caller's features object is not modified.
        scaled = features * self.number
        return scaled, target

# The sample is unpacked into `features` rather than `input`, so the builtin
# input() is not overwritten in the notebook's namespace.

# No transform: samples come back as the raw NumPy arrays.
dataset = WineDataset()
first_data = dataset[0]
features, target = first_data
print(type(features), type(target))
# <class 'numpy.ndarray'> <class 'numpy.ndarray'>

# ToTensor: both members of the sample become torch tensors.
dataset = WineDataset(transform=ToTensor())
first_data = dataset[0]
features, target = first_data
print(type(features), type(target))
# <class 'torch.Tensor'> <class 'torch.Tensor'>

# Same dataset again (no need to reload the CSV), this time also showing the values.
first_data = dataset[0]
features, target = first_data
print(features)
print(type(features), type(target))
# <class 'torch.Tensor'> <class 'torch.Tensor'>

# Compose chains the transforms: convert to tensors first, then double the features.
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])
dataset = WineDataset(transform=composed)
first_data = dataset[0]
features, target = first_data
print(features)
print(type(features), type(target))


<class 'numpy.ndarray'> <class 'numpy.ndarray'>
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>


### Training a small feed-forward neural network classifier on the wine dataset.

In [49]:
import torch 
import torchvision
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import math

# Device selection: use the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    Device = torch.device('cuda')
else:
    Device = torch.device('cpu')
print(f'Device usage: {Device}')

class WineDataset(Dataset):
    """Wine data read from a CSV file, with an optional per-sample transform.

    The header line of the file is skipped. Column 0 supplies the label and the
    remaining columns supply the features. Arrays are stored as NumPy data;
    any conversion (for example to tensors) is the job of ``transform``.

    Args:
        transform: Optional callable that receives each ``(features, label)``
            tuple and returns what ``__getitem__`` should hand back.
        csv_path: CSV file to read. The default is the path this notebook has
            always used, so existing calls keep working unchanged.
    """

    def __init__(self, transform=None, csv_path='/content/wine.csv'):
        # ndmin=2 guarantees a 2-D array; a file with only one data row would
        # otherwise load as 1-D and break the column slicing below.
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = xy[:, 1:]    # features: all columns after the first
        self.y = xy[:, [0]]   # labels: list index keeps shape (n_samples, 1)
        self.n_samples = xy.shape[0]
        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index``, run through ``transform`` when one is set."""
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return how many data rows were loaded."""
        return self.n_samples

class ToTensor:
    """Callable transform: NumPy ``(features, target)`` pair in, tensor pair out."""

    def __call__(self, sample):
        """Convert the two arrays in ``sample`` with ``torch.from_numpy`` and return them as a tuple."""
        features, target = sample
        return tuple(map(torch.from_numpy, (features, target)))

# Hyper Parameter
input_size = 13        # width of the network's first layer; must match the feature count per sample
hidden_size = 5
batch_size = 10
num_epoch = 20
num_classes = 3
learning_rate = 0.001

dataset = WineDataset(transform=ToTensor())
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)

# Sanity check: draw one batch. The built-in next() is used because DataLoader
# iterators in current PyTorch releases have no .next() method. The batch is
# unpacked into names that do not shadow the builtin input().
sample = iter(dataloader)
sample_features, sample_labels = next(sample)
# print(sample_features.shape, sample_labels.shape)
# with batch_size = 10: torch.Size([10, 13]) torch.Size([10, 1])


# Model
class NeuralNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw output of the second linear layer (no softmax is applied here)."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

# Put the model on the same device the batches are moved to in the loops below;
# leaving it on the CPU causes a device-mismatch error whenever CUDA is selected.
model = NeuralNet(input_size, hidden_size, num_classes).to(Device)

# Loss and Optimizers
# CrossEntropyLoss takes the network's raw scores together with integer class indices.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# Training loop
num_step = len(dataloader)   # batches per epoch, used only in the progress message
for epoch in range(num_epoch):
    for i, (x, labels) in enumerate(dataloader):
        x = x.to(Device)
        # Labels arrive as a float column of shape (batch, 1). Flatten them to
        # (batch,), cast to int64 and subtract 1 so the classes start at 0.
        labels = labels.view(labels.shape[0]).long() - 1
        labels = labels.to(Device)

        # Forward
        output = model(x)
        loss = criterion(output, labels)

        # Backward: clear old gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss on every sixth batch
        if (i + 1) % 6 == 0:
            print(f'Epoch: {epoch+1} / {num_epoch}, step: {i+1} / {num_step}, loss: {loss.item():.4f}')

# Testing
# NOTE: this loops over the same `dataloader` that was used for training, so the
# number printed below is accuracy on the training data, not on held-out data.
model.eval()   # switch the model to evaluation mode for inference
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for x, labels in dataloader:
        x = x.to(Device)
        # Flatten (batch, 1) -> (batch,), cast to int64 and subtract 1 so the
        # labels line up with the zero-based predicted class indices.
        labels = labels.to(Device).view(-1).long() - 1
        output = model(x)

        # torch.max over dim 1 yields (values, indices); only the indices (the
        # predicted class per row) are needed. Reading `.indices` avoids
        # assigning to `_`, which IPython uses for the last cell output.
        predictions = torch.max(output, 1).indices
        n_samples += labels.shape[0]
        n_correct += (predictions == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'accuracy: {acc}')


Device usage: cpu
Epoch: 1 / 20, step: 6 / 18, loss: 62.4753
Epoch: 1 / 20, step: 12 / 18, loss: 41.8435
Epoch: 1 / 20, step: 18 / 18, loss: 48.6746
Epoch: 2 / 20, step: 6 / 18, loss: 42.1055
Epoch: 2 / 20, step: 12 / 18, loss: 45.0281
Epoch: 2 / 20, step: 18 / 18, loss: 31.9202
Epoch: 3 / 20, step: 6 / 18, loss: 34.1576
Epoch: 3 / 20, step: 12 / 18, loss: 24.8979
Epoch: 3 / 20, step: 18 / 18, loss: 19.7019
Epoch: 4 / 20, step: 6 / 18, loss: 9.3674
Epoch: 4 / 20, step: 12 / 18, loss: 29.4662
Epoch: 4 / 20, step: 18 / 18, loss: 15.8366
Epoch: 5 / 20, step: 6 / 18, loss: 19.4614
Epoch: 5 / 20, step: 12 / 18, loss: 15.1050
Epoch: 5 / 20, step: 18 / 18, loss: 17.8579
Epoch: 6 / 20, step: 6 / 18, loss: 6.6785
Epoch: 6 / 20, step: 12 / 18, loss: 8.1702
Epoch: 6 / 20, step: 18 / 18, loss: 0.8855
Epoch: 7 / 20, step: 6 / 18, loss: 2.9533
Epoch: 7 / 20, step: 12 / 18, loss: 1.8608
Epoch: 7 / 20, step: 18 / 18, loss: 2.1662
Epoch: 8 / 20, step: 6 / 18, loss: 1.1837
Epoch: 8 / 20, step: 12 / 18, 