# Working with data
* PyTorch has two primitives to work with data: torch.utils.data.DataLoader and torch.utils.data.Dataset.
* Dataset stores the samples and their corresponding labels, and DataLoader wraps an iterable around the Dataset.

In [3]:
import torch 
from torch import nn 
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [6]:
# Load the FashionMNIST training and test splits under ./data,
# applying ToTensor() to every sample.
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

100.0%
100.0%
100.0%
100.0%


* We pass the Dataset as an argument to DataLoader. 
* This wraps an iterable over our dataset, and supports automatic batching, sampling, shuffling and multiprocess data loading.
* Here we define a batch size of 64, i.e. each iteration over the dataloader yields a batch of 64 features and labels.

In [7]:
batch_size = 64

# Wrap each split in a DataLoader that serves mini-batches of `batch_size` samples.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# Look at the first test batch only, to confirm the tensor shapes and label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


# Creating Models
* To define a neural network in PyTorch, we create a class that inherits from `nn.Module`. We define the layers of the network in the `__init__` function and specify how data will pass through the network in the `forward` function.
* To accelerate operations in the neural network, we move it to the accelerator such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.

In [10]:
# Pick the device the model and the batches will be placed on.
# `torch.accelerator` is missing from older PyTorch releases, so fall back to
# explicit CUDA / MPS checks there instead of failing with AttributeError.
if hasattr(torch, "accelerator"):
    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
elif torch.cuda.is_available():
    device = "cuda"
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then three linear layers with ReLU in between.

    Args:
        in_features: Number of values per sample after flattening
            (default 28*28, the size of one 1x28x28 image).
        hidden_features: Width of the two hidden layers (default 512).
        num_classes: Number of logits produced per sample (default 10).

    With the default arguments the architecture is the original hard-coded
    784 -> 512 -> 512 -> 10 network.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits for the batch ``x``.

        Every dimension after the first is flattened, so each sample must
        hold ``in_features`` values in total.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# Build the network, place its parameters on the selected device, and show its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


# Optimizing the Model Parameters

In [12]:
# Plain SGD over the model's parameters, paired with a cross-entropy loss.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [13]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader``, printing the loss every 100 batches.

    Args:
        dataloader: Iterable of ``(X, y)`` batches with a ``dataset`` attribute
            whose length is the total number of samples.
        model: ``nn.Module`` to optimize; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. Progress is reported through ``print``.
    """
    size = len(dataloader.dataset)

    # Send batches to wherever the model's parameters live rather than relying
    # on a notebook-global ``device``; a parameter-free model falls back to CPU.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    # Discard gradients left behind by an interrupted earlier run so they
    # cannot leak into the first optimizer step.
    optimizer.zero_grad()

    seen = 0  # samples processed so far in this pass
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(target), y.to(target)
        seen += len(X)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        # Reset the gradients; otherwise the next batch's gradients would be
        # added on top of these.
        optimizer.zero_grad()

        if batch % 100 == 0:
            # A running count stays correct even when the reported batch is a
            # short final batch, unlike (batch + 1) * len(X).
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

In [14]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item() # argmax(1) find the most prob in samples(row)
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [15]:
epochs = 5  # number of train-then-evaluate rounds to run
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One pass over the training loader, followed by an evaluation on the test loader.
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.280331  [   64/60000]
loss: 2.278466  [ 6464/60000]
loss: 2.249203  [12864/60000]
loss: 2.252773  [19264/60000]
loss: 2.243618  [25664/60000]
loss: 2.199745  [32064/60000]
loss: 2.215639  [38464/60000]
loss: 2.169378  [44864/60000]
loss: 2.160173  [51264/60000]
loss: 2.137871  [57664/60000]
Test Error: 
 Accuracy: 41.9%, Avg loss: 2.128030 

Epoch 2
-------------------------------
loss: 2.130300  [   64/60000]
loss: 2.125590  [ 6464/60000]
loss: 2.058294  [12864/60000]
loss: 2.078682  [19264/60000]
loss: 2.034800  [25664/60000]
loss: 1.964374  [32064/60000]
loss: 1.999596  [38464/60000]
loss: 1.910466  [44864/60000]
loss: 1.902923  [51264/60000]
loss: 1.839734  [57664/60000]
Test Error: 
 Accuracy: 56.3%, Avg loss: 1.838361 

Epoch 3
-------------------------------
loss: 1.865883  [   64/60000]
loss: 1.836277  [ 6464/60000]
loss: 1.713645  [12864/60000]
loss: 1.757493  [19264/60000]
loss: 1.654161  [25664/60000]
loss: 1.613753  [32064/600