In [5]:
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

In [None]:
# Build both FashionMNIST splits with identical settings, differing only in the
# `train` flag. Files live under ./data and are downloaded when missing;
# ToTensor() turns every image into a tensor.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor(),
    )
    for is_train in (True, False)  # order matters: training split first, then test split
)

100%|██████████| 26.4M/26.4M [02:06<00:00, 209kB/s] 
100%|██████████| 29.5k/29.5k [00:00<00:00, 322kB/s]
100%|██████████| 4.42M/4.42M [00:03<00:00, 1.47MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 24.0MB/s]


In [3]:
# Report the installed PyTorch build and whether a CUDA GPU is usable.
print(f"Torch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Index 0 is the first visible CUDA device.
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU name: N/A")

Torch version: 2.7.1+cu118
CUDA available: True
GPU name: NVIDIA GeForce RTX 4070 Laptop GPU


In [None]:
batch_size = 64

# A DataLoader wraps a dataset in an iterable that yields (features, labels)
# batches of `batch_size` samples.
train_data_loader = DataLoader(training_data, batch_size=batch_size)
test_data_loader = DataLoader(test_data, batch_size=batch_size)

# Pull the first test batch only, to confirm the tensor shapes and label dtype.
X, y = next(iter(test_data_loader))
print(f"Shape of X [N, C, H, W]: {X.shape}")
print(f"Shape of y: {y.shape} {y.dtype}")

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [None]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print("Using " + device + " device")

class NeuralNetwork(nn.Module):
    """Fully connected classifier for 28x28 images producing 10 logits.

    The input is flattened to a 784-element vector per sample and passed through
    two hidden layers of 512 units with ReLU activations, followed by a linear
    output layer with 10 units.
    """

    def __init__(self):
        super().__init__()
        # Collapses every dimension after the batch dimension into one vector.
        self.flatten = nn.Flatten()
        # Layers are applied in list order by nn.Sequential.
        layers = [
            nn.Linear(28 * 28, 512),  # 28*28 = pixels per input image
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw (unnormalized) output scores for the batch ``x``."""
        return self.linear_relu_stack(self.flatten(x))
    
# Instantiate the network, then move its parameters to the selected device
# (which may be the CPU when no accelerator is available).
model = NeuralNetwork()
model = model.to(device)
print(model)  # layer-by-layer summary of the network



Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [13]:
# Cross-entropy loss over the model's raw output scores.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters; lr is the learning rate.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [None]:
def train(dataloader, model, lossfn, optimizer):
    """Run one training pass over every batch in ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches. ``dataloader.dataset``
            must support ``len()``; it is used only for progress reporting.
        model: Module to train; called as ``model(X)`` and switched to
            training mode via ``model.train()``.
        lossfn: Callable ``lossfn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer whose ``step()`` / ``zero_grad()`` update and
            reset the model's parameters and gradients.

    Each batch is moved to the notebook-level ``device`` before the forward
    pass. On every 100th batch the current loss and an approximate count of
    samples processed so far are printed.
    """
    size = len(dataloader.dataset)  # total sample count, for the progress display
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X)  # forward pass
        # Use the loss function supplied by the caller rather than a
        # notebook-level global, so a different loss can actually be passed in.
        loss = lossfn(pred, y)

        # Backpropagation
        loss.backward()        # compute gradients
        optimizer.step()       # update model parameters
        optimizer.zero_grad()  # clear gradients before the next batch

        if batch % 100 == 0:
            # Keep the float in its own name instead of rebinding the tensor.
            # `current` assumes every batch so far had len(X) samples.
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss:{loss_value:>7f} [{current:>5d}/{size:>5d}]")


In [16]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches; ``len(dataloader)``
            is the batch count and ``len(dataloader.dataset)`` the sample count.
        model: Module to evaluate; switched to evaluation mode first.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.

    The loss is averaged per batch and accuracy per sample. Batches are moved
    to the notebook-level ``device`` before the forward pass.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    total_loss = 0
    n_correct = 0

    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        for features, labels in dataloader:
            features = features.to(device)
            labels = labels.to(device)
            scores = model(features)
            total_loss += loss_fn(scores, labels).item()
            # 1.0 where the highest-scoring class equals the label, else 0.0
            hits = (scores.argmax(1) == labels).type(torch.float)
            n_correct += hits.sum().item()

    avg_loss = total_loss / n_batches
    accuracy = n_correct / n_samples

    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>8f} \n")
    

In [18]:
epochs = 5
# Each epoch is one full training pass followed by an evaluation on the test set.
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-----------------")
    train(train_data_loader, model, loss_fn, optimizer)
    test(test_data_loader, model, loss_fn)

print("Done!")

Epoch 1
-----------------
loss:2.165380 [   64/60000]
loss:2.150838 [ 6464/60000]
loss:2.090293 [12864/60000]
loss:2.105324 [19264/60000]
loss:2.056660 [25664/60000]
loss:1.995659 [32064/60000]
loss:2.022358 [38464/60000]
loss:1.940792 [44864/60000]
loss:1.958275 [51264/60000]
loss:1.865980 [57664/60000]
Test Error: 
 Accuracy: 52.3%, Avg loss: 1.870027 

Epoch 2
-----------------
loss:1.911151 [   64/60000]
loss:1.876100 [ 6464/60000]
loss:1.757361 [12864/60000]
loss:1.792996 [19264/60000]
loss:1.693117 [25664/60000]
loss:1.645213 [32064/60000]
loss:1.658052 [38464/60000]
loss:1.560817 [44864/60000]
loss:1.594376 [51264/60000]
loss:1.477263 [57664/60000]
Test Error: 
 Accuracy: 59.1%, Avg loss: 1.503812 

Epoch 3
-----------------
loss:1.576519 [   64/60000]
loss:1.538573 [ 6464/60000]
loss:1.392740 [12864/60000]
loss:1.455864 [19264/60000]
loss:1.357539 [25664/60000]
loss:1.347363 [32064/60000]
loss:1.352243 [38464/60000]
loss:1.278604 [44864/60000]
loss:1.320377 [51264/60000]
loss:1

In [19]:
# Persist only the model's state dict (its parameter tensors), not the module object.
torch.save(obj=model.state_dict(), f="model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [20]:
# Rebuild the architecture on the target device, then restore the saved weights.
model = NeuralNetwork()
model = model.to(device)
# Kept as the cell's last expression so the key-matching result is displayed.
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>