In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms

# --- Configuration: every value a reader might want to tune lives here ---
SEED = 42                # fixed seed for PyTorch's global random number generator
NUM_EPOCHS = 30          # number of full passes over the training split
BATCH_SIZE = 32          # mini-batch size shared by all three loaders
VALID_DATA_SIZE = 5_000  # how many leading training images are held out for validation
DATA_DIR = "./"          # where FashionMNIST is downloaded to / read from
# Human-readable class names, presumably indexed by label id (not referenced in this cell).
CLASS_NAMES = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# Seed the global RNG before anything random happens, so that whatever draws
# from it afterwards (training shuffle order, default weight initialisation)
# starts from the same state on every Restart & Run All.
torch.manual_seed(SEED)

# --- Datasets: images are converted to float tensors by ToTensor ---
to_tensor = transforms.ToTensor()
train_data = datasets.FashionMNIST(DATA_DIR, download=True, train=True, transform=to_tensor)
test_data = datasets.FashionMNIST(DATA_DIR, download=True, train=False, transform=to_tensor)

# The first VALID_DATA_SIZE training samples form the validation split,
# the remainder is what the model is actually trained on.
valid_set = Subset(train_data, range(VALID_DATA_SIZE))
train_set = Subset(train_data, range(VALID_DATA_SIZE, len(train_data)))

# --- Loaders ---
# Only the training loader is shuffled. Evaluation metrics do not benefit from
# shuffling, and a fixed order keeps validation/test batches identical from
# epoch to epoch.
train = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
valid = DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=False)
test = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# Use the GPU when one is present, otherwise fall back to the CPU so the
# cell still runs on machines without CUDA instead of raising.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LEARNING_RATE = 0.01

# Fully connected classifier: a 28x28 image is flattened to 784 features,
# passed through two hidden ReLU layers (300 and 100 units) and mapped to
# 10 output logits (one per class).
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 300),
    nn.ReLU(),
    nn.Linear(300, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
).to(device)

# CrossEntropyLoss consumes raw logits, which is why the network has no
# softmax layer at the end.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

def run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: network to train or evaluate; batches are moved to the device
            its parameters live on.
        loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss function returning the mean loss of a batch.
        optimizer: when given, the model is put in training mode and updated
            after every batch; when ``None`` the model is put in evaluation
            mode and no gradients are tracked.

    Returns:
        A tuple ``(mean_loss, accuracy)``, both averaged over the number of
        samples actually seen.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    # Switch layer behaviour (dropout, batch norm, ...) to match the phase.
    # The current network has no such layers, but this keeps the loop correct
    # if the architecture changes.
    model.train(training)
    device = next(model.parameters()).device

    loss_sum = 0.0
    num_correct = 0
    num_samples = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss = criterion(logits, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # The criterion returns a per-batch mean, so weight it by the batch
            # size; otherwise a shorter final batch would count as much as a
            # full one in the epoch average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            num_correct += (logits.argmax(dim=1) == labels).sum().item()
            num_samples += batch_size

    if num_samples == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / num_samples, num_correct / num_samples


print(f"Train on {len(train_set)}, validate on {len(valid_set)} samples")
for epoch in range(NUM_EPOCHS):
    # Training metrics are printed first (without a newline) so progress is
    # visible while the validation pass is still running.
    train_loss, train_accuracy = run_epoch(model, train, criterion, optimizer)
    print(f"Epoch {epoch + 1}, loss: {train_loss}, accuracy: {train_accuracy}", end=' ')

    val_loss, val_accuracy = run_epoch(model, valid, criterion)
    print(f"val_loss: {val_loss}, val_accuracy: {val_accuracy}")

# Final, one-off evaluation on the held-out test set.
test_loss, test_accuracy = run_epoch(model, test, criterion)
print(f"test_loss: {test_loss}, test_accuracy: {test_accuracy}")


Train on 55000, validate on 5000 samples
Epoch 1, loss: 1.1316502030121858, accuracy: 0.6232727272727273 val_loss: 0.6769441383279813, val_accuracy: 0.757
Epoch 2, loss: 0.6036612820476207, accuracy: 0.7859636363636363 val_loss: 0.5395136046561466, val_accuracy: 0.8142
Epoch 3, loss: 0.514902888818353, accuracy: 0.8193272727272727 val_loss: 0.48102845640698816, val_accuracy: 0.833
Epoch 4, loss: 0.473003404580139, accuracy: 0.8340727272727273 val_loss: 0.44933794884924677, val_accuracy: 0.8426
Epoch 5, loss: 0.44927452446122446, accuracy: 0.8424363636363636 val_loss: 0.4314402946431166, val_accuracy: 0.8496
Epoch 6, loss: 0.42828405559617744, accuracy: 0.8492363636363637 val_loss: 0.4392118185378943, val_accuracy: 0.8448
Epoch 7, loss: 0.41242473550821335, accuracy: 0.8559636363636364 val_loss: 0.40767952012028663, val_accuracy: 0.8602
Epoch 8, loss: 0.39993671059105684, accuracy: 0.8597090909090909 val_loss: 0.4008289171252281, val_accuracy: 0.8578
Epoch 9, loss: 0.3878830282397919, a