In [1]:
import torch
from torchvision.datasets import MNIST

In [3]:
data_train = MNIST('.', train=True, download=True)
data_test = MNIST('.', train=False, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [4]:
len(data_train)

60000

In [5]:
from torch.utils.data import Dataset, DataLoader

In [6]:
from torch import nn

In [7]:
class MLP(nn.Module):
  def __init__(self, in_features, num_classes, hidden_size):
    super().__init__()
    self.model = nn.Sequential(
      nn.Linear(in_features, hidden_size),
      nn.ReLU(),
      nn.Linear(hidden_size, hidden_size),
      nn.LeakyReLU(0.1),
      nn.Linear(hidden_size, num_classes)   
    )
  def forward(self, x):
    return self.model(x)
  


In [8]:
IMG_SIZE = 28
model = MLP(IMG_SIZE ** 2, 10, 64)
model

MLP(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): LeakyReLU(negative_slope=0.1)
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [9]:
model.model[0].weight.shape

torch.Size([64, 784])

In [10]:
model.model[0].bias

Parameter containing:
tensor([ 0.0296,  0.0207, -0.0091, -0.0154, -0.0143, -0.0044, -0.0167, -0.0284,
        -0.0094,  0.0204,  0.0034, -0.0160,  0.0298,  0.0006,  0.0258, -0.0322,
        -0.0197,  0.0192, -0.0247,  0.0336, -0.0160,  0.0097,  0.0271,  0.0321,
         0.0231,  0.0226,  0.0079,  0.0134,  0.0221, -0.0054, -0.0245,  0.0087,
        -0.0026,  0.0238, -0.0348, -0.0029,  0.0096, -0.0082, -0.0234, -0.0033,
         0.0350, -0.0293, -0.0200,  0.0189,  0.0171,  0.0117,  0.0153,  0.0215,
        -0.0316, -0.0004,  0.0210,  0.0326, -0.0307, -0.0300, -0.0117,  0.0060,
         0.0190,  0.0019, -0.0326,  0.0120, -0.0237, -0.0052, -0.0137,  0.0230],
       requires_grad=True)

In [17]:
import torchvision.transforms

data_train = MNIST('.', train=True, transform=torchvision.transforms.ToTensor(), download=True)
data_test = MNIST('.', train=False, transform=torchvision.transforms.ToTensor(), download=True)

In [18]:
train_loader = DataLoader(data_train, batch_size=64, shuffle=True)
test_loader = DataLoader(data_test, batch_size=64, shuffle=False)

In [21]:
for images, labels in train_loader:
  print(images.shape, labels.shape)
  break

torch.Size([64, 1, 28, 28]) torch.Size([64])


In [23]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [24]:
criterion = nn.CrossEntropyLoss()

In [27]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [28]:
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
  for images, labels in train_loader:
    images = images.to(device)
    labels = labels.to(device)
    
    optimizer.zero_grad()
    logits = model(images.reshape(-1, IMG_SIZE ** 2))

    loss = criterion(logits, labels)
    loss.backward()

    optimizer.step()

RuntimeError: ignored