In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
    transforms.RandomRotation(10),
    transforms.RandomPerspective(distortion_scale=0.3),
    transforms.RandomResizedCrop(28, scale=(0.9, 1.5))
])

train_set = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
test_set = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64)

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
class CNN(nn.Module):
    def __init__(self):  
        super(CNN, self).__init__()  
        self.conv1 = nn.Sequential(  
             nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2) # (16,14,14)
         )
        self.conv2 = nn.Sequential( # (16,14,14)
             nn.Conv2d(16, 32, 5, 1, 2), # (32,14,14)
             nn.ReLU(),
             nn.MaxPool2d(2) # (32,7,7)
         )
        self.out = nn.Linear(32*7*7, 10)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1) # (batch, 32,7,7) -> (batch, 32*7*7)
        output = self.out(x)
        return output

In [1]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1, 10):
    print(f"Epoch {epoch}")
    running_loss = 0
    correct = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        predictions = output.argmax(dim=1)
        correct += (predictions == labels).sum().item()
        
    print(f" training loss: {running_loss/len(train_loader)}")
    print(f" training accuracy: {correct/len(train_loader.dataset)}")
    
    with torch.no_grad():
        running_loss = 0
        correct = 0
        for images, labels in test_loader:
            output = model(images)
            loss = criterion(output, labels)
            running_loss += loss.item()
            predictions = output.argmax(dim=1)
            correct += (predictions == labels).sum().item()
    
    print(f" test loss: {running_loss/len(test_loader)}")
    print(f" test accuracy: {correct/len(test_loader.dataset)}")

NameError: name 'nn' is not defined

### Saving and loading the model

In [26]:
# Save the model
torch.save(model.state_dict(), 'model.pt')

In [6]:
# Load the model
model = CNN()
model.load_state_dict(torch.load('model.pt', map_location=torch.device('cpu') ))

<All keys matched successfully>

In [13]:
# Check if the model is working

example = test_set[2]
img, label = example
img = img.unsqueeze(0)
print('Label:', label)

output = model(img)
probabilities = F.softmax(output, dim=1)
print('Probabilities:', probabilities)
print('Prediction:', torch.argmax(output))


Label: 1
Probabilities: tensor([[4.3807e-10, 1.0000e+00, 5.6370e-10, 1.1780e-11, 4.1226e-07, 1.4320e-08,
         3.0005e-08, 6.5777e-08, 6.5056e-09, 6.9205e-10]],
       grad_fn=<SoftmaxBackward0>)
Prediction: tensor(1)
