In [31]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [32]:
transform = transforms.Compose([
    transforms.ToTensor(),
    #transforms.Normalize((0.5,), (0.5,)),
])

train_set = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
test_set = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64)

In [33]:
# Define the model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [34]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

for epoch in range(1, 10):
    print(f"Epoch {epoch}")
    running_loss = 0
    correct = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        predictions = output.argmax(dim=1)
        correct += (predictions == labels).sum().item()
        
    print(f" training loss: {running_loss/len(train_loader)}")
    print(f" training accuracy: {correct/len(train_loader.dataset)}")
    
    with torch.no_grad():
        running_loss = 0
        correct = 0
        for images, labels in test_loader:
            output = model(images)
            loss = criterion(output, labels)
            running_loss += loss.item()
            predictions = output.argmax(dim=1)
            correct += (predictions == labels).sum().item()
    
    print(f" test loss: {running_loss/len(test_loader)}")
    print(f" test accuracy: {correct/len(test_loader.dataset)}")

Epoch 1
 training loss: 2.2962292343822877
 training accuracy: 0.12095
 test loss: 2.2813047800853754
 test accuracy: 0.1734
Epoch 2
 training loss: 2.263457336405447
 training accuracy: 0.25771666666666665
 test loss: 2.242021322250366
 test accuracy: 0.3259
Epoch 3
 training loss: 2.2141734402316975
 training accuracy: 0.3991
 test loss: 2.1778803965088667
 test accuracy: 0.5108
Epoch 4
 training loss: 2.1316309918218583
 training accuracy: 0.5700666666666667
 test loss: 2.0695614951431374
 test accuracy: 0.6227
Epoch 5
 training loss: 1.9917976523259047
 training accuracy: 0.6384
 test loss: 1.888092308287408
 test accuracy: 0.6635
Epoch 6
 training loss: 1.7704734783182774
 training accuracy: 0.6824333333333333
 test loss: 1.621088053770126
 test accuracy: 0.7097
Epoch 7
 training loss: 1.4861686219538708
 training accuracy: 0.72215
 test loss: 1.323789903692379
 test accuracy: 0.7479
Epoch 8
 training loss: 1.2131328408652022
 training accuracy: 0.7522166666666666
 test loss: 1.07

### Saving and loading the model

In [26]:
# Save the model
torch.save(model.state_dict(), 'model.pt')

In [27]:
# Load the model
model = Net()
model.load_state_dict(torch.load('model.pt'))

<All keys matched successfully>

In [30]:
# Check if the model is working

example = test_set[2]
img, label = example
print('Label:', label)

output = model(img)
probabilities = F.softmax(output, dim=1)
print('Probabilities:', probabilities)
print('Prediction:', torch.argmax(output))



Label: 1
Probabilities: tensor([[4.2593e-05, 9.6199e-01, 1.3530e-02, 8.3252e-03, 6.4952e-04, 4.8552e-03,
         2.3144e-03, 4.8090e-03, 2.7424e-03, 7.3933e-04]],
       grad_fn=<SoftmaxBackward0>)
Prediction: tensor(1)
