In [103]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
%run tinyProp.py    # import all classes and methods

In [83]:
batch_size_train = 20
batch_size_test = 1000
random_seed = 1
torch.manual_seed(random_seed)

# Import dataset
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)


print(len(test_loader))

10


In [104]:
# Hyperparameters
n_epochs = 5
batch_size_train = 20
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5

# set tinyProp parameters
tpParams = TinyPropParams(0, 1, 1, 5)   # S_min, S_max, zeta, N_layers


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # define layers
        self.conv1 = nn.Conv2d(1, 6, 5)                     # normal layer
        self.conv2 = TinyPropConv2d(6, 12, 5, tpParams, 2)  # TinyProp-layer

        self.fc1 = TinyPropLinear(192, 120, tpParams, 3)    # TinyProp-layer
        self.fc2 = nn.Linear(120, 60)                       # normal layer
        self.fc3 = nn.Linear(60, 10)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        x = x.view(-1, 12 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim=1)
        return x

In [106]:
model = Net()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
loss = F.cross_entropy

# you can evaluate the untrained model to get baseline values
test_loss, test_acc = evaluate(model, loss, test_loader)

for epoch in range(1, n_epochs + 1):
    # train one epoch and retrieve average loss and accuracy. You may safe them for plotting
    train_loss, train_acc = trainOneEpoch(model, optimizer, loss, train_loader, epoch)

    # it is good practise to evaluate on the test set every epoch
    test_loss, test_acc = evaluate(model, loss, test_loader)

Train Epoch: 1 completed            Loss: 1.714375
Test  Eval : Avg.loss: 1.8931,  Accuracy: 7615/10000 (76%)

Train Epoch: 2 completed            Loss: 1.710115
Test  Eval : Avg.loss: 1.8705,  Accuracy: 7778/10000 (78%)

Train Epoch: 3 completed            Loss: 1.577805
Test  Eval : Avg.loss: 1.7659,  Accuracy: 8733/10000 (87%)

Train Epoch: 4 completed            Loss: 1.629576
Test  Eval : Avg.loss: 1.7506,  Accuracy: 8862/10000 (89%)

Train Epoch: 5 completed            Loss: 1.610686
Test  Eval : Avg.loss: 1.7553,  Accuracy: 8824/10000 (88%)



In [107]:
# after the training you can obtain the mean epoch bpr, k directly from the TinyPropLayers (one entry per epoch)

print(model.conv2.epochBpr)
print(model.fc1.epochBpr)

[0.3733935765587458, 0.03317278867515479, 0.03061097189790091, 0.02132896923838358, 0.017466703762619825]
[0.37972983315359726, 0.04404241844240241, 0.037439452266092646, 0.024121287349162027, 0.019539459619592043]
