In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn.functional as F
import torch.optim as optim
# CHANGE TINYPROP HYPERPARAMS IN THIS MODULE!!!
from tinyProp_modules import TinyPropLinear

from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt

In [2]:
n_epochs = 5
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x1fcd2b74130>

In [4]:
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [7]:
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = TinyPropLinear(28*28, 32, 1)   # last argument being layer_idx (0 for last layer)
        self.fc2 = TinyPropLinear(32, 10, 0)
    
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x

In [8]:
def train(network, epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # log
        fc1_meanBpr[epoch-1] += network.fc1.info.miniBatchBpr
        fc1_meanK[epoch-1] += network.fc1.info.miniBatchK
        fc2_meanBpr[epoch-1] += network.fc2.info.miniBatchBpr
        fc2_meanK[epoch-1] += network.fc2.info.miniBatchK
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item()), end ='\r')
            train_losses.append(loss.item())
            train_counter.append((batch_idx*batch_size_train) + ((epoch-1)*len(train_loader.dataset)))
    print('Train Epoch: {} completed          '.format(epoch))
    
    fc1_meanBpr[epoch-1] /= len(train_loader.dataset)//batch_size_train
    fc1_meanK[epoch-1]   /= len(train_loader.dataset)//batch_size_train
    fc2_meanBpr[epoch-1] /= len(train_loader.dataset)//batch_size_train
    fc2_meanK[epoch-1]   /= len(train_loader.dataset)//batch_size_train
    
    
def test(network):
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('Test set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [9]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

fc1_meanBpr = torch.zeros([n_epochs])
fc1_meanK = torch.zeros([n_epochs])
fc2_meanBpr = torch.zeros([n_epochs])
fc2_meanK = torch.zeros([n_epochs])

In [10]:
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

test(network)
for epoch in range(1, n_epochs + 1):
    train(network, epoch)
    test(network)



Test set: Avg. loss: 3.2852, Accuracy: 1125/10000 (11%)

Train Epoch: 1 completed          	Loss: 0.450822
Test set: Avg. loss: 0.4069, Accuracy: 8779/10000 (88%)

Train Epoch: 2 completed          	Loss: 1.422083
Test set: Avg. loss: 0.9442, Accuracy: 8523/10000 (85%)

Train Epoch: 3 completed          	Loss: 2.291387
Test set: Avg. loss: 1.5549, Accuracy: 8345/10000 (83%)

Train Epoch: 4 completed          	Loss: 1.165662
Test set: Avg. loss: 1.7118, Accuracy: 8521/10000 (85%)

Train Epoch: 5 completed          	Loss: 1.257415
Test set: Avg. loss: 0.9398, Accuracy: 9172/10000 (92%)



In [11]:
print(fc1_meanBpr)
print(fc2_meanBpr)

tensor([0.1590, 0.1255, 0.0697, 0.0682, 0.0667])
tensor([0.2728, 0.0961, 0.0757, 0.0638, 0.0559])


In [12]:
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.scatter(test_counter, test_losses, color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')

Text(0, 0.5, 'negative log likelihood loss')