In [6]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import torch
#import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
# CHANGE TINYPROP HYPERPARAMS IN THIS MODULE!!!
from tinyProp_modules import TinyPropLinear

from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt

In [7]:
n_epochs = 5
batch_size_train = 20
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10

random_seed = 1
torch.manual_seed(random_seed)

<torch._C.Generator at 0x26a9ae19330>

In [8]:
train_set = torchvision.datasets.FashionMNIST("./data", download=True, transform=
                                                transforms.Compose([transforms.ToTensor()]))
test_set = torchvision.datasets.FashionMNIST("./data", download=True, train=False, transform=
                                               transforms.Compose([transforms.ToTensor()]))  
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size_train)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size_test)
def output_label(label):
    output_mapping = {
                 0: "T-shirt/Top",
                 1: "Trouser",
                 2: "Pullover",
                 3: "Dress",
                 4: "Coat", 
                 5: "Sandal", 
                 6: "Shirt",
                 7: "Sneaker",
                 8: "Bag",
                 9: "Ankle Boot"
                 }
    input = (label.item() if type(label) == torch.Tensor else label)
    return output_mapping[input]

In [9]:
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = TinyPropLinear(28*28, 32, 1)  # last argument being layer_idx (0 for last layer)
        self.fc2 = TinyPropLinear(32, 10, 0)
    
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x

In [10]:
def train(network, epoch):
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # log
        fc1_meanBpr[epoch-1] += network.fc1.info.miniBatchBpr
        fc1_meanK[epoch-1] += network.fc1.info.miniBatchK
        fc2_meanBpr[epoch-1] += network.fc2.info.miniBatchBpr
        fc2_meanK[epoch-1] += network.fc2.info.miniBatchK
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item()), end ='\r')
            train_losses.append(loss.item())
            train_counter.append((batch_idx*batch_size_train) + ((epoch-1)*len(train_loader.dataset)))
    print('Train Epoch: {} completed          '.format(epoch))
    
    fc1_meanBpr[epoch-1] /= len(train_loader.dataset)//batch_size_train
    fc1_meanK[epoch-1]   /= len(train_loader.dataset)//batch_size_train
    fc2_meanBpr[epoch-1] /= len(train_loader.dataset)//batch_size_train
    fc2_meanK[epoch-1]   /= len(train_loader.dataset)//batch_size_train
    
    
def test(network):
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('Test set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [11]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

fc1_meanBpr = torch.zeros([n_epochs])
fc1_meanK = torch.zeros([n_epochs])
fc2_meanBpr = torch.zeros([n_epochs])
fc2_meanK = torch.zeros([n_epochs])

#fc1_weights = torch.zeros([n_epochs, 784*32])
#fc1_loc_err = torch.zeros([n_epochs, 32])
#fc1_Y       = torch.zeros([n_epochs])
#fc2_weights = torch.zeros([n_epochs,  32*10])
#fc2_loc_err = torch.zeros([n_epochs, 10])

In [12]:
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

test(network)
for epoch in range(1, n_epochs + 1):
    train(network, epoch)
    test(network)



Test set: Avg. loss: 2.4303, Accuracy: 1098/10000 (11%)

Train Epoch: 1 completed          ]	Loss: 1.041449
Test set: Avg. loss: 1.2552, Accuracy: 6941/10000 (69%)

Train Epoch: 2 completed          ]	Loss: 0.618709
Test set: Avg. loss: 2.4894, Accuracy: 6688/10000 (67%)

Train Epoch: 3 completed          ]	Loss: 0.936955
Test set: Avg. loss: 1.5130, Accuracy: 7647/10000 (76%)

Train Epoch: 4 completed          ]	Loss: 1.098873
Test set: Avg. loss: 1.4026, Accuracy: 7891/10000 (79%)

Train Epoch: 5 completed          ]	Loss: 0.208076
Test set: Avg. loss: 2.5297, Accuracy: 7168/10000 (72%)



In [13]:
print(fc1_meanBpr)
print(fc2_meanBpr)

tensor([0.2456, 0.1747, 0.1473, 0.1349, 0.1255])
tensor([0.3757, 0.2779, 0.2549, 0.2386, 0.2275])


In [14]:
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.scatter(test_counter, test_losses, color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')

Text(0, 0.5, 'negative log likelihood loss')