In [27]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt

In [431]:
# Hyperparameters shared by the cells below.
n_epochs = 5               # number of training epochs run by the driver loop
batch_size_train = 64      # batch size handed to the training DataLoader
batch_size_test = 1000     # batch size handed to the test DataLoader
learning_rate = 0.01       # SGD learning rate
momentum = 0.5             # SGD momentum
log_interval = 10          # train() logs and records the loss every `log_interval` batches

# Seed torch's global RNG so runs are repeatable.
random_seed = 1
# cuDNN is switched off globally here — presumably for determinism; confirm intent.
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x19a194be110>

In [329]:
# One preprocessing pipeline shared by both splits: convert the image to a
# tensor, then normalise with mean 0.1307 and std 0.3081.
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split, downloaded to '/files/' on first use.
train_dataset = torchvision.datasets.MNIST(
    '/files/', train=True, download=True, transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size_train, shuffle=True)

# Held-out split. Shuffling is kept as in the original cell; test() only sums
# the loss and the number of correct predictions over all batches.
test_dataset = torchvision.datasets.MNIST(
    '/files/', train=False, download=True, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size_test, shuffle=True)

In [379]:
class Net(nn.Module):
    """Small fully connected classifier for 28x28 inputs.

    The input is flattened to 784 features, passed through a 32-unit hidden
    layer with ReLU, and mapped to log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()
        n_pixels, n_hidden, n_classes = 28 * 28, 32, 10
        self.fc1 = nn.Linear(n_pixels, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Return log-softmax scores of shape (N, 10) for a batch ``x``."""
        # Collapse everything but the batch dimension into 784 features.
        flat = x.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [420]:
def train(network, epoch):
    """Run one epoch of SGD over ``train_loader``.

    Relies on notebook-level globals: ``train_loader``, ``optimizer``,
    ``log_interval``, ``batch_size_train``, the history lists
    ``train_losses`` / ``train_counter`` and the accumulators
    ``fc1_weights`` / ``fc2_weights``.

    Args:
        network: model exposing ``fc1`` and ``fc2`` linear layers.
        epoch: 1-based epoch number; row ``epoch - 1`` of the accumulators
            is updated.
    """
    network.train()
    row = epoch - 1
    dataset_size = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        loss = F.nll_loss(network(data), target)
        loss.backward()
        optimizer.step()

        # Despite the "_weights" names, these rows accumulate the absolute
        # value of each weight's *gradient* over the epoch.
        fc1_weights[row] += network.fc1.weight.grad.flatten().abs()
        fc2_weights[row] += network.fc2.weight.grad.flatten().abs()

        if batch_idx % log_interval != 0:
            continue

        batch_loss = loss.item()
        # The carriage return makes each progress message overwrite the last.
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), dataset_size,
            100. * batch_idx / len(train_loader), batch_loss), end ='\r')
        train_losses.append(batch_loss)
        train_counter.append(batch_idx * batch_size_train + row * dataset_size)

    print('Train Epoch: {} completed          '.format(epoch))
    
def test(network):
    """Evaluate ``network`` on ``test_loader`` and record the average loss.

    Relies on the notebook-level globals ``test_loader`` and ``test_losses``.
    The per-example average negative log-likelihood is appended to
    ``test_losses`` and printed together with the accuracy.

    Args:
        network: model returning per-class log-probabilities of shape (N, C).
    """
    network.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            # Sum over the batch so that dividing by the dataset size below
            # gives a true per-example mean even if the last batch is smaller.
            # `reduction='sum'` replaces the deprecated `size_average=False`.
            total_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            # .item() keeps `correct` a plain int rather than a 0-dim tensor.
            correct += (pred == target).sum().item()
    n_examples = len(test_loader.dataset)
    test_loss = total_loss / n_examples
    test_losses.append(test_loss)
    print('Test set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_examples, 100. * correct / n_examples))

In [432]:
# Loss histories; train() and test() append to these lists.
train_losses, train_counter, test_losses = [], [], []

# x-positions for the test losses: one before training plus one per epoch,
# measured in number of training examples seen.
test_counter = [
    epochs_done * len(train_loader.dataset) for epochs_done in range(n_epochs + 1)
]

# Per-epoch accumulators filled by train(): one row per epoch, one column per
# entry of fc1.weight (784*32) and fc2.weight (32*10) respectively.
fc1_weights = torch.zeros(n_epochs, 784 * 32)
fc2_weights = torch.zeros(n_epochs, 32 * 10)

In [433]:
# Fresh model and optimizer. train() reads `optimizer` as a notebook-level
# global, so it must be (re)created together with the network.
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

# Evaluate once before training so test_losses ends up with n_epochs + 1
# entries, matching the length of test_counter.
test(network)
for epoch in range(1, n_epochs + 1):
    train(network, epoch)
    test(network)

Test set: Avg. loss: 2.3204, Accuracy: 1230/10000 (12%)

Train Epoch: 1 completed          	Loss: 0.400245
Test set: Avg. loss: 0.2832, Accuracy: 9193/10000 (92%)

Train Epoch: 2 completed          	Loss: 0.347525
Test set: Avg. loss: 0.2295, Accuracy: 9335/10000 (93%)

Train Epoch: 3 completed          	Loss: 0.110739
Test set: Avg. loss: 0.1981, Accuracy: 9417/10000 (94%)

Train Epoch: 4 completed          	Loss: 0.046442
Test set: Avg. loss: 0.1773, Accuracy: 9462/10000 (95%)

Train Epoch: 5 completed          	Loss: 0.203828
Test set: Avg. loss: 0.1640, Accuracy: 9502/10000 (95%)



In [334]:
# Training loss as a line and test loss as points, both against the number of
# training examples seen so far.
fig, ax = plt.subplots()
ax.plot(train_counter, train_losses, color='blue')
ax.scatter(test_counter, test_losses, color='red')
ax.legend(['Train Loss', 'Test Loss'], loc='upper right')
ax.set_xlabel('number of training examples seen')
ax.set_ylabel('negative log likelihood loss')

Text(0, 0.5, 'negative log likelihood loss')

In [423]:
def plotGradients(gradients, end, num_bins, num_samples):
    """Plot per-sample histograms of absolute values as a 3D surface.

    Each row of ``gradients`` is histogrammed (density-normalised) over
    ``num_bins`` equal-width bins spanning ``[0, end]``; the histograms are
    stacked along the sample axis and drawn with ``plot_surface``.

    Args:
        gradients: iterable of 1-D tensors, e.g. a 2-D tensor iterated row by
            row. The bin edges use torch's default float dtype, so the rows
            are expected to have that dtype as well.
        end: right edge of the histogram range.
        num_bins: number of equal-width bins between 0 and ``end``.
        num_samples: number of rows of the surface. Rows beyond
            ``len(gradients)`` are drawn as zeros; supplying more than
            ``num_samples`` rows raises ``IndexError``.
    """
    # linspace guarantees exactly num_bins left edges and num_bins + 1 bin
    # edges. arange with a fractional step can be off by one element because
    # of floating-point rounding, which would break the shapes below.
    left_edges = np.linspace(0, end, num_bins, endpoint=False)
    sample_idx = np.arange(num_samples)
    X, Y = np.meshgrid(left_edges, sample_idx)
    # zeros rather than empty: rows that never get filled must not contain
    # uninitialised memory.
    Z = np.zeros((num_samples, num_bins))
    bin_edges = torch.linspace(0, end, num_bins + 1)   # including the rightmost edge.

    for i, sample in enumerate(gradients):
        Z[i] = torch.histogram(torch.abs(sample), bins=bin_edges, density=True).hist

    fig = plt.figure()
    ax = plt.axes(projection='3d')
    ax.plot_surface(X, Y, Z, cmap='viridis', edgecolor='none')
    ax.set_title('Surface plot')
    ax.set_xlabel('absolute value (bin left edge)')
    ax.set_ylabel('sample index')
    ax.set_zlabel('density')
    plt.show()

In [436]:
# Sanity check: one row per epoch, one column per entry of fc1.weight.
print(fc1_weights.shape)
# Histogram each epoch's accumulated values over 20 bins on the range 0..9.
plotGradients(fc1_weights, 9, 20, n_epochs)

torch.Size([5, 25088])
