In [11]:
import vugrad as vg
import numpy as np
import plotly.express as px
import pandas as pd

### Question 5

In [2]:
# Two nodes wrapping random 2x2 arrays, and a third node built by adding them with `+`.
a = vg.TensorNode(np.random.randn(2, 2))
b = vg.TensorNode(np.random.randn(2, 2))
c = a + b

In [4]:
c.value # value after the summation of nodes a and b

array([[-0.62734419,  0.96965596],
       [-0.20857508, -0.71375401]])

In [9]:
c.source # the operation node (an OpNode, see the output below) that c refers to as its source

<vugrad.core.OpNode at 0x1f9cf3bbfd0>

In [11]:
c.source.inputs[0].value # value of the first input of the operation (so value of a)

array([[ 0.89399539, -0.37864434],
       [-0.87517414,  0.53242794]])

In [12]:
a.grad # gradient stored on a; all zeros in the recorded output (no backward() call appears in the cells above)

array([[0., 0.],
       [0., 0.]])

### Question 9

In [12]:
# Create a simple neural network.
# This is a `Module` consisting of other modules representing linear layers, provided by the vugrad library.
class MLP(vg.Module):
    """
    A simple MLP with one hidden layer, a configurable non-linearity (sigmoid or ReLU) on the hidden layer and a
    log-softmax on the output.
    """

    def __init__(self, input_size, output_size, hidden_mult=4, init='glorot', activation='sigmoid'):
        """
        :param input_size: Number of input features.
        :param output_size: Number of outputs (classes).
        :param hidden_mult: Multiplier that indicates how many times bigger the hidden layer is than the input layer.
        :param init: Passed unchanged as the third positional argument of `vg.Linear` for both layers.
        :param activation: Name of the hidden non-linearity: 'sigmoid' (default) or 'relu'.
        :raises ValueError: If `activation` is not one of the supported names.
        """
        super().__init__()

        # -- Resolve the activation by name. An unknown name used to fall back to sigmoid silently, which made a
        #    typo (e.g. 'ReLU') train a different network than the one that was asked for.
        activations = {'sigmoid': vg.sigmoid, 'relu': vg.relu}
        if activation not in activations:
            raise ValueError(f"Unknown activation {activation!r}; expected one of {sorted(activations)}")

        hidden_size = hidden_mult * input_size
        # -- There is no common wisdom on how big the hidden size should be, apart from the idea
        #    that it should be strictly _bigger_ than the input if at all possible.

        self.layer1 = vg.Linear(input_size, hidden_size, init)
        self.layer2 = vg.Linear(hidden_size, output_size, init)
        # -- The linear layer (without activation) is implemented in vugrad. We simply instantiate these modules, and
        #    add them to our network.

        self.activation = activations[activation]

    def forward(self, input):
        """
        Compute log-probabilities for a batch.

        :param input: Node holding the batch; `input.size()` must have length 2.
        :return: The node produced by `vg.logsoftmax` applied to the second layer's output.
        """
        assert len(input.size()) == 2

        # first layer
        hidden = self.layer1(input)

        # non-linearity
        hidden = self.activation(hidden)
        # -- We've called a utility function here, to mimic how this is usually done in pytorch. We could also do:
        #    hidden = Sigmoid.do_forward(hidden)
        #    hidden = ReLU.do_forward(hidden)

        # second layer
        output = self.layer2(hidden)

        # softmax activation
        output = vg.logsoftmax(output)
        # -- the logsoftmax computes the _logarithm_ of the probabilities produced by softmax. This makes the computation
        #    of the CE loss more stable when the probabilities get close to 0 (remember that the CE loss is the logarithm
        #    of these probabilities). It needs to be implemented in a specific way. See the source for details.

        return output

    def parameters(self):
        """Return the parameters of both linear layers, first layer first."""
        return self.layer1.parameters() + self.layer2.parameters()

In [13]:
def train_MLP(args, data, activation, hidden_mult=4, init='glorot'):
    """
    Train an `MLP` with plain mini-batch gradient descent and record its progress.

    :param args: Dict providing 'batch_size', 'lr' and 'epochs'.
    :param data: Tuple `((xtrain, ytrain), (xval, yval), num_classes)`; `xtrain` must be two-dimensional.
    :param activation: Activation name forwarded to `MLP` (also used as a prefix in the progress output).
    :param hidden_mult: Hidden-layer multiplier forwarded to `MLP`.
    :param init: Initialization name forwarded to `MLP`.
    :return: `(losses, accuracies, epochs, batch_losses)`:
             per-epoch summed batch loss divided by the number of training instances,
             validation accuracy measured at the START of each epoch (before that epoch's updates),
             the epoch indices, and the loss value of every individual batch.
    """
    (xtrain, ytrain), (xval, yval), num_classes = data
    num_instances, num_features = xtrain.shape

    # Histories handed back to the caller
    losses, accuracies, epochs, batch_losses = [], [], [], []

    ## Instantiate the model
    mlp = MLP(input_size=num_features, output_size=num_classes, hidden_mult=hidden_mult, activation=activation, init=init)

    batch_size = args['batch_size']

    print('\n## Starting training')
    for epoch in range(args['epochs']):

        print(f'{activation} epoch {epoch:03}')

        ## Validation accuracy of the model as it stands before this epoch's updates
        val_out = mlp(vg.TensorNode(xval))
        predictions = np.argmax(val_out.value, axis=1)
        acc = (predictions == yval).sum() / yval.shape[0]

        val_out.clear() # gc the computation graph
        print(f'       accuracy: {acc:.4}')

        accuracies.append(acc)
        epochs.append(epoch)

        epoch_loss = 0.0 # running sum of the training loss

        # Walk over the training data in consecutive slices of at most `batch_size` rows
        for start in range(0, num_instances, batch_size):
            stop = min(start + batch_size, num_instances)

            # Wrap the input slice in a node; the targets stay a raw array
            inputs = vg.TensorNode(value=xtrain[start:stop, :])
            targets = ytrain[start:stop]

            loss = vg.logceloss(mlp(inputs), targets)
            # -- `loss` is the node at the top of the computation graph (MLP + loss). Backpropagation, gradient
            #    reset and graph clean-up are all started from it.
            # -- The MLP produces log probabilities, hence the log-variant of the CE loss.

            # -- Only the raw value is accumulated, never the node itself.
            epoch_loss += loss.value
            batch_losses.append(loss.value)

            # Start the backpropagation
            loss.backward()

            # Apply gradient descent
            for parm in mlp.parameters():
                # -- The node's members are edited directly, so no computation graph is built for the update.
                parm.value -= args['lr'] * parm.grad

            # -- In Pytorch, gradient descent is abstracted away into an Optimizer, which allows slightly more
            #    complex optimizers than plain gradient descent.

            # Reset the gradients to zero, then drop the parts of the graph we no longer need
            loss.zero_grad()
            loss.clear()

        mean_loss = epoch_loss / num_instances
        losses.append(mean_loss)
        print(f'   running loss: {mean_loss:.4}')

    return losses, accuracies, epochs, batch_losses

In [13]:
args = {
    'batch_size': 128,
    # 'lr': 0.01, # vg.load_synth()
    'lr': 0.0001, # vg.load_mnist(final=False, flatten=True)
    'epochs': 10
}

# Load MNIST once and hand the same arrays to both runs: train_MLP only reads the data,
# so both activations are compared on identical inputs without paying for a second load.
mnist_data = vg.load_mnist(final=False, flatten=True)

losses_sigmoid, accuracies_sigmoid, epochs_sigmoid, batch_losses_sigmoid = train_MLP(args=args, data=mnist_data, activation='sigmoid')
losses_relu, accuracies_relu, epochs_relu, batch_losses_relu = train_MLP(args=args, data=mnist_data, activation='relu')


## Starting training
sigmoid epoch 000
       accuracy: 0.1022
   running loss: 0.3984
sigmoid epoch 001
       accuracy: 0.947
   running loss: 0.2123
sigmoid epoch 002
       accuracy: 0.957
   running loss: 0.1602
sigmoid epoch 003
       accuracy: 0.9612
   running loss: 0.1268
sigmoid epoch 004
       accuracy: 0.964
   running loss: 0.1025
sigmoid epoch 005
       accuracy: 0.9664
   running loss: 0.08388
sigmoid epoch 006
       accuracy: 0.9678
   running loss: 0.0697
sigmoid epoch 007
       accuracy: 0.9676
   running loss: 0.05876
sigmoid epoch 008
       accuracy: 0.9688
   running loss: 0.05015
sigmoid epoch 009
       accuracy: 0.9694
   running loss: 0.04342

## Starting training
relu epoch 000
       accuracy: 0.042
   running loss: 1.864e+03
relu epoch 001
       accuracy: 0.794
   running loss: 0.6548
relu epoch 002
       accuracy: 0.909
   running loss: 0.4874
relu epoch 003
       accuracy: 0.916
   running loss: 0.4606
relu epoch 004
       accuracy: 0.9434
   ru

In [4]:
def normalize_data(dataset):
    """
    Min-max scale `dataset` to the range [0, 1].

    :param dataset: Array-like of numbers (a plain list is fine).
    :return: Float numpy array of the same shape. An empty input yields an empty array, and an input whose
             values are all equal yields zeros (instead of NaN from a 0/0 division).
    """
    values = np.asarray(dataset, dtype=float)
    if values.size == 0:
        return values

    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # All values identical: there is no range to scale by.
        return np.zeros_like(values)

    return (values - lowest) / span

In [5]:
def plot_diagrams(epochs_loss_relu, epochs_acc_relu,
                  epochs_loss_sigmoid, epochs_acc_sigmoid,
                  epochs_batch_loss=[], epochs_batch_acc=[], total_epochs=None):
    """
    Draw two line charts (loss and accuracy) comparing the ReLU and sigmoid runs, optionally with a 'batch' series.

    :param epochs_loss_relu: Per-epoch losses of the ReLU run.
    :param epochs_acc_relu: Per-epoch accuracies of the ReLU run.
    :param epochs_loss_sigmoid: Per-epoch losses of the sigmoid run.
    :param epochs_acc_sigmoid: Per-epoch accuracies of the sigmoid run.
    :param epochs_batch_loss: Optional per-batch losses, spread over [0, total_epochs] on the x-axis.
    :param epochs_batch_acc: Optional per-batch accuracies, spread over [0, total_epochs] on the x-axis.
    :param total_epochs: Width of the x-axis used for the batch series; defaults to the number of ReLU epochs.
    """
    if total_epochs is None:
        total_epochs = len(epochs_loss_relu)

    def build_frame(column, relu_values, sigmoid_values, batch_values):
        # Every series gets x-values and labels sized from ITS OWN length, so the three columns of the frame
        # always line up, even when the runs or the two batch lists differ in length.
        relu_values, sigmoid_values, batch_values = list(relu_values), list(sigmoid_values), list(batch_values)
        x_values = list(range(1, len(relu_values) + 1)) + list(range(1, len(sigmoid_values) + 1))
        if batch_values:
            x_values += list(normalize_data(list(range(len(batch_values)))) * total_epochs)
        labels = ['relu'] * len(relu_values) + ['sigmoid'] * len(sigmoid_values) + ['batch'] * len(batch_values)
        return pd.DataFrame({'epochs': x_values,
                             column: relu_values + sigmoid_values + batch_values,
                             'name': labels})

    charts = (
        ('loss', epochs_loss_relu, epochs_loss_sigmoid, epochs_batch_loss),
        ('Accuracy', epochs_acc_relu, epochs_acc_sigmoid, epochs_batch_acc),
    )
    for column, relu_values, sigmoid_values, batch_values in charts:
        df = build_frame(column, relu_values, sigmoid_values, batch_values)
        fig = px.line(df, x='epochs', y=column, color='name', markers=False, width=600, height=400)
        fig.show()

In [15]:
# Compare the per-epoch ReLU and sigmoid curves; both batch series are passed as empty lists.
plot_diagrams(losses_relu, accuracies_relu,
              losses_sigmoid, accuracies_sigmoid,
              [], [], total_epochs=len(losses_relu))

### Question 10

In [14]:
def plot_data(results, type='validation', total_epochs=5, names=[]):
    """
    Draw one loss chart and one accuracy chart with a line per experiment run.

    :param results: Iterable of dicts, one per run. The FIRST value of each dict is used as the loss series and
                    the SECOND as the accuracy series (insertion order), further values are ignored.
    :param type: 'batch' spreads each run's points evenly over [0, total_epochs]; any other value numbers the
                 points 1..len(series).
    :param total_epochs: Width of the x-axis in 'batch' mode.
    :param names: Legend label per run, in the same order as `results`. Runs without a label fall back to
                  'run <index>'.
    """
    loss = []
    acc = []
    vector_epochs = []
    name = []
    for index, res in enumerate(results):
        result = list(res.values())
        run_loss, run_acc = list(result[0]), list(result[1])

        # The x-values are built per run from that run's own length, so runs of different length (and
        # iterables without len()) still produce columns that line up.
        if type == 'batch':
            vector_epochs += list(normalize_data(list(range(len(run_loss)))) * total_epochs)
        else:
            vector_epochs += list(range(1, len(run_loss) + 1))

        label = names[index] if index < len(names) else f'run {index}'

        loss += run_loss
        acc += run_acc
        name += [label] * len(run_loss)

    print(len(vector_epochs), len(loss), len(name), len(acc))

    df = pd.DataFrame({'epochs': vector_epochs,'loss': loss, 'name': name})
    fig = px.line(df, x='epochs', y='loss', color='name', markers=False, width=600, height=400)
    fig.show()

    df = pd.DataFrame({'epochs': vector_epochs,'Accuracy': acc, 'name': name})
    fig = px.line(df, x='epochs', y='Accuracy', color='name', markers=False, width=600, height=400)
    fig.show()

In [22]:
args = {
    'batch_size': 128,
    'lr': 0.0001, # vg.load_mnist(final=False, flatten=True)
    'epochs': 5
}

# The dataset does not depend on the swept value, so it is loaded once and reused by every run.
mnist_data = vg.load_mnist(final=False, flatten=True)

hidden_mults = [4,6,8,10]
res_hidden = {}
for hidden_mult in hidden_mults:
    print(f"--- Hidden Nodes: {hidden_mult} ---")
    losses_hidden, accuracies_hidden, epochs_hidden, batch_losses_hidden = train_MLP(args=args, data=mnist_data, hidden_mult=hidden_mult, activation='sigmoid')
    # Key order matters: plot_data reads the first value as the loss and the second as the accuracy.
    res_hidden[hidden_mult] = {
        'losses_hidden': losses_hidden,
        'accuracies_hidden': accuracies_hidden,
        'batch_losses_hidden': batch_losses_hidden
    }


--- Hidden Nodes: 4 ---

## Starting training
sigmoid epoch 000
       accuracy: 0.112
   running loss: 0.4057
sigmoid epoch 001
       accuracy: 0.9502
   running loss: 0.2127
sigmoid epoch 002
       accuracy: 0.9566
   running loss: 0.162
sigmoid epoch 003
       accuracy: 0.9624
   running loss: 0.1288
sigmoid epoch 004
       accuracy: 0.9644
   running loss: 0.1048
--- Hidden Nodes: 6 ---

## Starting training
sigmoid epoch 000
       accuracy: 0.109
   running loss: 0.3715
sigmoid epoch 001
       accuracy: 0.953
   running loss: 0.1832
sigmoid epoch 002
       accuracy: 0.9616
   running loss: 0.1315
sigmoid epoch 003
       accuracy: 0.9648
   running loss: 0.09865
sigmoid epoch 004
       accuracy: 0.9652
   running loss: 0.0762
--- Hidden Nodes: 8 ---

## Starting training
sigmoid epoch 000
       accuracy: 0.114
   running loss: 0.3657
sigmoid epoch 001
       accuracy: 0.9516
   running loss: 0.162
sigmoid epoch 002
       accuracy: 0.9604
   running loss: 0.11
sigmoid epo

In [23]:
# One line per hidden-layer multiplier; the legend labels are the multiplier values (the keys of res_hidden).
plot_data(res_hidden.values(), names=list(res_hidden.keys()))

20 20 20 20


In [16]:
args = {
    'batch_size': 128,
    'lr': 0.0001, # vg.load_mnist(final=False, flatten=True)
    'epochs': 5
}

# The dataset does not depend on the swept value, so it is loaded once and reused by every run.
mnist_data = vg.load_mnist(final=False, flatten=True)

initializations = ['glorot', 'glorot_uniform', 'he', 'he_uniform', 'zeros']
res_init = {}
for init in initializations:
    # The banner names the quantity that is actually swept here (it used to say "Hidden Nodes").
    print(f"--- Initialization: {init} ---")
    losses_init, accuracies_init, epochs_init, batch_losses_init = \
        train_MLP(args=args, data=mnist_data, init=init, activation='sigmoid')
    # Key order matters: plot_data reads the first value as the loss and the second as the accuracy.
    res_init[init] = {
        'losses_init': losses_init,
        'accuracies_init': accuracies_init,
        'batch_losses_init': batch_losses_init
    }

--- Hidden Nodes: glorot ---

## Starting training
sigmoid epoch 000
       accuracy: 0.0876
   running loss: 0.4056
sigmoid epoch 001
       accuracy: 0.9456
   running loss: 0.2127
sigmoid epoch 002
       accuracy: 0.957
   running loss: 0.1614
sigmoid epoch 003
       accuracy: 0.9608
   running loss: 0.1284
sigmoid epoch 004
       accuracy: 0.963
   running loss: 0.1048
--- Hidden Nodes: glorot_uniform ---

## Starting training
sigmoid epoch 000
       accuracy: 0.1148
   running loss: 0.4421
sigmoid epoch 001
       accuracy: 0.9418
   running loss: 0.2237
sigmoid epoch 002
       accuracy: 0.9542
   running loss: 0.1707
sigmoid epoch 003
       accuracy: 0.9582
   running loss: 0.135
sigmoid epoch 004
       accuracy: 0.9608
   running loss: 0.1087
--- Hidden Nodes: he ---

## Starting training
sigmoid epoch 000


  sigx =  1 / (1 + np.exp(-input))


       accuracy: 0.1008
   running loss: 0.5295
sigmoid epoch 001
       accuracy: 0.9294
   running loss: 0.2798
sigmoid epoch 002
       accuracy: 0.9426
   running loss: 0.224
sigmoid epoch 003
       accuracy: 0.9462
   running loss: 0.1888
sigmoid epoch 004
       accuracy: 0.9512
   running loss: 0.1636
--- Hidden Nodes: he_uniform ---

## Starting training
sigmoid epoch 000
       accuracy: 0.0982
   running loss: 0.5864
sigmoid epoch 001
       accuracy: 0.9194
   running loss: 0.3005
sigmoid epoch 002
       accuracy: 0.9328
   running loss: 0.2399
sigmoid epoch 003
       accuracy: 0.9392
   running loss: 0.2021
sigmoid epoch 004
       accuracy: 0.9456
   running loss: 0.174
--- Hidden Nodes: zeros ---

## Starting training
sigmoid epoch 000
       accuracy: 0.109
   running loss: 0.4031
sigmoid epoch 001
       accuracy: 0.9496
   running loss: 0.2121
sigmoid epoch 002
       accuracy: 0.96
   running loss: 0.1605
sigmoid epoch 003
       accuracy: 0.963
   running loss: 0.

In [18]:
# One line per initialization scheme; the legend labels are the scheme names (the keys of res_init).
plot_data(res_init.values(), names=list(res_init.keys()))

25 25 25 25


### Question 11

In [29]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [30]:
class Net(nn.Module):
    """
    Small convolutional classifier: two conv + ReLU + max-pool stages followed by three fully connected layers.

    The first linear layer expects 16 * 5 * 5 features per sample after flattening, and the last one produces
    10 raw scores per sample (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order so that seeded initialization stays reproducible.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of 3-channel images to a (batch, 10) tensor of class scores."""
        # Convolutional feature extractor; the same pooling layer is reused after each convolution
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Keep the batch dimension, collapse everything else
        features = torch.flatten(x, start_dim=1)

        # Classifier head: ReLU after the first two linear layers only
        hidden = F.relu(self.fc1(features))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

##### Train Network

In [35]:
def train(net, epochs, trainloader, criterion, optimizer):
    """
    Train `net` for `epochs` passes over `trainloader` and record loss/accuracy statistics.

    :param net: Model, called as `net(inputs)`.
    :param epochs: Number of passes over `trainloader`.
    :param trainloader: Iterable yielding `(inputs, labels)` batches.
    :param criterion: Loss callable `criterion(outputs, labels)`. If it has `reduction == 'sum'` the batch loss is
                      treated as a sum over samples, otherwise as a mean over the batch.
    :param optimizer: Optimizer providing `zero_grad()` and `step()`.
    :return: `(train_loss, train_accuracy, train_batch_loss)`: the mean loss PER SAMPLE of each epoch, the
             training accuracy of each epoch (measured on the fly, while the weights change), and the loss of
             every individual batch.
    """
    train_batch_loss = []
    train_accuracy = []
    train_loss = []

    loss_is_summed = getattr(criterion, 'reduction', 'mean') == 'sum'

    for epoch in range(epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        correct = 0
        total = 0
        sample_loss_sum = 0.0
        for i, (inputs, labels) in enumerate(trainloader, 0):
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_size = labels.size(0)
            batch_loss = loss.item()

            # accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

            # A mean-reduced batch loss has to be weighted by the batch size before it is divided by the
            # number of samples; otherwise the epoch loss shrinks by a factor of the batch size and runs with
            # different batch sizes cannot be compared.
            sample_loss_sum += batch_loss if loss_is_summed else batch_loss * batch_size

            # print statistics
            running_loss += batch_loss
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0

            train_batch_loss.append(batch_loss)
        train_loss.append(sample_loss_sum / total)
        train_accuracy.append(correct / total)

    print('Finished Training')
    return train_loss, train_accuracy, train_batch_loss

##### Test Network

In [37]:
def test(net, testloader, classes):
    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}
    
    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = net(images)
            
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # collect the correct predictions for each class
            _, predictions = torch.max(outputs, 1)
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1  
                
    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
    
    print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
    return (correct / total)

##### Experiments

In [57]:
def plot_alphas(results, type='validation', total_epochs=5, names=[]):
    """
    Draw one loss chart and one accuracy chart with a line per experiment run.

    This used to be a copy of `plot_data` whose x-axis was hard-coded for exactly four runs. It now forwards to
    `plot_data`, which sizes the x-axis from the number of runs it is given.

    :param results: Iterable of dicts, one per run; the first value of each dict is plotted as the loss and the
                    second as the accuracy.
    :param type: 'batch' spreads the points over [0, total_epochs]; any other value numbers them from 1.
    :param total_epochs: Width of the x-axis in 'batch' mode.
    :param names: Legend label per run, in the same order as `results`.
    """
    # Materialize first so that views such as dict.values() and one-shot iterables both work downstream.
    runs = list(results)
    plot_data(runs, type=type, total_epochs=total_epochs, names=names)

In [39]:
def experiment(net, batch_size, epochs, transform, criterion, optimizer,
               data_root='./data', download=False, num_workers=2):
    """
    Train `net` on the CIFAR10 training set and evaluate it on the CIFAR10 test set.

    :param net: Model to train; it is modified in place.
    :param batch_size: Batch size of both data loaders.
    :param epochs: Number of training epochs.
    :param transform: Transform applied to the images of both sets.
    :param criterion: Loss passed on to `train`.
    :param optimizer: Optimizer passed on to `train`; it must have been built on `net`'s parameters.
    :param data_root: Directory holding the dataset (previously fixed to './data').
    :param download: Whether torchvision may download the dataset if needed (previously fixed to False).
    :param num_workers: Worker processes per data loader (previously fixed to 2).
    :return: `(train_loss, train_accuracy, train_batch_loss, accuracy)`: the three histories returned by `train`
             followed by the accuracy returned by `test`.
    """
    classes = ('plane', 'car', 'bird', 'cat',
            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    trainset = torchvision.datasets.CIFAR10(root=data_root, train=True,
                                                download=download, transform=transform)

    # Only the training loader is shuffled
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                                shuffle=True, num_workers=num_workers)

    testset = torchvision.datasets.CIFAR10(root=data_root, train=False,
                                        download=download, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                                shuffle=False, num_workers=num_workers)

    # train
    train_loss, train_accuracy, train_batch_loss = train(net, epochs, trainloader, criterion, optimizer)
    # predict
    accuracy = test(net, testloader, classes)

    return train_loss, train_accuracy, train_batch_loss, accuracy

In [41]:
# Experiment 0 - tutorial parameters
# Baseline configuration. Everything assigned here is a notebook-level global; `transform` in particular is
# read again by the cell that builds `traindata`.
net = Net()

# Convert images to tensors, then normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
lr = 0.001
batch_size = 4
epochs = 2
momentum=0.9
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
criterion = nn.CrossEntropyLoss()

# The baseline run itself is disabled; uncomment the next line to execute it.
# train_loss_exp0, train_accuracy_exp0, train_batch_loss_exp0, accuracy_exp0 = experiment(net, batch_size, epochs, transform, criterion, optimizer)

In [42]:
traindata = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=False, transform=transform)
# Split the set 90% / 10%. The validation size is the remainder, so the two sizes always add up to
# len(traindata); truncating both parts separately can leave samples unassigned, which random_split rejects.
size = len(traindata)
train_size = int(size * 0.9)
val_size = size - train_size
trainset, valset = torch.utils.data.random_split(traindata, [train_size, val_size])
# NOTE(review): in the visible cells `experiment` builds its own loaders from the full training set, so
# `trainset` / `valset` do not appear to be used afterwards - confirm whether the split should be passed in.

In [43]:
# Experiment 1 - change learning rate
learning_rates = [0.1, 0.01, 0.001, 0.0001]
results_exp1 = {}

# Settings shared by every run of this experiment
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
batch_size = 4
epochs = 5
momentum=0.9

for lr in learning_rates:
    print("Test learning rate: ", lr)

    # Fresh model, optimizer and loss per run so that no state leaks between learning rates
    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    (train_loss_exp1, train_accuracy_exp1,
     train_batch_loss_exp1, accuracy_exp1) = experiment(
        net, batch_size, epochs, transform, criterion, optimizer)

    # Insertion order matters: the plotting helpers read the first value as loss and the second as accuracy
    results_exp1[lr] = dict(
        train_loss=train_loss_exp1,
        train_accuracy=train_accuracy_exp1,
        train_batch_loss=train_batch_loss_exp1,
        accuracy=accuracy_exp1,
    )

Test learning rate:  0.1
[1,  2000] loss: 2.360
[1,  4000] loss: 2.362
[1,  6000] loss: 2.362
[1,  8000] loss: 2.361
[1, 10000] loss: 2.365
[1, 12000] loss: 2.362
[2,  2000] loss: 2.361
[2,  4000] loss: 2.363
[2,  6000] loss: 2.362
[2,  8000] loss: 2.356
[2, 10000] loss: 2.357
[2, 12000] loss: 2.360
[3,  2000] loss: 2.362
[3,  4000] loss: 2.360
[3,  6000] loss: 2.362
[3,  8000] loss: 2.358
[3, 10000] loss: 2.358
[3, 12000] loss: 2.356
[4,  2000] loss: 2.360
[4,  4000] loss: 2.362
[4,  6000] loss: 2.361
[4,  8000] loss: 2.360
[4, 10000] loss: 2.362
[4, 12000] loss: 2.357
[5,  2000] loss: 2.359
[5,  4000] loss: 2.362
[5,  6000] loss: 2.361
[5,  8000] loss: 2.356
[5, 10000] loss: 2.360
[5, 12000] loss: 2.361
Finished Training
Accuracy for class: plane is 0.0 %
Accuracy for class: car   is 0.0 %
Accuracy for class: bird  is 100.0 %
Accuracy for class: cat   is 0.0 %
Accuracy for class: deer  is 0.0 %
Accuracy for class: dog   is 0.0 %
Accuracy for class: frog  is 0.0 %
Accuracy for class: 

In [45]:
# One line per learning rate. The plotted series are the first two entries of each result dict (train_loss, train_accuracy).
plot_alphas(results=results_exp1.values(), type='validation', total_epochs=5, names=list(results_exp1.keys()))

20 20 20 20


In [46]:
# Experiment 2 - change batch size
batch_sizes = [4, 50, 100, 150]
results_exp2 = {}

# Settings shared by every run of this experiment
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
lr = 0.001
epochs = 5
momentum=0.9

for batch_size in batch_sizes:
    print("Test batch size: ", batch_size)

    # Fresh model, optimizer and loss per run so that no state leaks between batch sizes
    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    (train_loss_exp2, train_accuracy_exp2,
     train_batch_loss_exp2, accuracy_exp2) = experiment(
        net, batch_size, epochs, transform, criterion, optimizer)

    # Insertion order matters: the plotting helpers read the first value as loss and the second as accuracy
    results_exp2[batch_size] = dict(
        train_loss=train_loss_exp2,
        train_accuracy=train_accuracy_exp2,
        train_batch_loss=train_batch_loss_exp2,
        accuracy=accuracy_exp2,
    )

Test batch size:  4
[1,  2000] loss: 2.215
[1,  4000] loss: 1.834
[1,  6000] loss: 1.653
[1,  8000] loss: 1.584
[1, 10000] loss: 1.491
[1, 12000] loss: 1.462
[2,  2000] loss: 1.410
[2,  4000] loss: 1.364
[2,  6000] loss: 1.327
[2,  8000] loss: 1.319
[2, 10000] loss: 1.297
[2, 12000] loss: 1.272
[3,  2000] loss: 1.193
[3,  4000] loss: 1.202
[3,  6000] loss: 1.198
[3,  8000] loss: 1.198
[3, 10000] loss: 1.173
[3, 12000] loss: 1.176
[4,  2000] loss: 1.086
[4,  4000] loss: 1.111
[4,  6000] loss: 1.100
[4,  8000] loss: 1.100
[4, 10000] loss: 1.091
[4, 12000] loss: 1.095
[5,  2000] loss: 1.015
[5,  4000] loss: 1.017
[5,  6000] loss: 1.029
[5,  8000] loss: 1.058
[5, 10000] loss: 1.050
[5, 12000] loss: 1.026
Finished Training
Accuracy for class: plane is 71.1 %
Accuracy for class: car   is 79.8 %
Accuracy for class: bird  is 36.5 %
Accuracy for class: cat   is 56.1 %
Accuracy for class: deer  is 45.0 %
Accuracy for class: dog   is 52.7 %
Accuracy for class: frog  is 74.8 %
Accuracy for class: 

In [58]:
# One line per batch size. The plotted series are the first two entries of each result dict (train_loss, train_accuracy).
plot_alphas(results=results_exp2.values(), type='validation', total_epochs=5, names=list(results_exp2.keys()))

20 20 20 20


### Question 12

In [64]:
# Experiment 3 - change optimizer
optimizers = [optim.SGD, optim.Adam, optim.RMSprop]
results_exp3 = {}

# Settings shared by every run of this experiment
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
lr = 0.001
batch_size = 4
epochs = 5

for opt in optimizers:
    # Banner per run, as in the other experiments, so the log below can be attributed to an optimizer
    print("Test optimizer: ", opt.__name__)
    net = Net()

    # SGD keeps the momentum used in the earlier experiments; the other optimizers run with their defaults.
    # The optimizer is built exactly once (it used to be built and then rebuilt for SGD).
    extra_kwargs = {'momentum': 0.9} if opt is optim.SGD else {}
    optimizer = opt(net.parameters(), lr=lr, **extra_kwargs)
    criterion = nn.CrossEntropyLoss()

    train_loss_exp3, train_accuracy_exp3, train_batch_loss_exp3, accuracy_exp3 = experiment(net, batch_size, epochs, transform, criterion, optimizer)
    # Insertion order matters: plot_data reads the first value as loss and the second as accuracy
    results_exp3[opt.__name__] = {'train_loss': train_loss_exp3,
    'train_accuracy': train_accuracy_exp3,
    'train_batch_loss': train_batch_loss_exp3,
    'accuracy': accuracy_exp3}

[1,  2000] loss: 2.276
[1,  4000] loss: 1.937
[1,  6000] loss: 1.691
[1,  8000] loss: 1.590
[1, 10000] loss: 1.520
[1, 12000] loss: 1.478
[2,  2000] loss: 1.395
[2,  4000] loss: 1.345
[2,  6000] loss: 1.353
[2,  8000] loss: 1.310
[2, 10000] loss: 1.268
[2, 12000] loss: 1.280
[3,  2000] loss: 1.201
[3,  4000] loss: 1.194
[3,  6000] loss: 1.190
[3,  8000] loss: 1.179
[3, 10000] loss: 1.183
[3, 12000] loss: 1.183
[4,  2000] loss: 1.093
[4,  4000] loss: 1.116
[4,  6000] loss: 1.119
[4,  8000] loss: 1.106
[4, 10000] loss: 1.073
[4, 12000] loss: 1.104
[5,  2000] loss: 1.024
[5,  4000] loss: 1.042
[5,  6000] loss: 1.048
[5,  8000] loss: 1.033
[5, 10000] loss: 1.039
[5, 12000] loss: 1.049
Finished Training
Accuracy for class: plane is 70.0 %
Accuracy for class: car   is 79.2 %
Accuracy for class: bird  is 51.7 %
Accuracy for class: cat   is 34.4 %
Accuracy for class: deer  is 52.5 %
Accuracy for class: dog   is 38.8 %
Accuracy for class: frog  is 72.0 %
Accuracy for class: horse is 64.6 %
Accu

In [68]:
# One line per optimizer. plot_data is used here because it sizes the x-axis from the number of runs (three).
plot_data(results=results_exp3.values(), type='validation', total_epochs=5, names=list(results_exp3.keys()))

15 15 15 15
