In [3]:
import importlib

import numpy as np
import pandas as pd
import torch
from torch import optim

import assignment


# Advanced Deep Learning ─ Assignment 1

## Question 1

Try to load the same data directly from the "MNIST database" website http://yann.lecun.com/exdb/mnist/. Be careful: the images can have a different normalization and encoding.


In [None]:
# Re-import the local helper module so edits to it are picked up without
# restarting the kernel.
importlib.reload(assignment)

# The return value is not used in this cell.
# NOTE(review): presumably called for a side effect inside `assignment` — confirm.
assignment.load_data_torch()

# Images: loaded with the loader's default `normalize` setting.
# Labels: loaded with normalize=False so the class indices are left untouched.
X_train = assignment.load_data_ylc(
  file_name="train-images-idx3-ubyte.gz", is_image=True, nb_images=60000
)
y_train = assignment.load_data_ylc(
  file_name="train-labels-idx1-ubyte.gz",
  is_image=False,
  nb_images=60000,
  normalize=False,
)
X_test = assignment.load_data_ylc(
  file_name="t10k-images-idx3-ubyte.gz", is_image=True, nb_images=10000
)
y_test = assignment.load_data_ylc(
  file_name="t10k-labels-idx1-ubyte.gz",
  is_image=False,
  nb_images=10000,
  normalize=False,
)

# One-hot encode the labels: `one_hot` requires int64 indices, and the result
# is cast to float32 for use as a target by the loss.
y_train_one_hot = torch.nn.functional.one_hot(
  y_train.long(), num_classes=10
).to(torch.float32)
y_test_one_hot = torch.nn.functional.one_hot(
  y_test.long(), num_classes=10
).to(torch.float32)


### Q2

Using the utilities in plt and numpy, display some images and check that the corresponding labels are consistent.


In [None]:
# Build a figure from the training images and their labels (Q2: visual check
# that each image matches its label), then show it inline.
fig = assignment.display_digits(X_train=X_train, y_train=y_train)
fig.show()
# Also export the figure to disk.
# NOTE(review): presumably requires the "data/" directory to exist already — confirm.
fig.write_image("data/labels.png")


### Q3

Complete the code below so as to have an MLP with one hidden layer of 300 neurons. \
Remember that we want one-hot outputs.


In [None]:
# Let us define the neural network we are using

# Sizes of the hidden layers handed to the network builder.
# NOTE(review): Q3 asks for one hidden layer with 300 neurons (i.e. [300]);
# this configuration is kept as-is — confirm which one is intended.
hidden_sizes = [16, 16]
# `define_net` is neither defined nor imported in the visible notebook, so the
# bare call raised NameError on a fresh kernel. Every other helper is accessed
# through the `assignment` module, so it is called via that namespace.
# NOTE(review): assumes `assignment` exposes `define_net` — confirm.
net = assignment.define_net(hidden_sizes=hidden_sizes)

# Now we define the optimizer and the loss function
loss = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1)

# Initialize arrays to track errors
# The test error array is there for informative purposes.
# We do not use it when updating weights.
# In a real world scenario, we shouldn't even look at it to choose when to (early-) stop training.
error_train = []
error_test = []

# Training inputs: collapse dims 1..2 of X_train into a single feature axis.
inputs = torch.flatten(X_train, start_dim=1, end_dim=2)
# Training targets: the one-hot (float) labels.
labels = y_train_one_hot


# Total number of scalar entries across all parameter tensors of `net`
# (displayed as the cell output).
sum(p.numel() for p in net.parameters())


# Use the GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model and every tensor used for training/evaluation to `device`
# so that all operands of later operations live on the same device.
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)
net = net.to(device)
inputs = inputs.to(device)
labels = labels.to(device)
y_test_one_hot = y_test_one_hot.to(device)


### Q4

Complete the code below to perform a GD-based (gradient descent) optimization.


In [None]:
# Full-batch gradient descent: each iteration uses the whole training set.
# Note: this cell appends to `error_train` / `error_test`, so re-running it
# without re-running the setup cell continues training and extends the logs.
n_iterations = 20000
# Print progress only every `log_every` iterations (and on the last one) to
# avoid flooding the notebook output with one line per iteration.
log_every = 100

# The flattened test inputs never change, so build them once outside the loop.
test_inputs = torch.flatten(X_test, start_dim=1, end_dim=2)

for k in range(n_iterations):
  optimizer.zero_grad()

  # Forward pass. `net` and `inputs` are already on `device`, so the outputs
  # need no further transfer.
  outputs = net(inputs)

  # Define the empirical risk
  risk = loss(outputs, labels)

  # Make the backward step (1 line instruction)
  risk.backward()

  # Update the parameters (1 line instruction)
  optimizer.step()

  # Test loss is informational only, so it is computed without autograd.
  # It is evaluated after the parameter update, whereas `risk` was computed
  # before it. `y_pred_one_hot` holds the raw network outputs.
  with torch.no_grad():
    y_pred_one_hot = net(test_inputs)
    prediction_loss = loss(y_pred_one_hot, y_test_one_hot)

  # Read each scalar once and reuse it for both logging and the history.
  train_risk = risk.item()
  test_risk = prediction_loss.item()
  error_train.append(train_risk)
  error_test.append(test_risk)

  if k % log_every == 0 or k == n_iterations - 1:
    print(
      f"k = {k}, \tRisk = {train_risk}, \tPrediction loss = {test_risk}"
    )


# Collect the per-iteration losses into one table: one row per iteration,
# one column per data split.
error_columns = {"train_error": error_train, "test_error": error_test}
df_results = pd.DataFrame(data=error_columns)


In [None]:
# Plot the recorded train/test errors with the helper's default settings.
fig = assignment.plot_errors(df_results=df_results, hidden_sizes=hidden_sizes)
fig.show()

# Same plot again with log_y=True (logarithmic scale); the figure is only
# shown here, it is not written to disk.
fig = assignment.plot_errors(
  df_results=df_results, hidden_sizes=hidden_sizes, log_y=True
)
fig.show()


### Q5
Compute the final accuracy on the test set.


In [None]:
# Inference only: disable autograd so no computation graph is built (and kept
# in memory) for the forward pass over the test set.
with torch.no_grad():
  y_pred_one_hot = net(torch.flatten(X_test, start_dim=1, end_dim=2))

# Predicted class = index of the largest output along dim 1.
y_pred = torch.argmax(input=y_pred_one_hot, dim=1)
# Fraction of test samples whose predicted class equals the label.
acc = (y_test == y_pred).sum() / len(y_test)
print("Final accuracy on test", float(acc))
