In [1]:
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import pickle
import platform
import os

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!git clone https://github.com/vita-epfl/DLAV-2025.git
path = os.getcwd() + '/DLAV-2025/homeworks/hw2/test_batch'

Cloning into 'DLAV-2025'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 47 (delta 7), reused 6 (delta 6), pack-reused 32 (from 1)[K
Receiving objects: 100% (47/47), 27.72 MiB | 13.55 MiB/s, done.
Resolving deltas: 100% (14/14), done.


In [21]:
# Write the location of the saved weight relative to this notebook. Assume that they are in the same directory
### Path to Model Weights
pytorch_weights = os.getcwd() + '/drive/MyDrive/Colab Notebooks/linearClassifier_pytorch.ckpt'
softmax_weights = os.getcwd() + '/drive/MyDrive/Colab Notebooks/softmax_weights.pkl'

**TODO:** Copy your code from the Softmax Notebook to their corresponding function

In [29]:

def softmax_loss_vectorized(W, X, y):
    """
  Softmax loss function, vectorized version.
  Inputs and outputs are the same as softmax_loss_naive.
  """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.  #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    y_pred = X @ W
    y_pred -= np.max(y_pred, axis=1, keepdims=True)
    y_pred = np.exp(y_pred)
    y_pred /= np.sum(y_pred, axis=1, keepdims=True)
    loss = -np.sum(np.log(y_pred[range(X.shape[0]), y]))
    loss /= X.shape[0]
    y_pred[range(X.shape[0]), y] -= 1
    dW = X.T @ y_pred
    dW /= X.shape[0]
    #############################################################################
    #                          END OF YOUR CODE                                 #
    #############################################################################

    return loss, dW

class LinearClassifier(object):

    def __init__(self):
        self.W = None


    def train(self, X, y, learning_rate=1e-3, num_iters=30000,
                batch_size=200, verbose=False):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """

        num_train, dim = X.shape
        num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes

        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Run stochastic gradient descent to optimize W
        loss_history = []
        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #########################################################################
            # TODO:                                                                 #
            # Sample batch_size elements from the training data and their           #
            # corresponding labels to use in this round of gradient descent.        #
            # Store the data in X_batch and their corresponding labels in           #
            # y_batch; after sampling X_batch should have shape (dim, batch_size)   #
            # and y_batch should have shape (batch_size,)                           #
            #                                                                       #
            # Hint: Use np.random.choice to generate indices. Sampling with         #
            # replacement is faster than sampling without replacement.              #
            #########################################################################
            indices = np.random.choice(num_train, batch_size)
            X_batch = X[indices]
            y_batch = y[indices]
            #########################################################################
            #                       END OF YOUR CODE                                #
            #########################################################################

            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch)
            loss_history.append(loss)

            # perform parameter update
            #########################################################################
            # TODO:                                                                 #
            # Update the weights using the gradient and the learning rate.          #
            #########################################################################
            self.W -= learning_rate * grad
            #########################################################################
            #                       END OF YOUR CODE                                #
            #########################################################################

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))


        return loss_history



    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """

        ###########################################################################
        # TODO:                                                                   #
        # Implement this method. Store the predicted labels in y_pred.            #
        ###########################################################################
        y_pred = X @ self.W
        y_pred -= np.max(y_pred, axis=1, keepdims=True)
        y_pred = np.exp(y_pred)
        y_pred /= np.sum(y_pred, axis=1, keepdims=True)
        # print(f"Softmax probs are {y_pred}")
        y_pred = np.argmax(y_pred, axis=1)
        ###########################################################################
        #                           END OF YOUR CODE                              #
        ###########################################################################
        return y_pred

    def loss(self, X_batch, y_batch):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.


        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W

         e = y_batch - np.dot(X_batch, self.W)

        loss = np.dot(e.T, e)
        grad = -np.dot(x_batch.T,e) / x_batch.shape[0]

        return loss, grad

        """

        pass



class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch):
        return softmax_loss_vectorized(self.W, X_batch, y_batch)

**TODO:** Copy the model you created from the Pytorch Notebook

In [7]:
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(n_feature, n_hidden)
        self.fc2 = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = x.view(x.size(0),-1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    # !!New method added from the Pytorch Notebook
    def predict(self, x):
      logits = self.forward(x)
      return F.softmax(logits)

**TODO**: Follow the instructions in each of the following methods. **Note that these methods should return a 1-D array of size N where N is the number of data samples. The values should be the predicted classes [0,...,9].**



In [37]:
def predict_usingPytorch(X):
    #########################################################################
    # TODO:                                                                 #
    # - Create your model                                                   #
    # - Load your saved model                                               #
    # - Do the operation required to get the predictions                    #
    # - Return predictions in a numpy array (hint: return "argmax")         #
    #########################################################################
    net = Net(3072, 600, 10)
    net.load_state_dict(torch.load(pytorch_weights))
    # y_pred = np.array(range(X.size(0)))
    probs = net.predict(X)
    print(f"pytorch probs are {probs}")
    y_pred = torch.argmax(probs, dim=1)
    print(f"pytorch prediction is {y_pred}")
    #########################################################################
    #                       END OF YOUR CODE                                #
    #########################################################################
    return y_pred.numpy()

def predict_usingSoftmax(X):
    #########################################################################
    # TODO:                                                                 #
    # - Load your saved model into the weights of Softmax                   #
    # - Do the operation required to get the predictions                    #
    # - Return predictions in a numpy array                                 #
    #########################################################################
    with open('drive/MyDrive/Colab Notebooks/softmax_weights.pkl', 'rb') as f:
      W = pickle.load(f)
    # print(f"softmax weights are {W}")
    new_softmax = Softmax()
    new_softmax.W = W.copy()
    y_pred = new_softmax.predict(X)
    # print(f"softmax prediction is {y_pred}")
    #########################################################################
    #                       END OF YOUR CODE                                #
    #########################################################################
    return y_pred

This method loads the test dataset to evaluate the model.

In [16]:
## Read DATA
def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return  pickle.load(f)
    elif version[0] == '3':
        return  pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))

def load_CIFAR_batch(filename):
  """ load single batch of cifar """
  with open(filename, 'rb') as f:
    datadict = load_pickle(f)
    X = datadict['data']
    Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
    Y = np.array(Y)
    return X, Y
test_filename = path
X,Y = load_CIFAR_batch(test_filename)

This code snippet prepares the data for the different models. If you modify data manipulation in your notebooks, make sure to include them here.

In [18]:
## Data Manipulation

mean = np.array([0.4914, 0.4822, 0.4465])
std = np.array([0.2023, 0.1994, 0.2010])
X = np.divide(np.subtract( X/255 , mean[np.newaxis,np.newaxis,:]), std[np.newaxis,np.newaxis,:])

X_pytorch = torch.Tensor(np.moveaxis(X,-1,1))
X_softmax = np.reshape(X, (X.shape[0], -1))
X_softmax = np.hstack([X_softmax, np.ones((X_softmax.shape[0], 1))])


In [19]:
print(X_pytorch.shape)
print(X_softmax.shape)

torch.Size([10000, 3, 32, 32])
(10000, 3073)


Runs evaluation on the Pytorch and softmax model. **Be careful that *prediction_pytorch* and *prediction_softmax* are 1-D array of size N where N is the number of data samples. The values should be the predicted class [0,...,9]**

---



In [38]:
## Run Prediction
prediction_pytorch = predict_usingPytorch(X_pytorch)
prediction_softmax = predict_usingSoftmax(X_softmax)

## Run Evaluation
acc_softmax = sum(prediction_softmax == Y)/len(X)
acc_pytorch = sum(prediction_pytorch == Y)/len(X)
print("Softmax= %f ... Pytorch= %f"%(acc_softmax, acc_pytorch))

  net.load_state_dict(torch.load(pytorch_weights))


pytorch probs are tensor([[0.0315, 0.0351, 0.3518,  ..., 0.0191, 0.0032, 0.0057],
        [0.0338, 0.0368, 0.3556,  ..., 0.0194, 0.0033, 0.0064],
        [0.0339, 0.0358, 0.3562,  ..., 0.0193, 0.0033, 0.0062],
        ...,
        [0.0303, 0.0325, 0.3535,  ..., 0.0193, 0.0030, 0.0054],
        [0.0314, 0.0344, 0.3510,  ..., 0.0195, 0.0031, 0.0057],
        [0.0310, 0.0339, 0.3524,  ..., 0.0202, 0.0030, 0.0057]],
       grad_fn=<SoftmaxBackward0>)
pytorch prediction is tensor([2, 2, 2,  ..., 2, 2, 2])
Softmax probs are [[0.02398246 0.26096146 0.03513365 ... 0.11211585 0.02538142 0.10318027]
 [0.02469627 0.26706417 0.03443716 ... 0.11286426 0.02619485 0.11087338]
 [0.02512423 0.2637516  0.03514134 ... 0.11492727 0.02648502 0.10965312]
 ...
 [0.02401481 0.25141787 0.03441598 ... 0.11726684 0.02451482 0.10397931]
 [0.02362384 0.256266   0.03455883 ... 0.11477297 0.02485136 0.10531852]
 [0.02421012 0.25407564 0.03528871 ... 0.11977435 0.02470398 0.10500255]]
Softmax= 0.100000 ... Pytorch= 0

  return F.softmax(logits)
