<a href="https://colab.research.google.com/github/samibahig/IFT6390/blob/main/Multi_SVM_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

In [None]:
class SVM:
    """One-versus-all linear SVM trained with minibatch gradient descent.

    The objective minimized for a batch of n examples is

        (1 / n) * sum_{i, j} max(0, 2 - (x_i . w_j) * y_ij) ** 2
            + (C / 2) * sum(w ** 2)

    where ``y_ij`` is +1 when example i belongs to class j and -1 otherwise.

    Attributes set by ``__init__``:
        eta: step size applied to the gradient in ``fit``.
        C: coefficient of the L2 penalty on the weights.
        niter: number of full passes over the training set in ``fit``.
        batch_size: number of examples per minibatch in ``fit``.
        verbose: when truthy, ``fit`` prints metrics after every pass.

    Attributes set by ``fit``:
        num_features: number of columns of ``x_train``.
        m: number of classes (``y_train.max() + 1``).
        w: weight matrix of shape (num_features, m).
    """

    def __init__(self, eta, C, niter, batch_size, verbose):
        self.eta = eta
        self.C = C
        self.niter = niter
        self.batch_size = batch_size
        self.verbose = verbose

    def make_one_versus_all_labels(self, y, m):
        """Encode integer class labels as a +1 / -1 one-versus-all matrix.

        y : numpy array of shape (n,) holding integer class indices
        m : int (num_classes)
        returns : integer numpy array of shape (n, m); entry (i, y[i]) is 1
                  and every other entry is -1
        """
        y = np.ravel(y)
        n = y.shape[0]
        labels = np.full((n, m), -1)
        # An empty index array may not have an integer dtype, so skip the
        # assignment entirely when there is nothing to mark.
        if n:
            labels[np.arange(n), y] = 1
        return labels

    def _hinge(self, x, y):
        """Return max(0, 2 - (x @ w) * y) element-wise, shape (n, num_classes)."""
        return np.maximum(0, 2 - np.matmul(x, self.w) * y)

    def compute_loss(self, x, y):
        """Return the regularized squared-hinge loss on a batch.

        x : numpy array of shape (minibatch size, num_features)
        y : numpy array of shape (minibatch size, num_classes), entries +1/-1
        returns : float
        """
        hinge = self._hinge(x, y)
        data_term = np.sum(hinge ** 2) / x.shape[0]
        penalty = np.sum(self.C * 0.5 * self.w ** 2)
        return data_term + penalty

    def compute_gradient(self, x, y):
        """Return the gradient of ``compute_loss`` with respect to ``self.w``.

        x : numpy array of shape (minibatch size, num_features)
        y : numpy array of shape (minibatch size, num_classes), entries +1/-1
        returns : numpy array of shape (num_features, num_classes)
        """
        hinge = self._hinge(x, y)
        # d/dw_j of hinge_ij ** 2 is -2 * hinge_ij * y_ij * x_i. Entries whose
        # margin is satisfied have hinge_ij == 0 and therefore contribute
        # nothing, so no explicit mask is needed.
        data_grad = -2 * np.matmul(x.T, hinge * y) / x.shape[0]
        return data_grad + self.C * self.w

    def compute_gradient_ok_locally_but_fails_on_gradescope_dont_use(self, x, y):
        """Deprecated alias of ``compute_gradient``; kept for compatibility.

        The earlier experimental body indexed the (examples, classes) hinge
        matrix with a feature index and scaled by the summed hinge, which is
        not the gradient of ``compute_loss``. It now delegates to the correct
        implementation.

        x : numpy array of shape (minibatch size, num_features)
        y : numpy array of shape (minibatch size, num_classes)
        returns : numpy array of shape (num_features, num_classes)
        """
        return self.compute_gradient(x, y)

    def compute_gradient_slow_dont_use_me(self, x, y):
        """Deprecated alias of ``compute_gradient``; kept for compatibility.

        The earlier triple-loop body scaled by the summed hinge and divided
        the penalty term by the batch size, which is not the gradient of
        ``compute_loss``. It now delegates to the correct implementation.

        x : numpy array of shape (minibatch size, num_features)
        y : numpy array of shape (minibatch size, num_classes)
        returns : numpy array of shape (num_features, num_classes)
        """
        return self.compute_gradient(x, y)

    # Batcher function
    def minibatch(self, iterable1, iterable2, size=1):
        """Yield aligned slices of at most ``size`` items from both inputs.

        The number of batches is driven by ``len(iterable1)``; the final
        batch is shorter when the length is not a multiple of ``size``.
        """
        total = len(iterable1)
        for start in range(0, total, size):
            stop = min(start + size, total)
            yield iterable1[start:stop], iterable2[start:stop]

    def infer(self, x):
        """Predict the highest-scoring class for each example.

        x : numpy array of shape (num_examples_to_infer, num_features)
        returns : numpy array of shape (num_examples_to_infer, num_classes)
                  in the +1/-1 one-versus-all encoding
        """
        scores = np.matmul(x, self.w)
        predicted = np.argmax(scores, axis=1)
        # Take the class count from the weights so inference also works when
        # ``w`` was assigned directly rather than through ``fit``.
        return self.make_one_versus_all_labels(predicted, self.w.shape[1])

    def compute_accuracy(self, y_inferred, y):
        """Return the fraction of rows whose arg-max class matches.

        y_inferred : numpy array of shape (num_examples, num_classes)
        y : numpy array of shape (num_examples, num_classes)
        returns : float
        raises : ValueError if the two arrays have different row counts
        """
        num_examples = y.shape[0]
        if y_inferred.shape[0] != num_examples:
            raise ValueError(
                "y_inferred and y must have the same number of rows, got "
                f"{y_inferred.shape[0]} and {num_examples}"
            )
        true_classes = np.argmax(y, axis=1)
        predicted_classes = np.argmax(y_inferred, axis=1)
        return np.sum(true_classes == predicted_classes) / num_examples

    def fit(self, x_train, y_train, x_test, y_test):
        """Train on ``x_train`` and track metrics on both splits.

        x_train : numpy array of shape (number of training examples, num_features)
        y_train : numpy array of shape (number of training examples,) holding
                  integer class labels
        x_test : numpy array of shape (number of test examples, num_features)
        y_test : numpy array of shape (number of test examples,) holding
                 integer class labels
        returns : four lists with one entry per pass, in the order
                  train losses, train accuracies, test losses, test accuracies
        """
        self.num_features = x_train.shape[1]
        self.m = y_train.max() + 1
        y_train = self.make_one_versus_all_labels(y_train, self.m)
        y_test = self.make_one_versus_all_labels(y_test, self.m)
        self.w = np.zeros([self.num_features, self.m])

        train_losses = []
        train_accs = []
        test_losses = []
        test_accs = []

        for iteration in range(self.niter):
            # Train one pass through the training set
            for x, y in self.minibatch(x_train, y_train, size=self.batch_size):
                grad = self.compute_gradient(x, y)
                self.w -= self.eta * grad

            # Measure loss and accuracy on training set
            train_loss = self.compute_loss(x_train, y_train)
            y_inferred = self.infer(x_train)
            train_accuracy = self.compute_accuracy(y_inferred, y_train)

            # Measure loss and accuracy on test set
            test_loss = self.compute_loss(x_test, y_test)
            y_inferred = self.infer(x_test)
            test_accuracy = self.compute_accuracy(y_inferred, y_test)

            if self.verbose:
                print ("iter=", iteration)
                print(f"Iteration {iteration} | Train loss {train_loss:.04f} | Train acc {train_accuracy:.04f} |"
                      f" Test loss {test_loss:.04f} | Test acc {test_accuracy:.04f}")

            # Record losses, accs
            train_losses.append(train_loss)
            train_accs.append(train_accuracy)
            test_losses.append(test_loss)
            test_accs.append(test_accuracy)

        return train_losses, train_accs, test_losses, test_accs


# DO NOT MODIFY THIS FUNCTION
def load_data():
    """Load the feature/label arrays from disk and prepare them for training.

    Reads four ``.npz`` files by relative path (so they are resolved against
    the current working directory), standardizes both feature matrices with
    the per-column mean and standard deviation of the training features, and
    appends a column of ones to each feature matrix.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. The label arrays are
        returned exactly as stored in the files.

    NOTE(review): a training column with zero standard deviation would make
    the division below produce inf/nan — confirm the data has none.
    """
    # Load the data files
    print("Loading data...")
    x_train = np.load("train_features_cifar100_reduced.npz")["train_data"]
    x_test = np.load("test_features_cifar100_reduced.npz")["test_data"]
    y_train = np.load("train_labels_cifar100_reduced.npz")["train_label"]
    y_test = np.load("test_labels_cifar100_reduced.npz")["test_label"]

    # normalize the data
    # (test features reuse the training statistics rather than their own)
    mean = x_train.mean(axis=0)
    std = x_train.std(axis=0)
    x_train = (x_train - mean) / std
    x_test = (x_test - mean) / std

    # add implicit bias in the feature
    # (a trailing column of ones, so the last weight row acts as the bias)
    x_train = np.concatenate([x_train, np.ones((x_train.shape[0], 1))], axis=1)
    x_test = np.concatenate([x_test, np.ones((x_test.shape[0], 1))], axis=1)

    return x_train, y_train, x_test, y_test


if __name__ == "__main__":
    # Script entry point: load the data, train the classifier, then run
    # inference on the test split.

    x_train, y_train, x_test, y_test = load_data()

    print("Fitting the model...")
    svm = SVM(eta=0.0001, C=1, niter=200, batch_size=5000, verbose=True)
    train_losses, train_accs, test_losses, test_accs = svm.fit(x_train, y_train, x_test, y_test)

    # To infer after training, do the following. The class count comes from
    # the fitted model rather than a hard-coded constant so the encoding
    # always matches the weights.
    y_inferred = svm.infer(x_test)
    y_test_ova = svm.make_one_versus_all_labels(y_test, svm.m)

    # To compute the gradient or loss before training, enable this flag.
    oldTestCode = False

    if oldTestCode:
        y_train_ova = svm.make_one_versus_all_labels(y_train, svm.m)  # one-versus-all labels
        # Reset the weights, sized from the data instead of fixed literals.
        svm.w = np.zeros([x_train.shape[1], svm.m])

        loss = svm.compute_loss(x_train, y_train_ova)
        grad = svm.compute_gradient(x_train, y_train_ova)

    print ("done")


Loading data...
Fitting the model...
iter= 0
Iteration 0 | Train loss 31.5301 | Train acc 0.3036 | Test loss 31.5436 | Test acc 0.2960
iter= 1
Iteration 1 | Train loss 31.2682 | Train acc 0.3212 | Test loss 31.2893 | Test acc 0.3125
iter= 2
Iteration 2 | Train loss 31.0809 | Train acc 0.3311 | Test loss 31.1074 | Test acc 0.3222
iter= 3
Iteration 3 | Train loss 30.9356 | Train acc 0.3366 | Test loss 30.9668 | Test acc 0.3280
iter= 4
Iteration 4 | Train loss 30.8180 | Train acc 0.3432 | Test loss 30.8536 | Test acc 0.3347
iter= 5
Iteration 5 | Train loss 30.7199 | Train acc 0.3485 | Test loss 30.7596 | Test acc 0.3410
iter= 6
Iteration 6 | Train loss 30.6355 | Train acc 0.3513 | Test loss 30.6791 | Test acc 0.3448
iter= 7
Iteration 7 | Train loss 30.5611 | Train acc 0.3549 | Test loss 30.6083 | Test acc 0.3488
iter= 8
Iteration 8 | Train loss 30.4939 | Train acc 0.3596 | Test loss 30.5445 | Test acc 0.3510
iter= 9
Iteration 9 | Train loss 30.4321 | Train acc 0.3619 | Test loss 30.4859 |