## **Lab 03 Training a Neural Network Using PyTorch Autograd**

Paige Rosynek, 03.30.2023

**Import Libraries**

In [2]:
import os
import torch
import torchvision

### **Computing Forward**

**Case 1: no regularization**

In [3]:
x = torch.tensor([0.5, -5.0]) 
W = torch.tensor([[2.0, 0.5],
                 [3.0, 0.25],
                 [4.0, 0.1]], requires_grad=True)
b1 = torch.tensor([1.5, 5.0, 10.0], requires_grad=True)
M = torch.tensor([[5.5, 2.0, 0.5],
                 [1.5, 1.0, -1.0]], requires_grad=True)
b2 = torch.tensor([2.0, 5.0], requires_grad=True)

# input -> hidden
z = torch.matmul(W, x) + b1
h = torch.relu(z)
print(f'z = {z}')
print(f'h = {h}')

# hidden -> output
o = torch.matmul(M, h) + b2
print(f'o = {o}')

z = tensor([ 0.0000,  5.2500, 11.5000], grad_fn=<AddBackward0>)
h = tensor([ 0.0000,  5.2500, 11.5000], grad_fn=<ReluBackward0>)
o = tensor([18.2500, -1.2500], grad_fn=<AddBackward0>)


**Case 2: regularization**

In [4]:
x = torch.tensor([1.0, 2.0])   
W = torch.tensor([[1.0, 0.5],
                 [0.5, -1.0],
                 [2.0, 5.0]], requires_grad=True)
b1 = torch.tensor([2.0, 1.0, -2.0], requires_grad=True)
M = torch.tensor([[5.0, 0.5, 1.0],
                 [0.5, 1.0, 2.0]], requires_grad=True)
b2 = torch.tensor([4.0, -5.0], requires_grad=True)

# input -> hidden
z = torch.matmul(W, x) + b1
h = torch.relu(z)
print(f'z = {z}')
print(f'h = {h}')

# hidden -> output
o = torch.matmul(M, h) + b2
print(f'o = {o}')

reg = 0.1
s1 = torch.sum(W**2) 
s2 = torch.sum(M**2) 
s = (reg / 2) * (s1 + s2)
print(f'(regularization) s1 = {s1}')
print(f'(regularization) s2 = {s2}')
print(f'regularization s = {s}')

z = tensor([ 4.0000, -0.5000, 10.0000], grad_fn=<AddBackward0>)
h = tensor([ 4.,  0., 10.], grad_fn=<ReluBackward0>)
o = tensor([34., 17.], grad_fn=<AddBackward0>)
(regularization) s1 = 31.5
(regularization) s2 = 31.5
regularization s = 3.1500000953674316


### **Backpropagation with Autograd**

In [5]:
TRAINING_POINTS = 1000

In [6]:
def create_linear_training_data():
    """
    This method simply rotates points in a 2D space.
    Be sure to use MSE in the place of the final softmax layer before testing on this
    data!
    :return: (x,y) the dataset. x is a numpy array where columns are training samples and
             y is a numpy array where columns are one-hot labels for the training sample.
    """
    x = torch.randn((2, TRAINING_POINTS))
    x1 = x[0:1, :].clone()
    x2 = x[1:2, :]
    y = torch.cat((-x2, x1), axis=0)
    return x, y

In [7]:
EPOCHS = 30          # epochs
BATCH_SIZE = 10
NUM_BATCHES = TRAINING_POINTS // BATCH_SIZE

In [8]:
learning_rate = 0.01
reg = 0.001

# TODO: Build your network. 
W = torch.randn(3, 2) * 0.1                  # weights : input -> hidden
W.requires_grad = True
b_1 = torch.zeros(3, 1, requires_grad=True)     

M = torch.randn(2, 3) * 0.1                  # weights : hidden -> output
M.requires_grad = True
b_2 = torch.zeros(2, 1, requires_grad=True)

# TODO: Select your datasource.
x_train, y_train = create_linear_training_data()


# TODO: Train your network.
#---------- training loop ----------
for epoch in range(EPOCHS):
    for i in range(NUM_BATCHES + 1):
        x_batch = x_train[:, i:i+BATCH_SIZE]
        y_batch = y_train[:, i:i+BATCH_SIZE]

        #---------- forward pass ----------
        z = torch.matmul(W, x_batch) + b_1    # input -> hidden
        h = torch.relu(z)
        o = torch.matmul(M, h) + b_2          # hidden -> output (no activation)
        
        #---------- regularization ----------
        s1 = torch.sum(W**2)                  
        s2 = torch.sum(M**2)                  
        s = (reg / 2) * (s1 + s2)             # regularization term

        # calculate loss - MSE
        L = torch.mean((y_batch - o) ** 2)

        #---------- objective function ----------
        J = L + s 

        # backpropagation
        J.backward()

        # update weights - gradient descent
        # no_grad : updates weights without changing / tracking the changes in gradients
        with torch.no_grad():
            W -= learning_rate * W.grad
            b_1 -= learning_rate * b_1.grad
            M -= learning_rate * M.grad
            b_2 -= learning_rate * b_2.grad

        # clear the gradients
        W.grad.zero_()
        b_1.grad.zero_()
        M.grad.zero_()
        b_2.grad.zero_()

    print('-----------------------------------------------')
    print(f'Epoch: {epoch + 1}\tLoss: {J}')



# TODO: Sanity-check the output of your network.
# You can optionally compute the error on this test data:
# x_test, y_test = create_linear_training_data()
# h = torch.relu(torch.matmul(W, x_test) + b_1)
# y_hat = torch.matmul(M, h) + b_2                 
# loss = torch.mean((y_test - y_hat) ** 2)

# print('\n____________________________________')
# print(f'Final Loss : {loss}')
# print('____________________________________')

# But you must computed W*M as discussed in the lab assignment.
with torch.no_grad():
    print(f'\nW * M = \n{torch.matmul(M, W)}')

-----------------------------------------------
Epoch: 1	Loss: 0.675707221031189
-----------------------------------------------
Epoch: 2	Loss: 0.658433735370636
-----------------------------------------------
Epoch: 3	Loss: 0.6097180247306824
-----------------------------------------------
Epoch: 4	Loss: 0.5260932445526123
-----------------------------------------------
Epoch: 5	Loss: 0.44804874062538147
-----------------------------------------------
Epoch: 6	Loss: 0.4172821640968323
-----------------------------------------------
Epoch: 7	Loss: 0.4070117771625519
-----------------------------------------------
Epoch: 8	Loss: 0.3791813552379608
-----------------------------------------------
Epoch: 9	Loss: 0.318312406539917
-----------------------------------------------
Epoch: 10	Loss: 0.23061519861221313
-----------------------------------------------
Epoch: 11	Loss: 0.153119757771492
-----------------------------------------------
Epoch: 12	Loss: 0.10446981340646744
--------------