# Hello World for PyTorch

Tensors in PyTorch are created with `torch.tensor([...], dtype = ...)`. Most tensor operations closely mirror their NumPy counterparts.

In [1]:
import torch
# --- Broadcasting: the (1, 2) row is stretched over every row of the (5, 2) matrix ---
a = torch.rand(5, 2)
b = torch.rand(1, 2)
print(a * b)

# --- Dot product of two length-5 vectors (result is a 0-d tensor) ---
a = torch.rand(5)
b = torch.rand(5)
print(a.dot(b))

# --- Matrix multiplication: (5, 4) @ (4, 3) -> (5, 3) ---
a = torch.rand(5, 4)
b = torch.rand(4, 3)
print(a @ b)

tensor([[0.0165, 0.0875],
        [0.0512, 0.1382],
        [0.1086, 0.3146],
        [0.0998, 0.0288],
        [0.0075, 0.4139]])
tensor(1.5872)
tensor([[1.5601, 1.8872, 1.8746],
        [0.9561, 0.8141, 0.6230],
        [0.9546, 0.8443, 0.4169],
        [1.3867, 0.9729, 1.1332],
        [1.3957, 1.9708, 1.6355]])


Differentiation in PyTorch is done by a built-in engine called `torch.autograd`. The independent variables are created with `requires_grad = True`. In the following example, $y = {\bf w}^T {\bf x} + b$. After calling `y.backward()`, the gradients of $y$ with respect to ${\bf x}$ and $b$ are available as `x.grad` and `b.grad`.

In [2]:
# Tensors created with requires_grad=True are the ones autograd differentiates with respect to.
b = torch.rand(1, requires_grad=True)
x = torch.rand(5, requires_grad=True)
w = torch.rand(5)  # plain constant: no gradient is tracked for w

# y = w . x + b  -- a one-element tensor, so backward() needs no explicit gradient argument
y = w.dot(x) + b

# Back-propagate: fills in .grad for b and x
y.backward()

# Expected: dy/db = 1 and dy/dx = w
print(b.grad, x.grad)

tensor([1.]) tensor([0.8710, 0.2257, 0.1938, 0.7747, 0.3650])


**Exercise 1.** Construct a linear regression using gradient descent. First initialize the parameters $w$ and $b$. Then generate random data for $x$ and $y$.

In [153]:
# Ground-truth parameters of the linear model used to generate the data
k = 5                                        # number of input features
b = torch.tensor(8.0, dtype=torch.float32)   # true intercept (0-d tensor)
w = torch.rand(k)                            # true weights, one per feature

In [154]:
# Training inputs: 100 samples with k features each, uniform on [0, 100)
x_train = 100 * torch.rand(100, k)

# Targets follow the true linear model plus uniform noise on [0, 2)
# (note that this noise has mean 1, not 0).
noise = 2 * torch.rand(100)
y_train = x_train @ w + b + noise

In [135]:
# Loss function: sum of squared errors
def loss(y_1, y_2):
    """Return the sum of squared element-wise differences between y_1 and y_2.

    The two tensors are subtracted with ordinary broadcasting, so callers
    should pass tensors of the same shape. The result is a 0-d tensor.
    """
    residual = y_1 - y_2
    return residual.pow(2).sum()

In [159]:
# Fit y_hat = x_train @ W + B by normalised gradient descent on the
# sum-of-squares loss. W is a (k, 1) column and B a single intercept.
W = torch.rand(k, 1, requires_grad = True)
B = torch.rand(1, requires_grad = True)
lr = torch.tensor([0.01], dtype = torch.float)

epochs = 1000
LOSS = []

# Report about ten times per run; max(..., 1) avoids a modulo-by-zero when epochs < 10.
log_every = max(epochs // 10, 1)

for i in range(epochs+1):
    y_hat = torch.matmul(x_train, W) + B

    # y_train is 1-D while y_hat is a column; reshape so the subtraction in
    # loss() is element-wise instead of broadcasting to a square matrix.
    l = loss(torch.reshape(y_train, y_hat.shape), y_hat)
    l.backward()

    # Keep only the value: detach() drops the reference to the autograd graph
    # so the history of every epoch is not kept alive by this list.
    LOSS.append(l.detach())

    # Parameter updates must not be recorded by autograd.
    with torch.no_grad():
        # normalize the gradients: (W_dir, B_dir) together form a unit vector,
        # so every step has Euclidean length lr in parameter space.
        # The clamp guards against 0/0 = NaN if the gradient vanishes.
        n_factor = torch.clamp(torch.sqrt(torch.sum(W.grad**2) + B.grad**2), min = 1e-12)
        W_dir = W.grad/n_factor
        B_dir = B.grad/n_factor

        W -= lr*W_dir
        B -= lr*B_dir

    if i % log_every == 0 : print("Epoch :", i, "\t LOSS :", round(LOSS[-1].item(), 2))

    # backward() accumulates into .grad, so reset before the next epoch.
    W.grad.zero_()
    B.grad.zero_()

Epoch : 0 	 LOSS : 235794.5
Epoch : 100 	 LOSS : 1948.11
Epoch : 200 	 LOSS : 546.07
Epoch : 300 	 LOSS : 545.22
Epoch : 400 	 LOSS : 544.38
Epoch : 500 	 LOSS : 543.53
Epoch : 600 	 LOSS : 542.69
Epoch : 700 	 LOSS : 541.85
Epoch : 800 	 LOSS : 541.02
Epoch : 900 	 LOSS : 540.18
Epoch : 1000 	 LOSS : 539.35


In [152]:
# Learned parameters (left) next to the ground truth (right).
# W is stored as a column, so flatten it to w's shape for a side-by-side view.
print(W.data.reshape(w.shape), w)
print(B, b)

tensor([0.0168, 0.6260, 0.8029, 0.7137, 0.8705]) tensor([0.0010, 0.6104, 0.7901, 0.7014, 0.8607])
tensor([6.2963], requires_grad=True) tensor(8.)


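We can also approximate the same relationship with a neural network that has one hidden layer of $10$ units. (The network below has no activation function between its layers, so it is still a linear model in $x$.)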

In [160]:
# Two stacked linear layers: k inputs -> 10 hidden units -> 1 output.
# NOTE: there is no activation between the layers (see y_hat below), so the
# model is still linear in x_train.
W1 = torch.rand(k, 10, requires_grad = True)
# B1 has shape (1,), i.e. one bias value broadcast to all 10 hidden units.
B1 = torch.rand(1, requires_grad = True)

W2 = torch.rand(10, 1, requires_grad = True)
B2 = torch.rand(1, requires_grad = True)

lr = torch.tensor([0.01], dtype = torch.float)

epochs = 1000
LOSS = []

# Report about ten times per run; max(..., 1) avoids a modulo-by-zero when epochs < 10.
log_every = max(epochs // 10, 1)

for i in range(epochs+1):
    y_hat = torch.matmul(torch.matmul(x_train, W1) + B1, W2) + B2

    # y_train is 1-D while y_hat is a column; reshape so the subtraction in
    # loss() is element-wise instead of broadcasting to a square matrix.
    l = loss(torch.reshape(y_train, y_hat.shape), y_hat)
    l.backward()

    # Keep only the value: detach() drops the reference to the autograd graph
    # so the history of every epoch is not kept alive by this list.
    LOSS.append(l.detach())

    # Parameter updates must not be recorded by autograd.
    with torch.no_grad():
        # Normalise each layer's gradient separately, so each layer moves a
        # distance of lr per step. The clamp guards against 0/0 = NaN if a
        # layer's gradient vanishes.
        n1_factor = torch.clamp(torch.sqrt(torch.sum(W1.grad**2) + B1.grad**2), min = 1e-12)
        W1_dir = W1.grad/n1_factor
        B1_dir = B1.grad/n1_factor

        n2_factor = torch.clamp(torch.sqrt(torch.sum(W2.grad**2) + B2.grad**2), min = 1e-12)
        W2_dir = W2.grad/n2_factor
        B2_dir = B2.grad/n2_factor

        W1 -= lr*W1_dir
        B1 -= lr*B1_dir

        W2 -= lr*W2_dir
        B2 -= lr*B2_dir

    if i % log_every == 0 : print("Epoch :", i, "\t LOSS :", round(LOSS[-1].item(), 2))

    # backward() accumulates into .grad, so reset before the next epoch.
    W1.grad.zero_()
    B1.grad.zero_()

    W2.grad.zero_()
    B2.grad.zero_()

Epoch : 0 	 LOSS : 50178656.0
Epoch : 100 	 LOSS : 1845363.25
Epoch : 200 	 LOSS : 1149.38
Epoch : 300 	 LOSS : 857.78
Epoch : 400 	 LOSS : 855.26
Epoch : 500 	 LOSS : 853.42
Epoch : 600 	 LOSS : 851.55
Epoch : 700 	 LOSS : 849.69
Epoch : 800 	 LOSS : 847.83
Epoch : 900 	 LOSS : 845.99
Epoch : 1000 	 LOSS : 844.14
