In [1]:
import torch

In [2]:
import numpy as np

In [3]:
Inputs=np.array(
                [[73,67,43],
                 [91,88,64],
                 [87,134,58],
                 [102,43,37],
                 [69,96,70]
                ], dtype='float32'
)

Targets=np.array([
                [56,70],
                [81,101],
                [119,133],
                [22,37],
                [103,119]
],dtype='float32')

In [4]:
Inputs.dtype

dtype('float32')

In [5]:
Targets

array([[ 56.,  70.],
       [ 81., 101.],
       [119., 133.],
       [ 22.,  37.],
       [103., 119.]], dtype=float32)

In [6]:
inputs=torch.from_numpy(Inputs)
targets=torch.from_numpy(Targets)

In [7]:
print(inputs)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])


In [8]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [9]:
#We will start with random value
w=torch.randn(2,3,requires_grad=True)
b=torch.randn(2,requires_grad=True)
loss=torch.tensor(2.0,requires_grad=True)

In [10]:
w

tensor([[-0.0416, -0.1877,  1.6296],
        [-0.7736,  0.5379,  2.2422]], requires_grad=True)

In [11]:
b

tensor([-0.0281,  0.4673], requires_grad=True)

In [12]:
def model(x):
    return x@w.t()+b

In [13]:
preds=model(inputs)

In [14]:
preds

tensor([[ 54.4356,  76.4451],
        [ 83.9676, 120.9020],
        [ 65.7243, 135.2850],
        [ 47.9558,  27.6471],
        [ 93.1587, 155.6786]], grad_fn=<AddBackward0>)

In [15]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [16]:
#MSE Loss = difference square ==> sum it up ==> devide by number of elements
def mse(t1,t2):
    diff=t1-t2
    return torch.sum(diff*diff)/diff.numel()

In [17]:
loss= mse(preds,targets)

In [18]:
loss

tensor(549.5757, grad_fn=<DivBackward0>)

In [19]:
loss.backward()

In [20]:
print(w)
w.grad

tensor([[-0.0416, -0.1877,  1.6296],
        [-0.7736,  0.5379,  2.2422]], requires_grad=True)


tensor([[ -502.1391, -1362.2559,  -539.1724],
        [  811.4393,  1121.6715,   780.9682]])

In [21]:
print(b)
b.grad

tensor([-0.0281,  0.4673], requires_grad=True)


tensor([-7.1516, 11.1916])

In [22]:
le=0.00001
w = w - le*w.grad
b = b - le*b.grad

In [23]:
w

tensor([[-0.0366, -0.1740,  1.6350],
        [-0.7818,  0.5267,  2.2344]], grad_fn=<SubBackward0>)

In [24]:
b

tensor([-0.0280,  0.4672], grad_fn=<SubBackward0>)

In [25]:
#Let's modify our weight and bias multiple times in order to reach optimum value
for i in range(1000):  
    preds=model(inputs)
    loss= mse(preds,targets)
    print(loss)
    w.retain_grad()
    b.retain_grad()
    print(w.grad)
    print(b.grad)
    loss.backward()
    print(w.grad)
    print(b.grad)
    w = w - le*w.grad
    b = b - le*b.grad
    print("Epoch:", i)
    print("Loss:",loss)
    print("Wight:",w)
    print("Bias:",b)
    

tensor(504.4145, grad_fn=<DivBackward0>)
None
None
tensor([[ -344.3463, -1187.6105,  -432.6481],
        [  637.3398,   932.9778,   664.7554]])
tensor([-5.2683,  9.1216])
Epoch: 0
Loss: tensor(504.4145, grad_fn=<DivBackward0>)
Wight: tensor([[-0.0331, -0.1622,  1.6393],
        [-0.7881,  0.5173,  2.2278]], grad_fn=<SubBackward0>)
Bias: tensor([-0.0279,  0.4671], grad_fn=<SubBackward0>)
tensor(472.8268, grad_fn=<DivBackward0>)
None
None
tensor([[ -215.2654, -1043.8408,  -345.1712],
        [  494.5739,   777.9542,   569.3196]])
tensor([-3.7257,  7.4233])
Epoch: 1
Loss: tensor(472.8268, grad_fn=<DivBackward0>)
Wight: tensor([[-0.0310, -0.1517,  1.6428],
        [-0.7931,  0.5095,  2.2221]], grad_fn=<SubBackward0>)
Bias: tensor([-0.0279,  0.4670], grad_fn=<SubBackward0>)
tensor(450.3995, grad_fn=<DivBackward0>)
None
None
tensor([[-109.7522, -925.4208, -273.3312],
        [ 377.5306,  650.5728,  490.9408]])
tensor([-2.4627,  6.0302])
Epoch: 2
Loss: tensor(450.3995, grad_fn=<DivBackward0>)

In [26]:
loss

tensor(24.2696, grad_fn=<DivBackward0>)

In [27]:
preds=model(inputs)
preds

tensor([[ 56.8046,  69.9341],
        [ 85.2965, 104.3857],
        [112.2459, 125.1124],
        [ 19.3566,  34.7848],
        [108.2948, 126.9948]], grad_fn=<AddBackward0>)

In [28]:
Targets

array([[ 56.,  70.],
       [ 81., 101.],
       [119., 133.],
       [ 22.,  37.],
       [103., 119.]], dtype=float32)