In [1]:
import torch
import numpy as np

In [11]:
inputs=np.array([[73,67,43],
[91,88,64],
[87,134,58],
[102,43,37],
[69,96,70]],dtype='float32')

In [12]:
targets=np.array([[56,70],
[81,101],
[119,133],
[22,37],
[103,119]],dtype='float32')

In [13]:
inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)
print(inputs)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])


In [17]:
w=torch.randn(2,3,requires_grad=True)
b=torch.randn(2,requires_grad=True)
print(w)
print(b)

tensor([[-0.8433, -1.0182, -1.0400],
        [ 0.1418, -0.0664, -1.1317]], requires_grad=True)
tensor([1.5669, 0.2428], requires_grad=True)


In [15]:
def model(x):
    return x @ w.t()+b

In [18]:
preds=model(inputs)
print(preds)

tensor([[-172.9344,  -42.5152],
        [-231.3361,  -65.1223],
        [-268.5614,  -61.9535],
        [-166.7130,  -30.0184],
        [-227.1691,  -75.5639]], grad_fn=<AddBackward0>)


In [19]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [20]:
diff=preds-targets
diff_sq=diff*diff
torch.sum(diff_sq)/diff.numel()

tensor(56540.2617, grad_fn=<DivBackward0>)

In [21]:
def mse(t1,t2):
    diff=t1-t2
    return torch.sum(diff*diff/diff.numel())

In [22]:
loss=mse(preds,targets)
print(loss)

tensor(56540.2578, grad_fn=<SumBackward0>)


In [23]:
loss.backward()

In [24]:
print(w)
print(w.grad)

tensor([[-0.8433, -1.0182, -1.0400],
        [ 0.1418, -0.0664, -1.1317]], requires_grad=True)
tensor([[-24176.6055, -26913.6582, -16481.2930],
        [-12110.4961, -13968.1953,  -8575.2871]])


In [25]:
print(b)
print(b.grad)

tensor([1.5669, 0.2428], requires_grad=True)
tensor([-289.5428, -147.0347])


In [26]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [28]:
preds=model(inputs)
print(preds)

tensor([[-172.9344,  -42.5152],
        [-231.3361,  -65.1223],
        [-268.5614,  -61.9535],
        [-166.7130,  -30.0184],
        [-227.1691,  -75.5639]], grad_fn=<AddBackward0>)


In [29]:
loss=mse(preds,targets)
print(loss)

tensor(56540.2578, grad_fn=<SumBackward0>)


In [30]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-24176.6055, -26913.6582, -16481.2930],
        [-12110.4961, -13968.1953,  -8575.2871]])
tensor([-289.5428, -147.0347])


In [31]:
with torch.no_grad():
    w-=w.grad*1e-5
    b-=b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()

In [32]:
print(w)
print(b)

tensor([[-0.6015, -0.7491, -0.8752],
        [ 0.2629,  0.0733, -1.0460]], requires_grad=True)
tensor([1.5698, 0.2443], requires_grad=True)


In [33]:
 preds=model(inputs)
 loss=mse(preds,targets)
 print(loss)

tensor(38367.7383, grad_fn=<SumBackward0>)


In [44]:
for i in range(1,100):
    preds=model(inputs)
    loss=mse(preds,targets)
    loss.backward()
    with torch.no_grad():
        w-=w.grad * 1e-5
        b-= b.grad *1e-5
        w.grad.zero_()
        b.grad.zero_()

In [45]:
preds=model(inputs)
loss=mse(preds,targets)
print(loss)

tensor(63.3896, grad_fn=<SumBackward0>)


In [46]:
preds

tensor([[ 58.1837,  71.5463],
        [ 78.8839,  93.8677],
        [124.6181, 146.4411],
        [ 25.0627,  43.1955],
        [ 94.1275, 103.7462]], grad_fn=<AddBackward0>)

In [47]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])