In [48]:
import torch
import numpy as np

#  Basics

In [55]:
x = torch.rand(2,2)
y = torch.rand(2,2)

x = torch.mul(x,y)
y = x.mul_(y)
z = x * y
print(x)
print(y)
print(z)

tensor([[0.2592, 0.2591],
        [0.0296, 0.4160]])
tensor([[0.2592, 0.2591],
        [0.0296, 0.4160]])
tensor([[0.0672, 0.0671],
        [0.0009, 0.1731]])


In [58]:
x = torch.rand(4,4)
print(x)
y = x.view(-1,8)
print(y)

tensor([[0.0942, 0.7946, 0.3245, 0.6256],
        [0.1589, 0.1302, 0.2667, 0.0672],
        [0.2486, 0.2328, 0.8652, 0.9747],
        [0.1063, 0.3693, 0.7558, 0.8764]])
tensor([[0.0942, 0.7946, 0.3245, 0.6256, 0.1589, 0.1302, 0.2667, 0.0672],
        [0.2486, 0.2328, 0.8652, 0.9747, 0.1063, 0.3693, 0.7558, 0.8764]])


In [59]:
a = torch.ones(5)
print(a)
b = a.numpy
print(type(b))

# Other way around
a = np.ones(5)
b = torch.from_numpy(a) #, dtype)

# Changing one of these will modify both because they are stored in the same memory location (CPU)
# Unless you have CUDA enabled
if torch.cuda.is_available():
    device = torch.device("cuda")
    x = torch.ones(5, device=device)
    y = torch.ones
    y = y.to(device)
    z = x + y

tensor([1., 1., 1., 1., 1.])
<class 'builtin_function_or_method'>


# Autograd <img src="Autograd.png" style="width:110px;height=90px">

In [74]:
x = torch.randn(3, requires_grad=True)
print(x)

y = x + 2
print(y)
z = y*y*2
print(z)

tensor([-0.3603,  0.7865,  0.5891], requires_grad=True)
tensor([1.6397, 2.7865, 2.5891], grad_fn=<AddBackward0>)
tensor([ 5.3773, 15.5295, 13.4065], grad_fn=<MulBackward0>)


In [70]:
z = z.mean()
print(z)

tensor(15.6665, grad_fn=<MeanBackward0>)


In [71]:
z.backward() # dz/dx
print(x.grad) # only works for scalar outputs

tensor([4.8804, 2.9830, 3.0102])


In [75]:
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(v) # need argument if it's not a scalar
print(x.grad) # only works for scalar outputs

tensor([6.5588e-01, 1.1146e+01, 1.0356e-02])


In [77]:
# how to prevent tracking the gradient
x.requires_grad_(False)

x.detach()

with torch.no_grad():
    y = x + 2
    print(y)

tensor([1.6397, 2.7865, 2.5891])


In [83]:
# whenever the backward function is called, the graident for the tensor will be accumulated to the .grad attribute
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = (weights*3).sum()
    
    model_output.backward()
    
    print(weights.grad)
    
    weights.grad.zero_() # empty the gradients before next step
    
optimizer = torch.optim.SGD(weights, lr=0.01) # for gradient descent
#optimizer.step()
#optimizer.zero_grad()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


# Backpropagation (Know Chain Rule)

In [84]:
# Chain rule (dz/dx = dz/dy * dy/dx)

## Local Gradients (dz/dx = (dx*y)/dx = y)           (dz/dy = (dx*y)/dy = x)

## 1) Forward Pass: Compute Loss    2) Compute Local Gradients    3) Backward Pass: Compute dLoss/ dWeights using the Chain Rule

In [87]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

#forward pass and compute the loss
y_hat = w * x
loss = (y_hat - y)**2

print(loss)

# backward pass
loss.backward()
print(w.grad)

### update weights
### next forward and backward passes

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)
