In [96]:
### This notebook introduces the core torch methods used to declare input tensors and
### shows how requires_grad lets autograd track the inputs that vary. Finally, it walks
### through the forward and backward pass, both visually and via code.

In [2]:
import torch
# First method: create an "empty" (uninitialized) tensor.
a = torch.empty(1)
# This allocates a tensor with a single element. The memory is not initialized, so the
# value is whatever happened to be there; it is not guaranteed to be zero or small.
a

tensor([0.])

In [6]:
# A second uninitialized tensor, this time with an explicit half-precision dtype.
b = torch.empty((2, 1), dtype=torch.float16)
# Printing it shows a tensor with 2 rows and 1 column.
print(b)

# Now swap the shape to 1 row x 2 columns and fill it with random floats
# (torch.rand only produces floating-point values, never integers).
b = torch.rand((1, 2))

print(b)

tensor([[0.],
        [0.]], dtype=torch.float16)
tensor([[0.5879, 0.5271]])


In [9]:
# Create a 2x1 tensor filled with ones, using a 32-bit integer dtype.
d = torch.ones((2, 1), dtype=torch.int32)
print(d)

tensor([[1],
        [1]], dtype=torch.int32)


In [10]:
# Query the tensor's dimensions; size() returns a torch.Size (here 2 rows x 1 column).
d.size()

torch.Size([2, 1])

In [11]:
# Build a tensor directly from a Python list. The list mixes a float and an int;
# both end up stored as floats (see the displayed result).
c = torch.tensor([5.8, 26])
c

tensor([ 5.8000, 26.0000])

In [26]:
# Element-wise addition of two integer tensors.
e = torch.tensor([2, 2])
f = torch.tensor([3, 3])
print(e, f)
g = torch.add(e, f)  # out-of-place: e and f keep their values, the sum goes to g
print(g)

tensor([2, 2]) tensor([3, 3])
tensor([5, 5])


In [27]:
# Element-wise subtraction; the operands are left unchanged and the result goes to h.
h = torch.sub(e, f)
print(h)

tensor([-1, -1])


In [28]:
# In-place element-wise ops carry a trailing underscore: sub_, add_, mul_, div_.
# They overwrite the tensor they are called on, so re-running this cell changes e again.
e.sub_(f)
print(e)

tensor([-1, -1])


In [29]:
# Other in-place, element-wise operations
e.mul_(f)  # e <- e * f
print(e)
e.add_(f)  # e <- e + f
print(e)

tensor([-3, -3])
tensor([0, 0])


In [30]:
# List every attribute and method name available on the tensor (the output is long).
dir(e)

['H',
 'T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_priority__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__dlpack__',
 '__dlpack_device__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__idiv__',
 '__ifloordiv__',
 '__ilshift__',
 '__imod__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr_

In [None]:
# In-place division. e and f are integer tensors, and true division produces floats
# that cannot be written back into an integer tensor (plain e.div_(f) raises a
# RuntimeError), so request integer (floor) division explicitly.
e.div_(f, rounding_mode="floor")
print(e)

In [31]:
# A 5x3 matrix of random floats, used by the slicing and reshaping examples.
i = torch.rand((5, 3))

print(i)

tensor([[0.7855, 0.8078, 0.3850],
        [0.9398, 0.8041, 0.2503],
        [0.2848, 0.0276, 0.1198],
        [0.1640, 0.4142, 0.1393],
        [0.3580, 0.5598, 0.0241]])


In [32]:
# Slicing: ":" keeps every index along that dimension.
# All rows, column 0 -> only the first column
print(i[:, 0])
# Row 0, all columns -> only the first row
print(i[0, :])

tensor([0.7855, 0.9398, 0.2848, 0.1640, 0.3580])
tensor([0.7855, 0.8078, 0.3850])


In [36]:
# Reshape a tensor with the view method: flatten the 5x3 tensor into 15 elements
j = i.view(15)
print(j)

# Passing -1 lets torch infer that dimension from the element count (15 / 5 = 3 rows)
k = i.view(-1, 5)
print(k)

# Get a NumPy array from the tensor
# NOTE(review): per the PyTorch docs the array shares memory with a CPU tensor, so an
# in-place change to one shows up in the other — confirm before mutating either.
np_l = k.numpy()
print(np_l)

tensor([0.7855, 0.8078, 0.3850, 0.9398, 0.8041, 0.2503, 0.2848, 0.0276, 0.1198,
        0.1640, 0.4142, 0.1393, 0.3580, 0.5598, 0.0241])
tensor([[0.7855, 0.8078, 0.3850, 0.9398, 0.8041],
        [0.2503, 0.2848, 0.0276, 0.1198, 0.1640],
        [0.4142, 0.1393, 0.3580, 0.5598, 0.0241]])
[[0.7854996  0.8077906  0.38495678 0.9398346  0.8041014 ]
 [0.25034112 0.28475666 0.02763796 0.11980534 0.1639812 ]
 [0.4142294  0.13928437 0.35802352 0.5597684  0.02412969]]


In [37]:
# Check whether torch can use a CUDA GPU in this environment.
torch.cuda.is_available()

False

In [39]:
# Construct a device object that refers to the CPU.
torch.device("cpu")

device(type='cpu')

In [43]:
# Inspect which device the tensor a currently lives on.
a.device

device(type='cpu')

In [45]:
# .to() gives back the tensor on the requested device. The result is only displayed
# here, not assigned, so the name a is not rebound.
a.to('cpu')

tensor([0.])

In [46]:
# Check the device of a again after the .to('cpu') call.
a.device

device(type='cpu')

In [47]:
# Flag a tensor so that autograd tracks the operations performed on it.

m = torch.ones(5).requires_grad_(True)
m

tensor([1., 1., 1., 1., 1.], requires_grad=True)

In [82]:
# Autograd in PyTorch: start from a random tensor that tracks gradients.

n = torch.randn(3).requires_grad_(True)
print(n)

tensor([-0.9464, -1.3081, -0.0702], requires_grad=True)


In [72]:
# No backward pass has run yet, so n has no gradient (nothing is displayed).
n.grad

In [65]:
o = n + 5

o  # the result carries grad_fn=<AddBackward0>, the function backpropagation will call

tensor([6.1291, 6.4492, 3.2354], grad_fn=<AddBackward0>)

In [52]:
# Try to calculate the gradient of a non-scalar output.
# backward() without arguments only works for scalar outputs; o has several elements,
# so this raises a RuntimeError. Catch and display it so the demonstration does not
# stop a "Run All" of the notebook.
try:
    o.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

In [73]:
p = n * n * 6
# Different operations record different backward functions (here a multiply)
p

tensor([1.0665e-02, 4.9392e+00, 2.1519e+01], grad_fn=<MulBackward0>)

In [74]:
# p is not a scalar, so backward() is given a tensor of the same shape to weight each
# element's gradient. Passing n itself gives n.grad = n * d(6*n*n)/dn = 12 * n * n.
p.backward(n)

In [75]:
# Once backward() has run through an operation involving n, n.grad is populated
print(n.grad)

tensor([2.1331e-02, 9.8784e+00, 4.3037e+01])


In [59]:
# Reduce p to a single value with mean(); the result still carries a grad_fn
q = p.mean()
print(q)

tensor(12.4202, grad_fn=<MeanBackward0>)


In [60]:
# A scalar tensor has an empty shape
q.shape

torch.Size([])

In [58]:
# Inspect the gradient currently stored on n
n.grad

tensor([-3.8231, -9.1617,  0.8998])

In [76]:
# requires_grad_(False) switches gradient tracking off on n in place
n.requires_grad_(False)
p = n * n
p  # displayed without a grad_fn: this result is not tracked by autograd

tensor([1.7776e-03, 8.2320e-01, 3.5864e+00])

In [78]:
# n.detach() returns a tensor that autograd does not track
# NOTE(review): this rebinds o and f, which earlier cells used for other values
# (f was the integer tensor [3, 3]); re-running those cells afterwards would pick up
# the values assigned here.
o = n.detach()
f = o * 2
f

tensor([-2.0147, -2.4390,  3.9974])

In [79]:
# Inside torch.no_grad() operations are not recorded for autograd, so the printed
# result has no grad_fn
with torch.no_grad():
    p = n + 2
    print(p)

tensor([0.9926, 0.7805, 3.9987])


In [85]:
# Dummy training loop showing that gradients must be reset on every iteration.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Forward: scale every weight by 3 and reduce to a scalar
    model_op = torch.sum(weights * 3)
    # Backward: d(model_op)/d(weights) is 3 for every element
    model_op.backward()
    print(weights.grad)
    # Clear the stored gradient before the next pass
    weights.grad.zero_()
print(model_op)

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)


In [87]:
# Re-create the weights as a fresh tensor that tracks gradients
weights = torch.ones(4, requires_grad=True)

In [None]:
# The optimizer expects an iterable of parameters, so wrap the single tensor in a list
# (passing the bare tensor raises a TypeError).
optimizer = torch.optim.SGD([weights], lr=0.01)
# Forward pass producing the scalar to differentiate; z was previously undefined.
z = (weights * 3).sum()
z.backward()
# Apply one SGD update, then clear the gradients before any further backward pass.
optimizer.step()
optimizer.zero_grad()

### Backpropagation: a worked example

In [92]:
# Input x and target y: plain tensors created without gradient tracking
x = torch.tensor(1.0)
y = torch.tensor(2.0)

# w is the variable we differentiate with respect to, so it tracks gradients
w = torch.tensor(1.0, requires_grad=True)

In [93]:
# forward pass: compute the prediction, then the squared-error loss against y
y_hat = x * w
loss = (y_hat - y) ** 2

In [94]:
# loss = (1 * 1 - 2) ** 2 = 1; its grad_fn comes from the final power operation
print(loss)

tensor(1., grad_fn=<PowBackward0>)


In [95]:
# backward pass: d(loss)/dw = 2 * (y_hat - y) * x = 2 * (1 - 2) * 1 = -2
loss.backward()
print(w.grad)

# TODO: update the weights
# TODO: repeat the forward and backward pass for a couple of iterations

tensor(-2.)
