<a href="https://colab.research.google.com/github/sidh26/ANN/blob/main/classwork/ANN_20th_Feb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch

# Allocate a 2x5 tensor without initialising its contents
x = torch.empty((2, 5))

# Show the Python type and the shape
type(x), x.shape

(torch.Tensor, torch.Size([2, 5]))

In [3]:
# Overwrite every element in place with 2.5, then show the tensor with its
# mean, standard deviation and sum
x.fill_(2.5)
x, torch.mean(x), torch.std(x), torch.sum(x)

(tensor([[2.5000, 2.5000, 2.5000, 2.5000, 2.5000],
         [2.5000, 2.5000, 2.5000, 2.5000, 2.5000]]),
 tensor(2.5000),
 tensor(0.),
 tensor(25.))

In [4]:
# .item() converts a single-element tensor into a plain Python number
torch.mean(x).item()

2.5

In [6]:
# Build a 2x3 integer tensor from nested Python lists
x = torch.tensor([
    [1, 2, 3],
    [11, 22, 33],
])

In [7]:
# Read one element: row 1, column 2 (the result is still a tensor)
x[1][2]

tensor(33)

In [11]:
# Fill a 2x4 float tensor in place with random integer values below 42
x = torch.empty(2, 4)
x.random_(42)
x

tensor([[39.,  7., 29., 24.],
        [25., 29., 28., 31.]])

In [12]:
# First row, last row, columns 1-2 of row 1, and the first column
x[0], x[-1], x[1][1:3], x[:, 0]

(tensor([39.,  7., 29., 24.]),
 tensor([25., 29., 28., 31.]),
 tensor([29., 28.]),
 tensor([39., 25.]))

In [13]:
# Element-wise arithmetic on tensors

x = torch.tensor([10, 20, 30])
y = torch.tensor([4, 5, 6])

# Sum, product and square, each computed element by element
torch.add(x, y), torch.mul(x, y), torch.pow(x, 2)

(tensor([14, 25, 36]), tensor([ 40, 100, 180]), tensor([100, 400, 900]))

In [3]:
# Devices and dtypes

x = torch.zeros(1, 3)
y = x.long()  # same values converted to 64-bit integers; stays on x's device

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell does not raise on machines without CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'
z = x.to(device)

x.dtype, x.device, y.dtype, y.device, z.dtype, z.device

(torch.float32,
 device(type='cpu'),
 torch.int64,
 device(type='cpu'),
 torch.float32,
 device(type='cuda', index=0))

In [7]:
# Side experiment: a small 2x2 integer matrix for comparing product types
x = torch.tensor([
    [1, 2],
    [11, 22],
])

In [8]:
# Element-wise product: each entry is multiplied by itself
torch.mul(x, x)

tensor([[  1,   4],
        [121, 484]])

In [9]:
# Matrix product (rows times columns), in contrast to the element-wise product
x @ x

tensor([[ 23,  46],
        [253, 506]])

In [12]:
# Autograd
# A tensor holds a value and, when tracking is enabled, a gradient

x = torch.tensor([1.0, 2.0])
y = torch.tensor([3.0, 4.0])
z = torch.tensor([5.0, 6.0])

# Neither operand tracks gradients, so their sum does not either
print(x.requires_grad, (x+y).requires_grad)

# Once one operand tracks gradients, a result computed from it does too
z.requires_grad_(True)
print((x+z).requires_grad)

False False
True


In [13]:
# Two leaf tensors that track gradients
t = torch.tensor([1.0, 2.0, 4.0], requires_grad=True)
u = torch.tensor([4.0, 8.0], requires_grad=True)

# Scalar a = sum(t^2) + sum(log(u))
a = (t ** 2).sum() + torch.log(u).sum()

In [15]:
# Compute da/dt and da/du; the results are returned as a tuple, not stored in .grad
torch.autograd.grad(outputs=a, inputs=(t, u))

(tensor([2., 4., 8.]), tensor([0.2500, 0.1250]))

In [None]:
# Gradient Accumulation

In [18]:
x = torch.tensor([-3.0, 4.0, 5.0], requires_grad=True)
u = (x ** 3).sum()

# backward() computes the same derivative as autograd.grad, but there is no need
# to say what to differentiate with respect to: the result is stored in x.grad
u.backward(retain_graph=True)
print(x.grad)  # du/dx = 3x^2

# retain_graph=True keeps the graph alive so backward() can be called on u again
u.backward(retain_graph=True)
print(x.grad)  # gradients accumulate, so this is the sum of both passes

# After resetting the gradient to zero, one more pass gives the first values again
x.grad.zero_()
u.backward()
print(x.grad)

tensor([27., 48., 75.])
tensor([ 54.,  96., 150.])
tensor([27., 48., 75.])


In [22]:
a = torch.tensor([2.0, 3.0, 5.0], requires_grad=True)
b = torch.tensor([25.0, 50.0], requires_grad=True)
c = (a ** 2).sum() + torch.log(b).sum()

# First pass: dc/da = 2a and dc/db = 1/b
c.backward(retain_graph=True)
print(a.grad, b.grad, '\n', sep='\n')

# Second pass on the retained graph: the gradients are added to the stored ones
c.backward(retain_graph=True)
print(a.grad, b.grad, '\n', sep='\n')

# Reset both gradients, then a final pass gives the first-pass values again
a.grad.zero_()
b.grad.zero_()
c.backward()
print(a.grad, b.grad, sep='\n')

tensor([ 4.,  6., 10.])
tensor([0.0400, 0.0200])


tensor([ 8., 12., 20.])
tensor([0.0800, 0.0400])


tensor([ 4.,  6., 10.])
tensor([0.0400, 0.0200])


In [26]:
# Gradient-descent weight update using torch.autograd.grad
a = torch.tensor(0.5, requires_grad=True)
b = torch.tensor(-0.5, requires_grad=True)

eta = 0.1  # step size

for step in range(100):
    objective = (a - 1).pow(2) + (b + 1).pow(2) + (a - b).pow(2)
    grad_a, grad_b = torch.autograd.grad(objective, (a, b))

    # Apply the in-place update under no_grad so autograd does not record it
    with torch.no_grad():
        a -= eta * grad_a
        b -= eta * grad_b

print(a.item(), b.item())

0.3333333432674408 -0.3333333432674408


In [35]:
# Weight update with backward()
a = torch.tensor(0.5).requires_grad_()
b = torch.tensor(-0.5).requires_grad_()

eta = 0.1  # learning rate

for k in range(100):
    # The loss, and with it the graph, is rebuilt on every iteration, so the
    # previous graph never has to be retained
    l = (a-1)**2 + (b+1)**2 + (a-b)**2
    l.backward()

    # Apply the in-place update under no_grad so autograd does not record it
    with torch.no_grad():
        a -= eta * a.grad
        b -= eta * b.grad
    # backward() accumulates into .grad, so reset it before the next pass
    a.grad.zero_()
    b.grad.zero_()

print(a.item(), b.item())

0.3333333432674408 -0.3333333432674408


In [59]:
# Creating NN with pytorch
import torch.nn as nn

# Random data drawn from a standard normal: x is 10x3 and y is 10x2
x = torch.randn((10, 3))
y = torch.randn((10, 2))

In [52]:
# Fully connected layer mapping 3 input features to 2 outputs
linear = nn.Linear(in_features=3, out_features=2)

# Print each parameter in turn: the weight matrix (linear.weight) and then the
# bias vector (linear.bias)
print(*linear.parameters(), sep='\n')

Parameter containing:
tensor([[-0.2530,  0.1194, -0.3674],
        [-0.0368, -0.3198,  0.2211]], requires_grad=True)
Parameter containing:
tensor([-0.5329,  0.1109], requires_grad=True)


In [39]:
# Build loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

# Clear any gradients already stored on the parameters (for example from an
# earlier run of this cell); backward() accumulates into .grad, so stale values
# would otherwise be added to the new ones
optimizer.zero_grad()

# Forward pass
pred = linear(x)

# Compute the loss
loss = criterion(pred, y)
print(loss)

# Backward propagation: fills .grad on the layer's weight and bias
loss.backward()

# Display gradients
print(linear.weight.grad)
print(linear.bias.grad)

# Weight update: one SGD step using the gradients computed above
optimizer.step()

# New weights
for param in linear.parameters():
    print(param)

tensor(1.8051, grad_fn=<MseLossBackward>)
tensor([[ 0.2293,  1.3982,  0.1628],
        [ 0.1783, -0.2106, -0.2346]])
tensor([-0.8522,  0.0899])
Parameter containing:
tensor([[-0.2684,  0.5277,  0.4786],
        [ 0.1238,  0.1591, -0.4026]], requires_grad=True)
Parameter containing:
tensor([-0.0598,  0.2942], requires_grad=True)


In [53]:
# Training loop for the linear layer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

for k in range(50):
    # Reset gradients first: backward() accumulates, and the parameters may
    # still hold gradients from a backward pass run outside this loop
    optimizer.zero_grad()

    pred = linear(x)
    loss = criterion(pred, y)
    loss.backward()

    # step() already applies the update without autograd tracking, so no
    # explicit torch.no_grad() block is needed
    optimizer.step()

    # Report the current parameters and loss every 10 iterations
    if k%10==0:
        print("Weights: ")
        for param in linear.parameters():
            print(param)
        print('Loss: ', loss.item())
        print('\n')

Weights: 
Parameter containing:
tensor([[-0.2522,  0.1200, -0.3658],
        [-0.0344, -0.3229,  0.2265]], requires_grad=True)
Parameter containing:
tensor([-0.5320,  0.1082], requires_grad=True)
Loss:  1.1879478693008423


Weights: 
Parameter containing:
tensor([[-0.2438,  0.1256, -0.3505],
        [-0.0114, -0.3510,  0.2786]], requires_grad=True)
Parameter containing:
tensor([-0.5235,  0.0832], requires_grad=True)
Loss:  1.1365453004837036


Weights: 
Parameter containing:
tensor([[-0.2357,  0.1302, -0.3362],
        [ 0.0096, -0.3745,  0.3270]], requires_grad=True)
Parameter containing:
tensor([-0.5156,  0.0623], requires_grad=True)
Loss:  1.0945422649383545


Weights: 
Parameter containing:
tensor([[-0.2279,  0.1341, -0.3227],
        [ 0.0289, -0.3942,  0.3721]], requires_grad=True)
Parameter containing:
tensor([-0.5084,  0.0448], requires_grad=True)
Loss:  1.0599350929260254


Weights: 
Parameter containing:
tensor([[-0.2204,  0.1372, -0.3101],
        [ 0.0467, -0.4107,  0.4141]

In [69]:
# Creating Multi-layer NN with pytorch
import torch.nn as nn

# Random data drawn from a standard normal: x is 10x3 and y is 10x1
x = torch.randn((10, 3))
y = torch.randn((10, 1))

In [70]:
# Stack layers: linear 3 -> 2, sigmoid activation, then linear 2 -> 1
model = nn.Sequential(
    nn.Linear(in_features=3, out_features=2),
    nn.Sigmoid(),
    nn.Linear(in_features=2, out_features=1),
)

# Print the weight and bias of each linear layer, in order
print(*model.parameters(), sep='\n')

Parameter containing:
tensor([[ 0.5639, -0.3253,  0.1210],
        [-0.3249,  0.4802, -0.4242]], requires_grad=True)
Parameter containing:
tensor([ 0.5609, -0.0620], requires_grad=True)
Parameter containing:
tensor([[-0.5743,  0.0510]], requires_grad=True)
Parameter containing:
tensor([0.3079], requires_grad=True)


In [71]:
# Training loop for the multi-layer model
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for k in range(100):
    # Reset gradients before each backward pass, because backward() accumulates
    optimizer.zero_grad()

    pred = model(x)
    loss = criterion(pred, y)
    loss.backward()

    # step() already applies the update without autograd tracking, so no
    # explicit torch.no_grad() block is needed
    optimizer.step()

    # Report the loss every 10 iterations
    if k%10==0:
        print('Loss: ', loss.item())

Loss:  1.1651668548583984
Loss:  1.1225628852844238
Loss:  1.1003246307373047
Loss:  1.0885766744613647
Loss:  1.0822478532791138
Loss:  1.0787256956100464
Loss:  1.0766608715057373
Loss:  1.0753567218780518
Loss:  1.0744515657424927
Loss:  1.0737582445144653
