In [26]:
import torch
import numpy as np

In [92]:
# Ask PyTorch whether a CUDA device is usable; the boolean is displayed as this cell's output.
torch.cuda.is_available()

True

**Squeeze, unsqueeze, and gather functions in PyTorch**

Use `squeeze`, `unsqueeze`, and `gather` to pick the value of the chosen action for each row,  
by indexing a 2-D array along its second dimension with a tensor of action indices.


In [93]:
# A 5x4 table of integer values: five rows, four candidate columns per row.
two_dim_array = np.array(
    [
        [1, 2, 3, 4],
        [3, 2, 34, 12],
        [17, 23, 2, 34],
        [27, 3, 1, 3],
        [1, 4, 5, 1],
    ],
    dtype=np.int64,
)
print(two_dim_array.shape)

# One column index per row of the table above.
actions = torch.LongTensor(np.array([1, 3, 2, 0, 1]))
print("Actions: ", actions)

# unsqueeze(d) inserts a size-1 axis at position d:
# d=0 turns shape (5,) into (1, 5); d=1 and d=-1 both give (5, 1).
for axis in (0, 1, -1):
    print("Unsqueze 1-d: ", actions.unsqueeze(axis))

print("**"*10)
indices = torch.LongTensor(two_dim_array)
print('Indices', indices)
print(indices.shape)

# gather(1, idx) walks along the 2nd dimension and, for every row, picks the
# element whose column is given by idx; idx must be 2-D, hence the unsqueeze
# that turns the (5,) action vector into a (5, 1) column.
# gather is a differentiable operation, so on tensors that track gradients it
# keeps the gradient with respect to the final loss value.
picked = indices.gather(1, actions.unsqueeze(-1))
print("Gather: ", picked)

print("Original Array: ", indices)

# max(dim) returns a (values, positions) pair; unpack it once per dimension.
row_max, row_argmax = indices.max(1)
print("Max across dim 1: ", row_max, "argmax: ", row_argmax)
col_max, col_argmax = indices.max(0)
print("Max across dim 0: ", col_max, "argmax: ", col_argmax)

(5, 4)
Actions:  tensor([1, 3, 2, 0, 1])
Unsqueze 1-d:  tensor([[1, 3, 2, 0, 1]])
Unsqueze 1-d:  tensor([[1],
        [3],
        [2],
        [0],
        [1]])
Unsqueze 1-d:  tensor([[1],
        [3],
        [2],
        [0],
        [1]])
********************
Indices tensor([[ 1,  2,  3,  4],
        [ 3,  2, 34, 12],
        [17, 23,  2, 34],
        [27,  3,  1,  3],
        [ 1,  4,  5,  1]])
torch.Size([5, 4])
Gather:  tensor([[ 2],
        [12],
        [ 2],
        [27],
        [ 4]])
Original Array:  tensor([[ 1,  2,  3,  4],
        [ 3,  2, 34, 12],
        [17, 23,  2, 34],
        [27,  3,  1,  3],
        [ 1,  4,  5,  1]])
Max across dim 1:  tensor([ 4, 34, 34, 27,  5]) argmax:  tensor([3, 2, 3, 0, 2])
Max across dim 0:  tensor([27, 23, 34, 34]) argmax:  tensor([3, 2, 1, 2])


**Line fitting using least-squares error**

Gradient computation is manual.

In [98]:

def least_squares_fitting(num_points=20, epochs=500, alpha=0.01,
                          true_slope=30.0, true_intercept=20.0, log_every=20):
    """Fit ``y = weights * x + bias`` with hand-derived, per-sample gradient descent.

    Training data is the noiseless line ``true_intercept + true_slope * x``
    sampled at ``x = 0, 1, ..., num_points - 1``. No autograd and no optimizer
    are involved: the gradients of the squared error are written out by hand
    and applied one sample at a time.

    Args:
        num_points: Number of training samples; must be at least 1.
        epochs: Number of full passes over the samples.
        alpha: Learning rate applied to every per-sample update.
        true_slope: Slope of the line that generates the targets.
        true_intercept: Intercept of the line that generates the targets.
        log_every: Print the parameters every ``log_every`` epochs; ``0`` or
            ``None`` disables logging.

    Returns:
        Tuple ``(weights, bias)`` of shape-``(1,)`` tensors holding the fitted
        slope and intercept.

    Raises:
        ValueError: If ``num_points`` is smaller than 1.
    """
    if num_points < 1:
        raise ValueError("num_points must be at least 1")

    # Random starting point in [0, 1); gradients are computed manually, so
    # autograd tracking stays off.
    weights = torch.rand((1,), requires_grad=False)
    bias = torch.rand((1,), requires_grad=False)

    inputs = torch.arange(num_points, dtype=torch.float32)
    actual = true_intercept + true_slope * inputs

    for epoch in range(epochs):
        for x, target in zip(inputs, actual):
            diff = target - (weights * x + bias)

            # For loss = 1/2 * diff**2:
            #   d(loss)/d(weights) = -diff * x
            #   d(loss)/d(bias)    = -diff
            # Gradient descent subtracts alpha * gradient, i.e. adds alpha * diff * (x or 1).
            weights += alpha * (diff * x)
            bias += alpha * diff

        if log_every and epoch % log_every == 0:
            print(f"After Epoch: {epoch}, weights: {weights}, bias: {bias}")

    return weights, bias

# Run the manual-gradient fit; the parameters are printed every 20 epochs.
least_squares_fitting()

After Epoch: 0, weights: tensor([30.6046]), bias: tensor([7.9204])
After Epoch: 20, weights: tensor([30.2899]), bias: tensor([14.2499])
After Epoch: 40, weights: tensor([30.1381]), bias: tensor([17.2628])
After Epoch: 60, weights: tensor([30.0656]), bias: tensor([18.6968])
After Epoch: 80, weights: tensor([30.0313]), bias: tensor([19.3796])
After Epoch: 100, weights: tensor([30.0148]), bias: tensor([19.7047])
After Epoch: 120, weights: tensor([30.0070]), bias: tensor([19.8594])
After Epoch: 140, weights: tensor([30.0034]), bias: tensor([19.9331])
After Epoch: 160, weights: tensor([30.0016]), bias: tensor([19.9681])
After Epoch: 180, weights: tensor([30.0006]), bias: tensor([19.9848])
After Epoch: 200, weights: tensor([30.0003]), bias: tensor([19.9928])
After Epoch: 220, weights: tensor([30.0002]), bias: tensor([19.9966])
After Epoch: 240, weights: tensor([30.0002]), bias: tensor([19.9983])
After Epoch: 260, weights: tensor([30.0001]), bias: tensor([19.9992])
After Epoch: 280, weights: 

**Gradient Propagation**

In [149]:

def least_squares_gradient_propagation(num_points=20, total_epochs=1000, alpha=0.01,
                                       momentum=0.95, true_slope=30.0,
                                       true_intercept=20.0, log_every=500):
    """Fit ``y = weights * x + bias`` using autograd and SGD with momentum.

    Training data is the noiseless line ``true_intercept + true_slope * x``
    sampled at ``x = 0, 1, ..., num_points - 1``. Every epoch is one
    full-batch step on the mean squared error.

    Args:
        num_points: Number of training samples; must be at least 1.
        total_epochs: Number of full-batch optimisation steps.
        alpha: Learning rate passed to the optimizer.
        momentum: Momentum factor passed to ``torch.optim.SGD``.
        true_slope: Slope of the line that generates the targets.
        true_intercept: Intercept of the line that generates the targets.
        log_every: Print the loss every ``log_every`` epochs; ``0`` or
            ``None`` disables the per-epoch log (the final parameters are
            always printed).

    Returns:
        Tuple ``(weights, bias)`` of shape-``(1,)`` tensors, detached from the
        autograd graph, holding the fitted slope and intercept.

    Raises:
        ValueError: If ``num_points`` is smaller than 1.
    """
    if num_points < 1:
        raise ValueError("num_points must be at least 1")

    weights = torch.rand((1,), requires_grad=True)
    bias = torch.rand((1,), requires_grad=True)

    inputs = torch.arange(num_points, dtype=torch.float32)
    actual = true_intercept + true_slope * inputs

    # Notes from earlier experiments: Adam also worked here with alpha = 0.01.
    # With plain SGD a high alpha did not work, and adding momentum helped it
    # converge faster.
    optimizer = torch.optim.SGD([weights, bias], lr=alpha, momentum=momentum)

    for epoch in range(total_epochs):
        optimizer.zero_grad()
        predicted = weights * inputs + bias
        loss = torch.mean((actual - predicted) ** 2)
        loss.backward()
        optimizer.step()
        if log_every and epoch % log_every == 0:
            # `loss` was computed before the step, so this is the pre-update error.
            print(f"Epoch: {epoch}, Error: {loss.item()}")

    print(f"Weights: {weights}, bias: {bias}")
    return weights.detach(), bias.detach()

# Run the autograd + SGD fit; the loss is printed every 500 epochs and the fitted parameters at the end.
least_squares_gradient_propagation()

Epoch: 0, Error: 120992.140625
Epoch: 500, Error: 1.1008524092304128e-09
Weights: tensor([30.0000], requires_grad=True), bias: tensor([20.0000], requires_grad=True)
