In [1]:
import torch

In [None]:
# Start from a 3x4 matrix of zeros, then overwrite the last column with ones.
mat = torch.zeros(3, 4)
mat[..., -1] = 1  # `...` covers every leading dimension; -1 selects the final column
mat

tensor([[0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]])

In [9]:
# Integers 0..9 as a 1-D tensor.
mat = torch.arange(0, 10)
# Reverse along dimension 0. `flip` returns a new tensor and leaves `mat` untouched.
# (A bare `mat` in the middle of a cell displays nothing, so it was dropped.)
mat.flip(0)

tensor([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [17]:
# Uniform random 3x3 matrix with values in [0, 1).
mat = torch.rand(3, 3)

# Boolean-mask assignment: every entry below 0.5 is zeroed in place.
below_half = mat < 0.5
mat[below_half] = 0
mat

tensor([[0.9209, 0.8220, 0.5378],
        [0.6088, 0.7381, 0.0000],
        [0.6966, 0.0000, 0.0000]])

### View vs Reshape

`view` only works if the data is contiguous in memory (more precisely, if the requested shape is compatible with the tensor's existing strides). Contiguous means that the tensor is stored in row-major order (like in C). When doing something like a transpose, the tensor is no longer contiguous: the underlying data is not moved; only the strides are changed.

For example, 

x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
[1, 2, 3, 4, 5, 6] <- in memory
x.stride() = (3, 1) #move 3 per row and 1 per column

y = x.T
y = [[1, 4],
     [2, 5],
     [3, 6]]

→ [1, 2, 3, 4, 5, 6] It is still stored the same way in memory.
So, the stride for y is now y.stride() = (1, 3).

This is not contiguous. Thus, we cannot use view.

The alternative is using reshape. `reshape` returns a view when the tensor's strides allow it; otherwise it copies the data into a new contiguous tensor and returns that copy.

In [46]:
# A freshly created tensor is contiguous, so `reshape` and `view` behave the same here.
mat = torch.rand(2, 3, 4)
print(mat)

# Reinterpret the 24 values as (4, 3, 2) and take the block at index 1 of dim 0.
reshape = mat.reshape(4, 3, 2)[1]
print(reshape)

view = mat.view(4, 3, 2)[1]
print(view)

# Collapse all dimensions into a single 1-D tensor of 24 elements.
torch.flatten(mat)

tensor([[[0.1713, 0.3745, 0.6764, 0.9429],
         [0.3761, 0.2888, 0.6360, 0.0901],
         [0.6549, 0.6014, 0.3872, 0.3306]],

        [[0.5931, 0.1051, 0.4604, 0.2145],
         [0.4054, 0.8765, 0.9626, 0.3082],
         [0.9629, 0.2148, 0.9040, 0.8521]]])
tensor([[0.6360, 0.0901],
        [0.6549, 0.6014],
        [0.3872, 0.3306]])
tensor([[0.6360, 0.0901],
        [0.6549, 0.6014],
        [0.3872, 0.3306]])


tensor([0.1713, 0.3745, 0.6764, 0.9429, 0.3761, 0.2888, 0.6360, 0.0901, 0.6549,
        0.6014, 0.3872, 0.3306, 0.5931, 0.1051, 0.4604, 0.2145, 0.4054, 0.8765,
        0.9626, 0.3082, 0.9629, 0.2148, 0.9040, 0.8521])

In [55]:
# Random 4x4 matrix; the slice below picks out its central 2x2 block.
four = torch.rand(4, 4)
print(four)

inner = slice(1, 3)  # rows/columns 1 and 2
four[inner, inner]

tensor([[0.8251, 0.7888, 0.3800, 0.0900],
        [0.3225, 0.2092, 0.1795, 0.3958],
        [0.7340, 0.2396, 0.1373, 0.7040],
        [0.1934, 0.9287, 0.3233, 0.2934]])


tensor([[0.2092, 0.1795],
        [0.2396, 0.1373]])

In [67]:
# Broadcasting: a dimension of size one is virtually repeated so that both
# operands line up for the element-wise operation.
col = torch.ones(3, 1)  # (3, 1) -> behaves like (3, 4)
print(col)

row = torch.ones(1, 4)  # (1, 4) -> behaves like (3, 4)
print(row)

row + col

tensor([[1.],
        [1.],
        [1.]])
tensor([[1., 1., 1., 1.]])


tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])

In [100]:
# 3x3 integer matrix holding 1..9 in row-major order.
ten = torch.arange(1, 10).view(3, 3)
print(ten)

# Plain sum-normalization (not softmax): divide every row by its own total.
# keepdim=True keeps the sums as shape (3, 1) so they broadcast across the columns.
row_sums = ten.sum(dim=1, keepdim=True)
print(row_sums)

# True division promotes the integer tensor to floating point; each row now sums to 1.
ten = ten / row_sums
ten

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([[ 6],
        [15],
        [24]])


tensor([[0.1667, 0.3333, 0.5000],
        [0.2667, 0.3333, 0.4000],
        [0.2917, 0.3333, 0.3750]])

In [106]:
a = torch.arange(1, 4)  # [1, 2, 3]
b = torch.arange(4, 7)  # [4, 5, 6]

# For two 1-D tensors, matmul (the `@` operator) is the dot product: 1*4 + 2*5 + 3*6.
torch.matmul(a, b)

tensor(32)

In [136]:
# Two scalar leaves that autograd tracks.
x = torch.tensor(2.0, requires_grad=True)
z = torch.tensor(3.0, requires_grad=True)

# y = x^3 + 2z, built from two named terms.
cubic_term = x ** 3
linear_term = 2 * z
y = cubic_term + linear_term

# y is a scalar, so backward() needs no explicit gradient argument.
# dy/dx = 3x^2 = 12 at x = 2, and dy/dz = 2.
y.backward()
print(x.grad, z.grad)

tensor(12.) tensor(2.)


In [208]:
x = torch.rand(3, 4, requires_grad=True)


def func(x):
    """Return x**2 + sin(x), applied element-wise."""
    sine = torch.sin(x)
    return sine + x ** 2


y = func(x)

# y is not a scalar, so backward() needs an explicit vector for the
# vector-Jacobian product; a tensor of ones weights every output equally.
temp = torch.ones_like(y)
y.backward(temp)
print(x.grad)

tensor([[1.8134, 2.4482, 2.3711, 1.8886],
        [2.0594, 1.7317, 1.6445, 2.0376],
        [2.4464, 2.0109, 1.8500, 2.4485]])


In [230]:
def softmax(x, dim=-1):
    """Numerically stable softmax along ``dim``.

    Args:
        x: Input tensor.
        dim: Dimension to normalize over. Defaults to the last one.

    Returns:
        A tensor with the same shape as ``x`` whose entries along ``dim``
        are non-negative and sum to 1.
    """
    if x.numel() == 0:
        # Nothing to normalize; exp() still yields the right shape and dtype.
        return torch.exp(x)
    # Softmax is invariant to shifting the input by a constant, so subtract the
    # per-slice maximum first. This keeps exp() from overflowing to inf (and the
    # result from becoming inf/inf = nan) for large inputs.
    shift = torch.amax(x, dim=dim, keepdim=True).detach()
    exps = torch.exp(x - shift)  # computed once and reused for the denominator
    return exps / torch.sum(exps, dim=dim, keepdim=True)


softmax test passed.


In [248]:
# Batched matrix multiply: three independent (2, 2) @ (2, 1) products.
batch1 = torch.rand(3, 2, 2)
batch2 = torch.rand(3, 2, 1)

# The leading (batch) dimension is preserved, so the result has shape (3, 2, 1).
batch1.bmm(batch2)

tensor([[[0.7904],
         [0.4538]],

        [[1.3595],
         [0.3426]],

        [[0.5734],
         [0.3826]]])