In [1]:
import torch

# Defenition

In [3]:
A = torch.tensor(
    [
        [1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]
    ]
    )

In [7]:
print(f"A:\n{A}")


A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


# Operations

In [16]:
R = A + A

print(f"A:\n{A}\n\nR = A + A:\n{R}")


A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A + A:
tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.],
        [14., 16., 18.]])


In [17]:
R = A - A

print(f"A:\n{A}\n\nR = A - A:\n{R}")

A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A - A:
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [18]:
R = A @ A

print(f"A:\n{A}\n\nR = A @ A:\n{R}")

A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A @ A:
tensor([[ 30.,  36.,  42.],
        [ 66.,  81.,  96.],
        [102., 126., 150.]])


In [19]:
R = A * 2

print(f"A:\n{A}\n\nR = A * 2:\n{R}")

A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A * 2:
tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.],
        [14., 16., 18.]])


In [20]:
R = A**2

print(f"A:\n{A}\n\nR = A**2:\n{R}")

A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A**2:
tensor([[ 1.,  4.,  9.],
        [16., 25., 36.],
        [49., 64., 81.]])


In [21]:
R = A * A

print(f"A:\n{A}\n\nR = A * A:\n{R}")

A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A * A:
tensor([[ 1.,  4.,  9.],
        [16., 25., 36.],
        [49., 64., 81.]])


In [22]:
R = A.sum(axis=0)

print(f"A:\n{A}\n\nR = A.sum(axis=0):\n{R}")

R = A.sum(axis=1)

print(f"A:\n{A}\n\nR = A.sum(axis=1):\n{R}")

R = A.sum()

print(f"A:\n{A}\n\nR = A.sum():\n{R}")

A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A.sum(axis=0):
tensor([12., 15., 18.])
A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A.sum(axis=1):
tensor([ 6., 15., 24.])
A:
tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

R = A.sum():
45.0


# Device

In [23]:
A.device

device(type='cpu')

In [26]:
A = A.to('cuda')

A.device

device(type='cuda', index=0)

In [27]:
B = torch.rand(3, 3)

B

tensor([[0.7701, 0.5956, 0.3034],
        [0.3982, 0.8988, 0.5656],
        [0.6475, 0.7389, 0.8601]])

In [28]:
B.device

device(type='cpu')

In [29]:
A @ B

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_CUDA_mm)

In [30]:
B = B.to('cuda')

In [31]:
A @ B

tensor([[ 3.5091,  4.6100,  4.0148],
        [ 8.9565, 11.3099,  9.2020],
        [14.4040, 18.0099, 14.3893]], device='cuda:0')

## CUDA Speed

In [32]:
from datetime import datetime

In [33]:
A = torch.rand(10000, 10000)
B = torch.rand(10000, 10000)

In [34]:
A.device

device(type='cpu')

In [35]:
B.device

device(type='cpu')

In [36]:
start_time = datetime.now()

A @ B

end_time = datetime.now()

time_difference = (end_time - start_time).total_seconds() * 10**3
print("Execution time of program is: ", time_difference, "ms")

Execution time of program is:  4191.782 ms


In [37]:
A = A.to('cuda')
B = B.to('cuda')

In [38]:
start_time = datetime.now()

A @ B

end_time = datetime.now()

time_difference = (end_time - start_time).total_seconds() * 10**3
print("Execution time of program is: ", time_difference, "ms")

Execution time of program is:  3.816 ms


# Gradient

## Example 1

In [40]:
a = torch.tensor([0.2])
b = torch.tensor([0.4], requires_grad=True)

In [44]:
a.requires_grad

False

In [45]:
b.requires_grad

True

In [48]:
c = a @ b

c

tensor(0.0800, grad_fn=<DotBackward0>)

In [49]:
c.requires_grad

True

In [50]:
c.backward()

In [51]:
a.grad

In [52]:
b.grad

tensor([0.2000])

## Example 2

In [53]:
a = torch.rand(5, 1)
b = torch.rand(5, 1, requires_grad=True)

In [54]:
a

tensor([[0.3122],
        [0.4603],
        [0.3355],
        [0.8409],
        [0.8746]])

In [55]:
b

tensor([[0.4492],
        [0.6671],
        [0.5093],
        [0.6298],
        [0.9048]], requires_grad=True)

In [56]:
c = a.T @ b

c

tensor([[1.9392]], grad_fn=<MmBackward0>)

In [57]:
c.backward()

In [58]:
a.grad

In [59]:
b.grad

tensor([[0.3122],
        [0.4603],
        [0.3355],
        [0.8409],
        [0.8746]])