In [1]:
import torch

# Defenition

In [2]:
A = torch.tensor(
    [
        [1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]
    ]
    )

A

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

# Operations

In [3]:
A + A

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.],
        [14., 16., 18.]])

In [4]:
A - A

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [5]:
A @ A

tensor([[ 30.,  36.,  42.],
        [ 66.,  81.,  96.],
        [102., 126., 150.]])

In [6]:
A * 2

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.],
        [14., 16., 18.]])

In [7]:
A**2

tensor([[ 1.,  4.,  9.],
        [16., 25., 36.],
        [49., 64., 81.]])

In [8]:
A * A

tensor([[ 1.,  4.,  9.],
        [16., 25., 36.],
        [49., 64., 81.]])

In [9]:
A.sum(axis=0)

tensor([12., 15., 18.])

In [10]:
A.sum(axis=1)

tensor([ 6., 15., 24.])

# Device

In [11]:
A.device

device(type='cpu')

In [12]:
A = A.to('cuda')

In [13]:
A.device

device(type='cuda', index=0)

In [14]:
B = torch.rand(3, 3)

B

tensor([[0.2292, 0.7523, 0.3381],
        [0.5637, 0.1692, 0.9883],
        [0.9951, 0.3423, 0.1994]])

In [15]:
B.device

device(type='cpu')

In [16]:
A @ B

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_CUDA_mm)

In [17]:
B = B.to('cuda')

In [18]:
A @ B

tensor([[ 4.3418,  2.1177,  2.9129],
        [ 9.7057,  5.9093,  7.4905],
        [15.0697,  9.7009, 12.0681]], device='cuda:0')

## CUDA Speed

In [19]:
from datetime import datetime

In [20]:
A = torch.rand(10000, 10000)
B = torch.rand(10000, 10000)

In [21]:
A.device

device(type='cpu')

In [22]:
B.device

device(type='cpu')

In [23]:
start_time = datetime.now()

A @ B

end_time = datetime.now()

time_difference = (end_time - start_time).total_seconds() * 10**3
print("Execution time of program is: ", time_difference, "ms")

Execution time of program is:  26341.388 ms


In [24]:
A = A.to('cuda')
B = B.to('cuda')

In [25]:
start_time = datetime.now()

A @ B

end_time = datetime.now()

time_difference = (end_time - start_time).total_seconds() * 10**3
print("Execution time of program is: ", time_difference, "ms")

Execution time of program is:  19.425 ms


# Gradient

## Example 1

In [26]:
a = torch.tensor([0.2])
b = torch.tensor([0.4], requires_grad=True)

In [27]:
a.requires_grad

False

In [29]:
b.requires_grad

True

In [30]:
c = a @ b

c

tensor(0.0800, grad_fn=<DotBackward0>)

In [31]:
c.requires_grad

True

In [32]:
c.backward()

In [33]:
a.grad

In [34]:
b.grad

tensor([0.2000])

## Example 2

In [35]:
a = torch.rand(5, 1)
b = torch.rand(5, 1, requires_grad=True)

In [36]:
a

tensor([[0.3332],
        [0.3824],
        [0.9758],
        [0.1457],
        [0.3392]])

In [37]:
b

tensor([[0.1544],
        [0.8050],
        [0.4637],
        [0.3794],
        [0.6585]], requires_grad=True)

In [38]:
c = a.T @ b

c

tensor([[1.0904]], grad_fn=<MmBackward0>)

In [39]:
c.backward()

In [40]:
a.grad

In [41]:
b.grad

tensor([[0.3332],
        [0.3824],
        [0.9758],
        [0.1457],
        [0.3392]])