conda environment used:

`conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia`

https://pytorch.org/get-started/locally/#linux-python

In [2]:
import torch
assert torch.cuda.is_available()

In [7]:
torch.cuda.device_count()

1

In [8]:
torch.cuda.current_device()

0

In [10]:
torch.cuda.get_device_name(torch.cuda.current_device())

'NVIDIA GeForce GTX 1080 Ti'

In [11]:
x = torch.rand(5, 3)
print(x)

tensor([[0.0034, 0.2483, 0.7439],
        [0.4013, 0.8217, 0.9261],
        [0.1986, 0.5877, 0.4049],
        [0.4868, 0.2526, 0.3390],
        [0.3640, 0.1574, 0.0906]])


---

In [14]:
import torch
import numpy as np

np.set_printoptions(precision=3)
a = [1, 2, 3]
b = np.array([4, 5, 6], dtype=np.int32)
t_a = torch.tensor(a)
t_b = torch.from_numpy(b)

print(t_a)
print(t_b)

tensor([1, 2, 3])
tensor([4, 5, 6], dtype=torch.int32)


In [15]:
torch.manual_seed(1)
t1 = 2 * torch.rand(5, 2) - 1
t2 = torch.normal(mean=0, std=1, size=(5, 2))

In [16]:
print(t1)
print(t2)

tensor([[ 0.5153, -0.4414],
        [-0.1939,  0.4694],
        [-0.9414,  0.5997],
        [-0.2057,  0.5087],
        [ 0.1390, -0.1224]])
tensor([[ 0.8590,  0.7056],
        [-0.3406, -1.2720],
        [-1.1948,  0.0250],
        [-0.7627,  1.3969],
        [-0.3245,  0.2879]])


In [17]:
results = torch.multiply(t1, t2)
results

tensor([[ 0.4426, -0.3114],
        [ 0.0660, -0.5970],
        [ 1.1249,  0.0150],
        [ 0.1569,  0.7107],
        [-0.0451, -0.0352]])

In [18]:
results.device

device(type='cpu')

In [19]:
device_cuda = torch.device('cuda')
results = torch.multiply(t1.to(device=device_cuda), t2.to(device=device_cuda))
results

tensor([[ 0.4426, -0.3114],
        [ 0.0660, -0.5970],
        [ 1.1249,  0.0150],
        [ 0.1569,  0.7107],
        [-0.0451, -0.0352]], device='cuda:0')

In [20]:
results.device

device(type='cuda', index=0)

---

In [9]:
import time

matrix_size = 32*512

x = torch.randn(matrix_size, matrix_size)
y = torch.randn(matrix_size, matrix_size)

print("** CPU Speed **")
start = time.time()
result = torch.matmul(x, y)
end = time.time()

print(end - start)
print(f"device: {result.device}")

torch.cuda.synchronize()

** CPU Speed **
15.975645542144775
device: cpu


In [11]:
x_gpu = x.to(torch.device('cuda'))
y_gpu = y.to(torch.device('cuda'))

print("** GPU Speed **")
start = time.time()
gpu_result = torch.matmul(x_gpu, y_gpu)
end = time.time()

print(end - start)
print(f"device: {gpu_result.device}")

torch.cuda.synchronize()

** GPU Speed **
0.0003991127014160156
device: cuda:0
