In [58]:
import torch

In [59]:
# Check that PyTorch can see a usable CUDA setup before calling any other GPU API.
torch.cuda.is_available()

True

In [60]:
# Initialize PyTorch's CUDA state. You may need to call this explicitly if you
# are interacting with PyTorch via its C API; ordinary Python use does not
# require it, because CUDA state is initialized on demand.

torch.cuda.init()

In [61]:
# Index of the currently selected CUDA device (the default target for CUDA work).
torch.cuda.current_device()

0

In [62]:
# Number of CUDA devices visible to PyTorch; valid indices run from 0 to count - 1.
torch.cuda.device_count()

2

In [63]:
# GPU memory currently occupied by tensors on the current device, in bytes.
torch.cuda.memory_allocated()

7168

In [64]:
# GPU memory managed by PyTorch's caching allocator on the current device, in
# bytes. `memory_reserved()` replaces the deprecated `memory_cached()` name
# (available since PyTorch 1.4).
torch.cuda.memory_reserved()

1048576

In [65]:
# A CUDA device handle without an index. It is not tied to a specific GPU:
# it resolves to whichever device is current at the time it is used.
cuda = torch.device('cuda')

cuda

device(type='cuda')

In [66]:
# Explicitly indexed device handles. Only two GPUs are present on this machine,
# so 'cuda:2' deliberately names a device that does not exist; it is used
# further down to show what happens when allocating on an invalid device.
cuda0, cuda1, cuda2 = map(torch.device, ('cuda:0', 'cuda:1', 'cuda:2'))

In [67]:
# All three handles are created without error, including the out-of-range
# 'cuda:2': the failure only appears later, when a tensor is allocated on it.
cuda0, cuda1, cuda2

(device(type='cuda', index=0),
 device(type='cuda', index=1),
 device(type='cuda', index=2))

In [91]:
# No `device` argument: the tensor is created in CPU memory.
x = torch.tensor([10., 20.])

x

tensor([10., 20.])

In [69]:
# The index-less 'cuda' device resolves to the current GPU, which is GPU 0 here.
x_default = torch.tensor([10., 20.], device=cuda)

x_default

tensor([10., 20.], device='cuda:0')

In [70]:
# Allocate directly on GPU 0 by passing the explicitly indexed device.
x0 = torch.tensor([10., 20.], device=cuda0)

x0

tensor([10., 20.], device='cuda:0')

In [71]:
# Allocate directly on GPU 1; the current device (GPU 0) is not changed by this.
x1 = torch.tensor([10., 20.], device=cuda1)

x1

tensor([10., 20.], device='cuda:1')

In [72]:
# Only two GPUs are visible (indices 0 and 1), so allocating on 'cuda:2' is
# expected to fail. The error is the point of this cell, so it is caught and
# printed; an uncaught exception would stop a "Restart & Run All".
try:
    x2 = torch.tensor([10., 20.], device=cuda2)
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')
else:
    # Reached only on a machine that actually has a third GPU.
    print(x2)

RuntimeError: CUDA error: invalid device ordinal

### `Tensor.cuda()` returns a copy of this object in CUDA memory.

If this object is already in CUDA memory and on the correct device, then no copy is performed and the original object is returned.

In [92]:
# Transferring a tensor from CPU to GPU 0: with no argument, .cuda() targets
# the current device, which is GPU 0 here.
y = x.cuda()

y

tensor([10., 20.], device='cuda:0')

In [74]:
# Transferring a tensor from GPU 1 to GPU 0: x1 lives on GPU 1, and .cuda()
# with no argument copies it to the current device (GPU 0).
y0 = x1.cuda()

y0

tensor([10., 20.], device='cuda:0')

In [75]:
# torch.cuda.device(idx) is a context manager that switches the current CUDA
# device for the duration of the `with` block and restores it on exit.
print('Outside with context: ', torch.cuda.current_device())

with torch.cuda.device(1):
    
    # Inside the block, GPU 1 is the current device.
    print('Inside with context: ', torch.cuda.current_device())

# Back outside, the previous current device is in effect again.
print('Outside with context again: ', torch.cuda.current_device())

Outside with context:  0
Inside with context:  1
Outside with context again:  0


In [76]:
# Create three tensors while GPU 1 is the current device to show how the
# `device` argument interacts with the surrounding context.
with torch.cuda.device(1):
    
    # No device argument: created on the CPU, regardless of the CUDA context.
    a = torch.tensor([10., 20.])
    
    # An explicitly indexed device (cuda0) overrides the context's current device.
    a0 = torch.tensor([10., 20.], device=cuda0)
    
    # The index-less 'cuda' device follows the context, so this lands on GPU 1.
    a1 = torch.tensor([10., 20.], device=cuda)

In [77]:
# Created without a device argument inside the context: it is a CPU tensor.
a

tensor([10., 20.])

In [78]:
# Created with the explicit cuda0 device: it is on GPU 0 despite the GPU 1 context.
a0

tensor([10., 20.], device='cuda:0')

In [79]:
# Created with the index-less 'cuda' device inside the context: it is on GPU 1.
a1

tensor([10., 20.], device='cuda:1')

In [80]:
# .to(device=...) gives a tensor on the requested device; here a0 (GPU 0) is
# copied to GPU 1. a0 itself stays on GPU 0.
b1 = a0.to(device=cuda1)

b1

tensor([10., 20.], device='cuda:1')

In [81]:
# Adding a CPU tensor (a) to a GPU tensor (a0) is not allowed. The error is the
# point of this cell, so it is caught and printed; an uncaught exception would
# stop a "Restart & Run All".
try:
    sum_a = a + a0
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

RuntimeError: expected type torch.FloatTensor but got torch.cuda.FloatTensor

In [82]:
# Adding tensors that live on different GPUs (a1 on GPU 1, a0 on GPU 0) is not
# allowed either. The error is the point of this cell, so it is caught and
# printed; an uncaught exception would stop a "Restart & Run All".
try:
    sum_a = a1 + a0
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

RuntimeError: binary_op(): expected both inputs to be on same device, but input a is on cuda:1 and input b is on cuda:0

In [83]:
# Both operands are on GPU 1, so the addition succeeds and the result is
# produced on GPU 1 as well.
sum_ax = a1 + x1

sum_ax

tensor([20., 40.], device='cuda:1')

In [84]:
# Re-read the bytes occupied by tensors on the current device; the figure is
# higher than the earlier reading now that more tensors have been created.
torch.cuda.memory_allocated()

9728

In [85]:
# GPU memory managed by the caching allocator on the current device, in bytes.
# `memory_reserved()` replaces the deprecated `memory_cached()` name
# (available since PyTorch 1.4).
torch.cuda.memory_reserved()

1048576

In [86]:
# Ask the caching allocator to release cached memory that no tensor is using.
# Memory still occupied by live tensors is not freed by this call.
torch.cuda.empty_cache()

In [87]:
# Re-read the allocator-managed memory after empty_cache(), using
# `memory_reserved()` instead of the deprecated `memory_cached()`.
# The recorded figure is unchanged, presumably because the reserved block
# still backs live tensors and therefore cannot be released.
torch.cuda.memory_reserved()

1048576

### `torch.Tensor.new_*` methods preserve the dtype and the device of the original tensor

In [88]:
# x is a CPU tensor, so the new 2x2 tensor filled with 1.1 is also on the CPU.
preserve_context = x.new_full([2, 2], fill_value=1.1)

preserve_context

tensor([[1.1000, 1.1000],
        [1.1000, 1.1000]])

In [89]:
# x0 is on GPU 0, so the new 2x2 tensor is created on GPU 0 without passing a device.
preserve_context0 = x0.new_full([2, 2], fill_value=1.1)

preserve_context0

tensor([[1.1000, 1.1000],
        [1.1000, 1.1000]], device='cuda:0')

In [90]:
# x1 is on GPU 1, so the new 2x2 tensor is created on GPU 1 without passing a device.
preserve_context1 = x1.new_full([2, 2], fill_value=1.1)

preserve_context1

tensor([[1.1000, 1.1000],
        [1.1000, 1.1000]], device='cuda:1')