<a href="https://colab.research.google.com/github/priyeshkapadia/dl_pytorch_pluralsight/blob/main/CUDASemantics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

In [2]:
# Check that PyTorch can use a CUDA device before running the cells below
torch.cuda.is_available()

True

In [3]:
# Initialise PyTorch's CUDA state explicitly.
# This is only required when interacting with PyTorch through its C API;
# from Python, CUDA state is initialised automatically on first use.
torch.cuda.init()

In [4]:
# Index of the currently selected (default) CUDA device
torch.cuda.current_device()

0

In [5]:
# Number of CUDA devices available to PyTorch
torch.cuda.device_count()

1

In [6]:
# GPU memory (in bytes) currently occupied by allocated tensors;
# nothing has been placed on the GPU yet at this point
torch.cuda.memory_allocated()

0

In [7]:
# Memory (in bytes) held by PyTorch's caching allocator; it is kept reserved to
# speed up later tensor allocations/deallocations on the GPU.
# `memory_cached()` is deprecated and was renamed to `memory_reserved()`.
torch.cuda.memory_reserved()



0

In [8]:
# Device handle without an index: it refers to the current (default) CUDA device
cuda = torch.device('cuda')
cuda

device(type='cuda')

In [9]:
# Explicitly select a CUDA device by index (here GPU 0)
cuda0 = torch.device('cuda:0')
cuda0

device(type='cuda', index=0)

In [13]:
# No device argument is given, so this tensor is created on the CPU
x = torch.tensor([10.0, 20.0])
x

tensor([10., 20.])

In [16]:
# `cuda` carries no index, so the tensor is placed on the current CUDA device
x_default = torch.tensor([10., 20.], device=cuda)
x_default

tensor([10., 20.], device='cuda:0')

In [17]:
# Explicit device: the tensor is created directly on cuda:0
x0 = torch.tensor([10., 20.], device=cuda0)
x0

tensor([10., 20.], device='cuda:0')

In [18]:
# Tensor.cuda() returns a copy of the CPU tensor `x` on the current CUDA device
y = x.cuda()
y

tensor([10., 20.], device='cuda:0')

In [19]:
# torch.cuda.device(0) selects GPU 0 as the current CUDA device inside the block.
# It only affects CUDA allocations: a tensor created without a device argument
# is still placed on the CPU.
with torch.cuda.device(0):
    a = torch.tensor([10., 20.])                # no device given -> CPU tensor
    a1 = torch.tensor([10., 20.], device=cuda)  # index-less `cuda` -> the block's current GPU

In [20]:
# `a` was created without a device argument, so it is a CPU tensor
a

tensor([10., 20.])

In [21]:
# `a1` was created with device=cuda, so it lives on the GPU
a1

tensor([10., 20.], device='cuda:0')

In [22]:
# Tensor.to(device=...) returns a copy on the specified device (here CPU -> cuda:0).
# The same call can be used to copy data from one CUDA device to another.
b = a.to(device=cuda0)

In [23]:
# The copy of `a` that now lives on cuda:0
b

tensor([10., 20.], device='cuda:0')

In [24]:
# Operations cannot combine tensors that live on different devices (e.g. two different GPUs)

In [25]:
# GPU memory (in bytes) now occupied by the CUDA tensors created above
torch.cuda.memory_allocated()

3584

In [26]:
# Bytes reserved by the caching allocator.
# `memory_reserved()` replaces the deprecated `memory_cached()`.
torch.cuda.memory_reserved()



2097152

In [27]:
# Release cached memory that is not currently occupied by any tensor
torch.cuda.empty_cache()

In [29]:
# In this case no cached memory is unused, so the reserved amount stays the same.
# `memory_reserved()` replaces the deprecated `memory_cached()`.
torch.cuda.memory_reserved()

2097152

In [30]:
# The `new_*` factory methods build a tensor with the same dtype and device
# as the tensor they are called on (here `x`).
preserve_context = x.new_full((2, 2), 1.1)
preserve_context

tensor([[1.1000, 1.1000],
        [1.1000, 1.1000]])

In [31]:
# Same factory call on the GPU tensor `x0`: the result stays on x0's device
preserve_context0 = x0.new_full((2, 2), 1.1)
preserve_context0

tensor([[1.1000, 1.1000],
        [1.1000, 1.1000]], device='cuda:0')