### Get GPU details

In [34]:
import torch as t

# Report which CUDA device PyTorch will use, or tell the reader that none was found.
if not t.cuda.is_available():
    print("No GPU is available.")
    print("If there are no GPUs available in your machine, try Google CoLab which is freely available on the web.")
else:
    print("GPU details:")
    print("************")
    print("GPU name = ", t.cuda.get_device_name())
    print("No of GPUs available = ", t.cuda.device_count())
    # Despite its name, this holds the integer index of the current device (e.g. 0),
    # which is what get_device_properties() is given below.
    cur_device_name = t.cuda.current_device()
    print("Current device name = ", cur_device_name)
    print("Current device properties = ", t.cuda.get_device_properties(cur_device_name))

GPU details:
************
GPU name =  GeForce RTX 2060
No of GPUs available =  1
Current device name =  0
Current device properties =  _CudaDeviceProperties(name='GeForce RTX 2060', major=7, minor=5, total_memory=5931MB, multi_processor_count=30)


### Creating an object to represent the CUDA device

In [35]:
# Device object for CUDA device index 0, built from (type, index) rather than
# the equivalent 'cuda:0' string; later cells pass it as `device=`.
cuda0 = t.device("cuda", 0)
print(cuda0)

cuda:0


### Create two tensors on the current CUDA device and add them

In [36]:
# Build two 3x3 tensors directly on the CUDA device and add them there.
a = t.ones(3, 3, device=cuda0)
print("a = ", a)
# Use a float fill value: with an integer fill, newer PyTorch releases infer an
# integer dtype for `b`, which would no longer match the float tensor shown in
# the recorded output.
b = t.full((3, 3), 5.0, device=cuda0)
print("b = ", b)
c = a + b  # To perform the operation on CUDA, both a and b must be on the CUDA device
print("c = ", c)

a =  tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')
b =  tensor([[5., 5., 5.],
        [5., 5., 5.],
        [5., 5., 5.]], device='cuda:0')
c =  tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]], device='cuda:0')


### Performance - NumPy vs. CPU tensors vs. GPU tensors

In [37]:
import numpy as np

#### Numpy

In [38]:
%%time
# NumPy baseline: 10,000 rounds of "draw two 100x100 standard-normal matrices, multiply them".
# NOTE(review): np.random.randn produces float64 while torch.randn defaults to float32,
# so this is not a like-for-like comparison with the tensor cells - confirm that is intended.
for i in range(10_000):
    a, b = np.random.randn(100, 100), np.random.randn(100, 100)
    c = a @ b  # matrix product, same as np.matmul(a, b) for 2-D arrays

CPU times: user 40.4 s, sys: 482 ms, total: 40.9 s
Wall time: 6.82 s


#### CPU tensors

In [39]:
%%time
# CPU-tensor run of the same workload: 10,000 rounds of drawing two 100x100
# standard-normal tensors (no device given, so they are created on the CPU)
# and multiplying them.
for i in range(10_000):
    a, b = t.randn(100, 100), t.randn(100, 100)
    c = a @ b  # matrix product, same as t.matmul(a, b)

CPU times: user 8.04 s, sys: 93.9 ms, total: 8.13 s
Wall time: 1.37 s


#### GPU tensors

In [40]:
%%time
# GPU run of the same workload: 10,000 rounds of drawing two 100x100
# standard-normal tensors on the CUDA device and multiplying them there.
for i in range(10000):
    gpu_a = t.randn(100, 100, device=cuda0) #Generate random numbers from standard normal distribution
    gpu_b = t.randn(100, 100, device=cuda0)
    gpu_c = t.matmul(gpu_a, gpu_b)
# CUDA kernels are queued asynchronously, so the Python loop can finish while
# the GPU is still working. Wait for all queued work on the device so that
# %%time reports the real execution time rather than just the launch overhead.
t.cuda.synchronize(cuda0)

CPU times: user 562 ms, sys: 209 ms, total: 770 ms
Wall time: 769 ms


### Perform operations on a GPU

In [41]:
# Pick the GPU when one is present and fall back to the CPU otherwise, so the
# whole cell (including the prints below) also runs on machines without CUDA
# instead of failing with a NameError on `z`.
device = t.device("cuda" if t.cuda.is_available() else "cpu")
x = t.ones(5, device=device)  # x is created directly on the selected device
y = t.ones(5)                 # y is created on the CPU ...
y = y.to(device)              # ... and then transferred to the selected device
z = x + y                     # both operands are on the same device, so the sum is too

print(z)
z = z.to("cpu") # z is now on CPU, remember that numpy works only on CPUs
print(z)

tensor([2., 2., 2., 2., 2.], device='cuda:0')
tensor([2., 2., 2., 2., 2.])
