In [3]:
import time

It is important to know, that we can not accelerate `for loops` with pytorch, but we can export vector and matrix multiplications to a GPU, which is magnitudes faster in calculating these.

In [20]:
matrix_size = int(2e4)

In [21]:
# Lets create an example test case on cpu first:
import numpy as np
np.random.seed(42)

matrix_a = np.random.random(size=(matrix_size, matrix_size))
matrix_b = np.random.random(size=(matrix_size, matrix_size))

start_time = time.time()
matrix_c = matrix_a @ matrix_b
stop_time = time.time()

print(f"CPU calculation with numpy took {stop_time - start_time} seconds")

CPU calculation with numpy took 46.01549983024597 seconds


In [22]:
# lets do the same example again on CPU but with pytorch:
import torch
torch.random.manual_seed(42)

matrix_a = torch.randn(size=(matrix_size, matrix_size))
matrix_b = torch.randn(size=(matrix_size, matrix_size))

start_time = time.time()
matrix_c = matrix_a @ matrix_b
stop_time = time.time()

print(f"CPU calculation with torch took {stop_time - start_time} seconds")

CPU calculation with torch took 23.7554988861084 seconds


In [23]:
# now lets try out the same calculations on a GPU

# 1. check if a GPU is available:
print(f"GPU is availavble: {torch.cuda.is_available()}")

GPU is availavble: True


In [26]:
# when a GPU is available, we can move our arrays from the CPU-RAM to a GPU-RAM:

matrix_a_gpu = matrix_a.to("cuda")
matrix_b_gpu = matrix_b.to("cuda")

# if we now call the computation, it will be performed on the GPU, since all matrices are on the GPU:
with torch.no_grad():  # This line will prevent pytorch to automatically built a computational derivative for us
    start_time = time.time()
    matrix_c_gpu = matrix_a_gpu @ matrix_b_gpu
    stop_time = time.time()

print(f"GPU calculation with torch took {stop_time - start_time} seconds")

GPU calculation with torch took 0.0005002021789550781 seconds


In [27]:
# to now get back the result to a numpy array on the cpu:

matrix_c_numpy = matrix_c_gpu.cpu().numpy()

In [34]:
matrix_c_numpy[:5, :5]  # lets just look at a view of the matrix, to not overload our terminal

array([[  -7.542859 ,  -58.914436 ,    1.2196629,   59.79352  ,
        -146.47404  ],
       [  74.833    ,  -95.34216  ,  328.45026  ,  -83.59669  ,
          -3.9073808],
       [-167.2056   , -172.70403  , -376.35532  ,  -76.38499  ,
           7.7029734],
       [ 289.7333   ,   78.743256 , -173.91449  ,  -58.590466 ,
          48.24929  ],
       [ 126.0523   ,  113.8273   ,  -47.19424  ,  108.30636  ,
         -58.073307 ]], dtype=float32)