### Numpy code

In [2]:
import numpy as np

# Example: Large matrices (adjust size as needed)
n = 7000  # For very large matrices, ensure you have enough RAM
A = np.random.rand(n, n).astype(np.float32)
B = np.random.rand(n, n).astype(np.float32)

C = np.dot(A, B)  # warm-up and Matrix multiplication

%timeit -r 2 -o np.dot(A, B)

print(f"Result shape: {C.shape}")
print(f"Result type: {C.dtype}")


1.05 s ± 248 μs per loop (mean ± std. dev. of 2 runs, 1 loop each)
Result shape: (7000, 7000)
Result type: float32


In [None]:
import torch

# Target the CUDA device; every tensor below is allocated directly on it.
device = torch.device('cuda')
print("Device:", device)

# Create the operand matrices in Torch (float32).
# `n` is not defined in this cell; it is reused from the NumPy cell above.
A_torch = torch.rand((n, n), dtype=torch.float32, device=device)
B_torch = torch.rand((n, n), dtype=torch.float32, device=device)

# Warm-up: run one multiplication before timing and wait for the GPU to finish
# it, so one-time start-up costs are not included in the measurement.
C_t = A_torch @ B_torch
torch.cuda.synchronize()

# Medición
def torch_mm():
    """Multiply the global matrices ``A_torch`` and ``B_torch`` and wait for the GPU.

    CUDA work is queued asynchronously, so the call synchronizes before
    returning; timing this function therefore covers the whole multiplication
    and not only its launch.

    Returns:
        The matrix product ``A_torch @ B_torch`` as a tensor.
    """
    product = torch.matmul(A_torch, B_torch)
    torch.cuda.synchronize()
    return product

%timeit -r 2 -o torch_mm()

print("Result shape:", tuple(C_t.shape))
print("Result dtype:", C_t.dtype)

Resultado tras lanzarlo a la cola bohr-gpu:
```text
667 ms ± 1.49 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)
Result shape: (7000, 7000)
Result type: float32
Device: cuda
49.8 ms ± 160 μs per loop (mean ± std. dev. of 2 runs, 10 loops each)
Result shape: (7000, 7000)
Result dtype: torch.float32
```

A continuación se resuelve el ejercicio de pi con *PyTorch*:

In [None]:
import time

def calc_pi_torch(x, y):
    """Estimate pi by Monte Carlo from the sample points ``(x, y)``.

    Counts the points that fall strictly inside the unit circle
    (``x*x + y*y < 1``) and scales the hit fraction by 4. The result only
    approximates pi when the points are uniform over the square
    ``[-1, 1] x [-1, 1]``.

    Args:
        x: Tensor of x coordinates.
        y: Tensor of y coordinates, broadcastable against ``x``.

    Returns:
        A 0-dim tensor holding ``4 * hits / number_of_points``.
    """
    hits = (x*x + y*y) < 1.0

    # Take the sample count from the data itself instead of a global ``N``:
    # the estimate stays correct for inputs of any size, and the function no
    # longer depends on a name defined elsewhere in the notebook.
    return 4.0 * hits.sum() / hits.numel()

# Number of Monte Carlo sample points.
N = 10_000_000

# Draw both coordinates uniformly between -1 and 1, directly on `device`
# (`device` is reused from the matrix-multiplication cell above).
x = torch.empty(N, device = device, dtype = torch.float32).uniform_(-1.0, 1.0)
y = torch.empty(N, device = device, dtype = torch.float32).uniform_(-1.0, 1.0)

# Warm-up: run once untimed and wait for the GPU, so one-time start-up costs
# are not included in the measurement.
calc_pi_torch(x, y)
torch.cuda.synchronize()

# Timed run: synchronize before reading the clock again so the interval
# covers the GPU work itself and not only its launch.
t0 = time.perf_counter()
pi = calc_pi_torch(x, y)
torch.cuda.synchronize()
t1 = time.perf_counter()

# .item() copies the scalar result to the host; elapsed time is shown in ms.
print(f"pi (torch) = {pi.item():.6f}")
print(f"Tiempo total (torch): {(t1 - t0)*1e3:.3f} ms")

Resultado tras lanzarlo a la cola bohr-gpu:
```text
pi (torch) = 3.142135
Tiempo total (torch): 0.999 ms
```