### Numpy code

In [1]:
import numpy as np

# Example: Large matrices (adjust size as needed)
# n = 7000  # For very large matrices, ensure you have enough RAM
# A = np.random.rand(n, n).astype(np.float32)
# B = np.random.rand(n, n).astype(np.float32)

# C = np.dot(A, B)  # warm-up and Matrix multiplication

# %timeit -r 2 -o np.dot(A, B)

# print(f"Result shape: {C.shape}")
# print(f"Result type: {C.dtype}")


In [1]:
import torch
import numpy as np
import time
import sys

# Definimos N (Tamaño de matriz)
# Usamos 7000 como pide el PDF, o leemos de argumentos si se pasa por script
N = 7000
if len(sys.argv) > 1:
    try:
        N = int(sys.argv[1])
    except:
        pass

print(f"--- Multiplicación de Matrices {N}x{N} con PYTORCH ---")

if torch.cuda.is_available():
    # 1. Configurar dispositivo
    device = torch.device("cuda")
    print(f"Dispositivo: {torch.cuda.get_device_name(0)}")
    
    # 2. Generar datos directamente en GPU (evitamos copiar CPU->GPU)
    # PyTorch usa float32 por defecto, ideal para GPUs
    t0 = time.time()
    A_gpu = torch.rand(N, N, device=device)
    B_gpu = torch.rand(N, N, device=device)
    print(f"Tiempo de generación de datos en VRAM: {time.time()-t0:.4f} s")
    
    # 3. Warm-up (Calentamiento)
    # Importante para cargar las librerías cuBLAS
    torch.mm(A_gpu, B_gpu)
    torch.cuda.synchronize()
    
    # 4. Medición
    start = time.time()
    C_gpu = torch.mm(A_gpu, B_gpu) # Multiplicación de matrices
    torch.cuda.synchronize()       # Esperar a que acabe
    end = time.time()
    
    elapsed = end - start
    
    # 5. Calcular GFLOPS
    # La fórmula para mult. matrices es 2 * N^3 operaciones
    ops = 2 * (N**3)
    gflops = (ops / elapsed) / 1e9
    
    print(f"Tiempo de ejecución (PyTorch): {elapsed:.5f} s")
    print(f"Rendimiento Gráfico: {gflops:.2f} GFLOPS")
    
else:
    print("¡Error! No se detectó GPU compatible con PyTorch.")

--- Multiplicación de Matrices 7000x7000 con PYTORCH ---
Dispositivo: NVIDIA GeForce GTX 1080


    Found GPU0 NVIDIA GeForce GTX 1080 which is of cuda capability 6.1.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (7.0) - (12.0)
    
    Please install PyTorch with a following CUDA
    configurations:  12.6 following instructions at
    https://pytorch.org/get-started/locally/
    
NVIDIA GeForce GTX 1080 with CUDA capability sm_61 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_70 sm_75 sm_80 sm_86 sm_90 sm_100 sm_120.
If you want to use the NVIDIA GeForce GTX 1080 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



AcceleratorError: CUDA error: no kernel image is available for execution on the device
Search for `cudaErrorNoKernelImageForDevice' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


Convirtiendo notebooks...
==========================================
 EJECUTANDO MULTIPLICACIÓN MATRICES (PyTorch)
==========================================
--- Multiplicación de Matrices 7000x7000 con PYTORCH ---
Dispositivo: NVIDIA GeForce RTX 2080 Ti
Tiempo de generación de datos en VRAM: 0.7333 s
Tiempo de ejecución (PyTorch): 0.06051 s
Rendimiento Gráfico: 11337.34 GFLOPS
==========================================
 EJECUTANDO PI CON PYTORCH
==========================================
--- Calculando PI con PyTorch (N=100000000) ---
PI: 3.1412896
Tiempo PyTorch: 1.75892 s