In [5]:
import torch
import time

# Function to perform matrix multiplication and measure time
def benchmark(device, size=10000):
    # Create random tensors
    A = torch.randn(size, size, device=device)
    B = torch.randn(size, size, device=device)
    
    # Measure time
    start_time = time.time()
    C = torch.matmul(A, B)
    torch.cuda.synchronize() if device == "cuda" else None  # Synchronize for accurate timing on CUDA (not used here, just for reference)
    end_time = time.time()
    
    return end_time - start_time

# Probe once for the MPS (Metal Performance Shaders) backend and reuse the answer.
has_mps = torch.backends.mps.is_available()

# Device handles: mps_device falls back to the CPU when MPS is missing.
mps_device = torch.device("mps" if has_mps else "cpu")
print(
    "Using MPS (Metal Performance Shaders) for GPU acceleration"
    if has_mps
    else "MPS not available, using CPU"
)
cpu_device = torch.device("cpu")

# Baseline: time the multiplication on the CPU.
cpu_time = benchmark(cpu_device)
print(f"CPU time: {cpu_time:.4f} seconds")

# Accelerated run: only attempted when the MPS backend exists.
mps_time = benchmark(mps_device) if has_mps else None
if mps_time is None:
    print("MPS not available")
else:
    print(f"MPS GPU time: {mps_time:.4f} seconds")

# Report the CPU/MPS ratio. The truthiness test skips both None and a
# 0.0 reading, so the division below can never hit zero.
if mps_time:
    speedup = cpu_time / mps_time
    print(f"Speedup: {speedup:.2f}x")

Using MPS (Metal Performance Shaders) for GPU acceleration
CPU time: 0.8511 seconds
MPS GPU time: 0.0002 seconds
Speedup: 4451.00x
