Matrix multiplication of two square matrices

In [1]:
import numpy as np
import time
import psutil

def estimate_flops(matrix_size, iterations=10):
    A = np.random.rand(matrix_size, matrix_size)
    B = np.random.rand(matrix_size, matrix_size)

    # Warm-up
    np.dot(A, B)

    start = time.time()
    for _ in range(iterations):
        np.dot(A, B)
    end = time.time()

    elapsed_time = end - start
    total_ops = 2 * (matrix_size ** 3) * iterations  # FLOPs for matrix multiplication
    flops = total_ops / elapsed_time
    return flops / (2**30)  # Convert to GFLOPS


def get_available_ram():
    mem = psutil.virtual_memory()
    return mem.available  # in bytes

def matrix_memory_required(size):
    # Each float64 takes 8 bytes; two matrices A and B
    return 2 * (size ** 2) * 8  # in bytes

def benchmark_loop(max_size=9728, step=256):
    available_ram = get_available_ram()
    print(f"Available RAM: {available_ram / (2 ** 20):.2f} MiB\n") # :.2f -> print decimal value upto 2 decimal places

    for size in range(step, max_size + step, step):
        required_mem = matrix_memory_required(size)
        print(f"{required_mem/(2**20):.2f} MiB", end=" ")
        if required_mem > available_ram * 0.5:  # Use only up to 50% of available RAM
            print(f"Skipping size {size}x{size} - requires too much memory ({required_mem / (2 ** 20):.2f} MiB)")
            continue

        gflops = estimate_flops(size)    
        print(f"Size: {size}x{size} → Estimated GFLOPS: {gflops:.2f}")

benchmark_loop()

Available RAM: 25980.14 MiB

1.00 MiB Size: 256x256 → Estimated GFLOPS: 79.11
4.00 MiB Size: 512x512 → Estimated GFLOPS: 92.17
9.00 MiB Size: 768x768 → Estimated GFLOPS: 118.58
16.00 MiB Size: 1024x1024 → Estimated GFLOPS: 136.71
25.00 MiB Size: 1280x1280 → Estimated GFLOPS: 144.38
36.00 MiB Size: 1536x1536 → Estimated GFLOPS: 148.15
49.00 MiB Size: 1792x1792 → Estimated GFLOPS: 176.13
64.00 MiB Size: 2048x2048 → Estimated GFLOPS: 154.05
