# Cosine similarity speed

Computing the dot product of a 512-dimensional vector with a matrix of 1M vectors takes ~0.06 seconds. All vectors are L2-normalized, so each dot product is the cosine similarity.

For the larger sizes (10K vectors and up) the time increases roughly linearly with the number of vectors; the smallest sizes do not follow this trend.

In [3]:
import numpy as np
import time

# Benchmark: mean wall-clock time of a single matrix-vector dot product between
# `size` unit-length float32 row vectors and one unit-length query vector.
# Every vector is L2-normalized, so each dot product is a cosine similarity.

# Local, seeded generator: reproducible inputs without touching NumPy's global
# random state.
rng = np.random.default_rng(42)

dim = 512
num_runs = 10  # timed repetitions per size; the reported time is their mean

base_vector = rng.random(dim, dtype=np.float32)
base_vector /= np.linalg.norm(base_vector)

target_sizes = [100, 1_000, 10_000, 100_000, 1_000_000]

for size in target_sizes:
    # Draw float32 samples directly. Going through float64 and casting would
    # first allocate a (size, dim) float64 array (about 4 GB for 1M rows)
    # on top of the float32 result (about 2 GB).
    target_vectors = rng.random((size, dim), dtype=np.float32)
    norms = np.linalg.norm(target_vectors, axis=1)
    # Normalize each row in place to avoid allocating another (size, dim) copy.
    target_vectors /= norms[:, np.newaxis]

    # Warm-up call, excluded from the measurement.
    np.dot(target_vectors, base_vector)

    start_time = time.perf_counter()
    for _ in range(num_runs):
        np.dot(target_vectors, base_vector)
    end_time = time.perf_counter()
    print(f"Size: {size}, Time: {(end_time - start_time) / num_runs} seconds")

Size: 100, Time: 5.910000000142191e-06 seconds
Size: 1000, Time: 1.4350000000717955e-05 seconds
Size: 10000, Time: 0.0005769299999997202 seconds
Size: 100000, Time: 0.006369789999999398 seconds
Size: 1000000, Time: 0.06267658000000012 seconds
