In [3]:
import torch

def generate_almost_orthogonal_vectors(d, epsilon, max_trials=10**6, device='cpu'):
    num_required = 2 ** d
    accepted = []

    # Sample candidates in batches for efficiency
    batch_size = 4096
    trials = 0

    while len(accepted) < num_required and trials < max_trials:
        # Sample random vectors from normal distribution
        vecs = torch.randn(batch_size, d, device=device)
        vecs = vecs / vecs.norm(dim=1, keepdim=True)  # Normalize to unit norm

        for v in vecs:
            if len(accepted) == 0:
                accepted.append(v)
            else:
                existing = torch.stack(accepted)
                inner_products = torch.matmul(existing, v)
                if (inner_products.abs() < epsilon).all():
                    accepted.append(v)
                    if len(accepted) >= num_required:
                        break
        trials += batch_size

    if len(accepted) < num_required:
        print(f"Only generated {len(accepted)} vectors after {trials} trials.")
    return torch.stack(accepted)

In [4]:
vecs = generate_almost_orthogonal_vectors(10, 0.01)

Only generated 4 vectors after 1003520 trials.


In [5]:
vecs

tensor([[-0.2360,  0.0880, -0.4707,  0.5020, -0.1355,  0.0266,  0.0794,  0.2411,
          0.5999,  0.1402],
        [ 0.0909,  0.3779, -0.6879, -0.1684, -0.0524,  0.2773, -0.0616,  0.1366,
         -0.4930,  0.0477],
        [-0.1851, -0.2865, -0.0113, -0.1748, -0.6970,  0.1196,  0.5782, -0.0406,
         -0.1216, -0.0450],
        [-0.1784,  0.3946,  0.1203,  0.1603,  0.1877,  0.2677,  0.2862, -0.1088,
          0.0397, -0.7551]])

In [8]:
import torch
import math

def generate_low_inner_product_vectors(n, epsilon, device='cpu', seed=None):
    if seed is not None:
        torch.manual_seed(seed)

    # Compute k = exp(epsilon^2 * n / 4)
    k = int(math.exp((epsilon**2 * n) / 4))
    
    # Generate k x n random ±1 vectors
    signs = torch.randint(0, 2, (k, n), device=device, dtype=torch.float32) * 2 - 1  # ±1
    vectors = signs / math.sqrt(n)  # Normalize each to have norm 1

    # Compute inner products
    ip_matrix = torch.matmul(vectors, vectors.T)
    mask = torch.eye(k, device=device).bool()
    ip_matrix[mask] = 0  # zero out diagonal

    max_ip = ip_matrix.abs().max().item()
    success = (ip_matrix.abs() < epsilon).all().item()

    print(f"Generated {k} vectors in {n} dimensions.")
    print(f"Maximum off-diagonal inner product: {max_ip:.4f}")
    print("All inner products < ε:", success)

    return vectors


In [31]:
vecs = generate_low_inner_product_vectors(1000, 0.2)

Generated 22026 vectors in 1000 dimensions.
Maximum off-diagonal inner product: 0.1820
All inner products < ε: True


In [33]:
vecs

tensor([[-0.0316,  0.0316,  0.0316,  ...,  0.0316, -0.0316, -0.0316],
        [ 0.0316, -0.0316,  0.0316,  ...,  0.0316, -0.0316, -0.0316],
        [ 0.0316, -0.0316,  0.0316,  ...,  0.0316, -0.0316,  0.0316],
        ...,
        [ 0.0316,  0.0316, -0.0316,  ..., -0.0316, -0.0316,  0.0316],
        [ 0.0316,  0.0316,  0.0316,  ..., -0.0316,  0.0316,  0.0316],
        [-0.0316,  0.0316, -0.0316,  ...,  0.0316,  0.0316,  0.0316]])

In [None]:
torch.linalg.norm(vecs, dim=1)

In [74]:
int(math.exp((0.2**2 * 1000) / 4))

22026

In [82]:
proj = torch.randn(1000, 500)

In [83]:
stud_vecs = vecs @ proj
stud_vecs = stud_vecs / stud_vecs.norm(dim=1, keepdim=True)

In [84]:
stud_vecs.shape

torch.Size([22026, 500])

In [85]:
torch.linalg.norm(stud_vecs, dim=1)

tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000])

In [86]:
ip_matrix = torch.matmul(stud_vecs, stud_vecs.T)
mask = torch.eye(22026).bool()
ip_matrix[mask] = 0  # zero out diagonal

max_ip = ip_matrix.abs().max().item()
success = (ip_matrix.abs() < 0.2).float()


In [90]:
22026*22026 - success.sum()

tensor(107040.)

In [91]:
107040/2

53520.0