# In [None]:
import random
import math
from typing import List, Tuple

# Shared generator with a fixed seed, so repeated runs draw the same vectors.
rnd = random.Random(42)


# 1) Generate M random binary vectors of length N (Bernoulli(0.5))
def gen(M: int, N: int) -> List[List[int]]:
    """Draw M binary vectors of length N from the module-level generator.

    Each entry is 1 when a fresh ``rnd.random()`` draw falls below 0.5 and
    0 otherwise. Draws are consumed row by row, so the result depends on how
    much of ``rnd`` has already been used.

    Args:
        M: Number of vectors (rows) to produce.
        N: Number of entries in each vector.

    Returns:
        A list of M lists, each holding N values from {0, 1}.
    """
    rows: List[List[int]] = []
    for _ in range(M):
        row: List[int] = []
        for _ in range(N):
            # The comparison yields a bool; int() maps it onto 1 / 0.
            row.append(int(rnd.random() < 0.5))
        rows.append(row)
    return rows

# Cosine similarity for binary vectors
def cosine(a: List[int], b: List[int]) -> float:
    """Return the cosine similarity of two 0/1 vectors.

    For binary entries the dot product is the count of positions where both
    vectors are 1, and each squared norm is simply the count of ones.
    Positions are paired with ``zip``, so a longer vector is truncated to the
    length of the shorter one.

    Args:
        a: First binary vector.
        b: Second binary vector.

    Returns:
        ``dot / sqrt(|a| * |b|)``, or 0.0 when either vector has no ones.
    """
    dot = 0
    na = 0
    nb = 0
    for ai, bi in zip(a, b):
        dot += ai & bi
        na += ai
        nb += bi
    if na == 0 or nb == 0:
        return 0.0  # define 0 when a or b is zero
    # Take a single square root of the exact integer product. Multiplying two
    # separately rounded roots (sqrt(na) * sqrt(nb)) can push the similarity
    # of identical vectors slightly below or above 1.0.
    return dot / math.sqrt(na * nb)

# Jaccard similarity = |A ∩ B| / |A ∪ B|
def jaccard(a: List[int], b: List[int]) -> float:
    """Return the Jaccard similarity of two 0/1 vectors.

    The intersection counts positions where both entries are 1; the union
    counts positions where at least one entry is 1. Positions are paired with
    ``zip``, so a longer vector is truncated to the length of the shorter one.

    Args:
        a: First binary vector.
        b: Second binary vector.

    Returns:
        Intersection size divided by union size, or 1.0 when the union is
        empty (two all-zero vectors are treated as identical).
    """
    # Materialize the pairs once so both counts walk the same positions.
    pairs = list(zip(a, b))
    shared = sum(x & y for x, y in pairs)
    either = sum(x | y for x, y in pairs)
    if either:
        return shared / either
    return 1.0  # both zero vectors -> identical

# Utility: compute mean and std (population std, divide by n) of a list
def mean_std(vals: List[float]) -> Tuple[float, float]:
    """Return the mean and population standard deviation of ``vals``.

    Both sums use ``math.fsum`` so that accumulated float rounding error does
    not leak into the result; with naive summation a constant list such as
    ``[0.1] * 10`` can report a mean that differs from 0.1 and a nonzero
    spread.

    Args:
        vals: Values to summarize.

    Returns:
        ``(mean, std)`` where std divides by n (not n - 1). An empty list
        yields ``(0.0, 0.0)``.
    """
    n = len(vals)
    if n == 0:
        return 0.0, 0.0
    mean = math.fsum(vals) / n
    variance = math.fsum((v - mean) ** 2 for v in vals) / n
    return mean, math.sqrt(variance)

# Run one experiment: all pairwise similarities among M vectors of length N
def run(M: int, N: int) -> None:
    """Generate M vectors of length N and print pairwise similarity stats.

    Every unordered pair of distinct vectors is scored with both ``cosine``
    and ``jaccard``; the mean and population std of each score list are
    printed with four decimals, followed by a blank line.

    Args:
        M: Number of vectors to generate.
        N: Length of each vector.
    """
    vectors = gen(M, N)
    # Pair each vector with the ones that come after it, so every unordered
    # pair appears once, in the same order as an (i, j > i) index sweep.
    pairs = [
        (left, right)
        for pos, left in enumerate(vectors)
        for right in vectors[pos + 1:]
    ]
    cos = [cosine(left, right) for left, right in pairs]
    jac = [jaccard(left, right) for left, right in pairs]
    c_mean, c_std = mean_std(cos)
    j_mean, j_std = mean_std(jac)
    print(f"N={N}, pairs={len(cos)}")
    print(f"Cosine   mean={c_mean:.4f} std={c_std:.4f}")
    print(f"Jaccard  mean={j_mean:.4f} std={j_std:.4f}")
    print()

if __name__ == "__main__":
    M = 100  # 100 random binary vectors
    # Sweep the vector length: N=100 to observe an approx Gaussian-like
    # spread, then larger N (500, 1000) -> concentration.
    for N in (100, 500, 1000):
        run(M, N)
