**Sanity check: takes a query embedding file and ranks all other embeddings by their cosine similarity to it, in descending order.**

In [4]:
# Run this to activate venv for the terminal instance: .venv\Scripts\activate

from pathlib import Path
import numpy as np
from numpy.linalg import norm

In [5]:
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two vectors.

    Computes ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``.

    Args:
        vec1: First vector (anything ``np.dot`` and ``norm`` accept).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm.
        In that case the similarity is undefined; the bare formula would
        evaluate 0/0 and produce ``nan`` plus a RuntimeWarning, and a
        ``nan`` score makes any later sort by similarity unreliable.
    """
    denom = norm(vec1) * norm(vec2)
    if denom == 0:
        # Zero-length vector: avoid 0/0 -> nan so rankings stay well-ordered.
        return 0.0
    return np.dot(vec1, vec2) / denom

In [6]:
# Folder with the saved embeddings, and every candidate file found in it.
emb_dir = Path("embeddings_out")
emb_files = sorted(emb_dir.glob("*_embedding2048.npy"))

# The embedding that every other file is compared against.
query_file = emb_dir / "trend3vid6_32k_embedding2048.npy"
query_vec = np.load(query_file)

# Score each candidate (the query itself is excluded) and order the
# (file stem, similarity) pairs from most to least similar.
results = sorted(
    (
        (path.stem, cosine_similarity(query_vec, np.load(path)))
        for path in emb_files
        if path != query_file
    ),
    key=lambda pair: pair[1],
    reverse=True,
)

# Report the ranking, one line per candidate, best match first.
print(f"Ranking embeddings vs {query_file.stem}:")
for name, sim in results:
    print(f"{name}: {sim:.4f}")

Ranking embeddings vs trend3vid6_32k_embedding2048:
trend3vid8_32k_embedding2048: 0.7436
trend5vid2_32k_embedding2048: 0.7363
trend5vid3_32k_embedding2048: 0.7244
trend3vid7_32k_embedding2048: 0.7235
trend5vid4_32k_embedding2048: 0.7012
