https://www.frontiersin.org/journals/genetics/articles/10.3389/fgene.2022.954024/full
^^ Per the article above, the members closest to a group's centroid best represent that group.

**Group the embeddings by trend, then pick the video nearest to each trend's centroid to represent that trend.**

In [10]:
import os
import numpy as np
from pathlib import Path
import numpy as np

def cosine_similarity_vec(vec1, vec2):
    """Return the cosine similarity of two vectors.

    The value is ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``.

    NOTE(review): ``np.dot`` is used, so the inputs are presumably 1-D
    arrays of equal length -- confirm against the saved embedding files.

    Args:
        vec1: First vector (array-like).
        vec2: Second vector (array-like).

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm
        (the similarity is undefined there; returning 0.0 avoids a NaN that
        would otherwise propagate into every averaged score).
    """
    denom = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denom == 0:
        # Division would yield NaN/inf plus a RuntimeWarning; treat an
        # all-zero embedding as "no similarity" instead.
        return 0.0
    return np.dot(vec1, vec2) / denom

def load_embeddings_for_videos(video_names, modalities, emb_folder="similarity pipeline/embeddings_out"):
    """Load the saved embedding array of every (video, modality) pair.

    Each file is looked up at
    ``<emb_folder>/<subfolder>/<video>_emb-<modality><dim>.npy`` where the
    subfolder comes from the modality table below and ``dim`` is ``768`` for
    ``"text"`` and ``2048`` for every other modality.

    Args:
        video_names: Iterable of video identifiers (file-name prefixes).
        modalities: Iterable of modality keys; each must be one of
            ``"audio"``, ``"visual"`` or ``"text"`` (otherwise ``KeyError``).
        emb_folder: Root directory containing the per-modality subfolders.

    Returns:
        Dict mapping each video name to ``{modality: loaded array}``.
        A modality whose file does not exist is reported with a printed
        warning and left out of that video's dict.
    """
    subfolder_by_modality = {
        "audio": "audio2048",
        "visual": "video2048",
        "text": "text768",
    }

    # Every requested video gets an entry, even if none of its files exist.
    loaded = {name: {} for name in video_names}

    for name in video_names:
        per_video = loaded[name]
        for mod in modalities:
            size_tag = "768" if mod == "text" else "2048"
            npy_path = os.path.join(
                emb_folder,
                subfolder_by_modality[mod],
                f"{name}_emb-{mod}{size_tag}.npy",
            )

            if not os.path.exists(npy_path):
                print(f"⚠️ Missing {mod} embedding for {name}")
                continue

            per_video[mod] = np.load(npy_path)

    return loaded

def compute_weighted_scores(video_embs, weights):
    """Score each video by its mean weighted similarity to all other videos.

    For every ordered pair (candidate, other) the cosine similarities of the
    modalities present in BOTH videos are combined into a weighted mean
    (weighted sum divided by the sum of the weights actually used). A
    candidate's score is the average of these pair values over all other
    videos it shares at least one positively-weighted modality with.

    Normalising by the weight actually used keeps a pair that lacks one
    modality on the same scale as a fully covered pair, and makes the scores
    independent of whether ``weights`` sums to 1.

    Args:
        video_embs: Dict mapping video name -> {modality: embedding vector}.
        weights: Dict mapping modality -> numeric weight.

    Returns:
        Dict mapping video name -> score. A video with no comparable partner
        gets ``0.0``.
    """
    scores = {}

    for candidate, cand_embs in video_embs.items():
        pair_scores = []
        for other, other_embs in video_embs.items():
            if other == candidate:
                continue

            sim_sum = 0.0
            total_weight = 0.0
            for mod, w in weights.items():
                if mod in cand_embs and mod in other_embs:
                    sim_sum += w * cosine_similarity_vec(cand_embs[mod], other_embs[mod])
                    total_weight += w

            # Pairs with no shared (positively weighted) modality carry no
            # information and are excluded from the average.
            if total_weight > 0:
                pair_scores.append(sim_sum / total_weight)

        scores[candidate] = sum(pair_scores) / len(pair_scores) if pair_scores else 0.0

    return scores

def pick_representative(video_names, video_embs, weights):
    """Choose the video with the highest weighted similarity score.

    Args:
        video_names: Accepted for interface compatibility; not read here
            (the candidates are the keys of ``video_embs``).
        video_embs: Dict mapping video name -> {modality: embedding}.
        weights: Dict mapping modality -> weight, forwarded to
            ``compute_weighted_scores``.

    Returns:
        Tuple ``(best_video_name, scores)`` where ``scores`` is the full
        name -> score dict. On ties the first video in dict order wins.

    Raises:
        ValueError: If ``video_embs`` is empty (``max`` of an empty dict).
    """
    all_scores = compute_weighted_scores(video_embs, weights)
    top_video = max(all_scores, key=all_scores.get)
    return top_video, all_scores

# --------------------------
# CONFIGURATION: the videos of the trend being compared
# --------------------------
video_names = [f"trend10vid{i}" for i in range(1, 11)]

# The modality key for frames is "visual" (not "video").
modalities = ["audio", "visual", "text"]
# Equal weight for every modality.
weights = dict.fromkeys(modalities, 1/3)

# --------------------------
# LOAD THE EMBEDDINGS, THEN CHOOSE THE REPRESENTATIVE VIDEO
# --------------------------
video_embs = load_embeddings_for_videos(video_names, modalities)
rep_video, scores = pick_representative(video_names, video_embs, weights)

# Report the winner followed by every video's score.
print(f"Representative video: {rep_video}\n")
print("Scores:")
for vid, score in scores.items():
    print(f"{vid}: {score:.4f}")

Representative video: trend10vid5

Scores:
trend10vid1: 0.5569
trend10vid2: 0.6117
trend10vid3: 0.6060
trend10vid4: 0.6651
trend10vid5: 0.6771
trend10vid6: 0.6654
trend10vid7: 0.6292
trend10vid8: 0.6642
trend10vid9: 0.6351
trend10vid10: 0.6004
