In [None]:
from anime_speaker_embedding.model import AnimeSpeakerEmbedding
import torch


# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single shared model instance used by the cells below.
model = AnimeSpeakerEmbedding(device=device)

In [None]:
from pathlib import Path

# Root directory that is searched recursively for .ogg files.
audio_root = Path("path/to/your/audio/directory")

# rglob yields matches in a filesystem-dependent order; sorting makes the file
# list (and therefore the embedding rows and labels derived from it) stable
# across runs and machines.
audio_files = sorted(audio_root.rglob("*.ogg"))
len(audio_files)

In [None]:
def get_emb(file_path: Path):
    """Return the embedding of one audio file.

    Delegates to ``get_embedding`` on the module-level ``model``.

    Args:
        file_path: Path of the audio file to embed.

    Returns:
        Whatever ``model.get_embedding`` returns for ``file_path``
        (presumably a 192-dimensional vector, per the ``(N, 192)`` note
        further down -- TODO confirm).
    """
    embedding = model.get_embedding(file_path)
    return embedding

In [None]:
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor

# Embed every file on a pool of 10 worker threads. executor.map yields results
# in the same order as audio_files, so results[i] belongs to audio_files[i].
with ThreadPoolExecutor(max_workers=10) as executor:
    results = [
        *tqdm(
            executor.map(get_emb, audio_files),
            total=len(audio_files),
        )
    ]

In [None]:
import numpy as np

# Combine the per-file embeddings into one array: (N, 192)
embeddings = np.asarray(results)
embeddings.shape

In [None]:
from sklearn.manifold import TSNE

# Project the embeddings to 2-D for plotting. random_state is fixed so the
# layout is reproducible.
# scikit-learn requires perplexity < n_samples, so the default of 30 fails on
# small collections; clamp it to n_samples - 1 (never below 1). With more than
# 30 samples this is exactly the library default.
tsne = TSNE(
    n_components=2,
    perplexity=min(30.0, max(1.0, len(embeddings) - 1)),
    random_state=42,
)
embeddings_2d = tsne.fit_transform(embeddings)

In [None]:
from pathlib import Path
import matplotlib.pyplot as plt
import hashlib


def get_speaker_name(audio_path: Path) -> str:
    """Return the name of the directory that directly contains ``audio_path``.

    That directory name is used as the speaker identifier.
    """
    speaker_dir = audio_path.parent
    return speaker_dir.name


def get_speaker_hash(audio_path: Path) -> str:
    """Return a short anonymised label for the speaker of ``audio_path``.

    The label is the first 8 hex characters of the MD5 digest of the speaker
    name returned by ``get_speaker_name``.
    """
    speaker_bytes = get_speaker_name(audio_path).encode()
    digest = hashlib.md5(speaker_bytes).hexdigest()
    return digest[:8]


# One hashed speaker label per audio file, in the same order as audio_files
# (and therefore as the rows of embeddings_2d).
labels = [get_speaker_hash(p) for p in audio_files]
unique_labels = sorted(set(labels))

# Group row indices by label in a single pass instead of rescanning the whole
# label list once per speaker.
indices_by_label = {lbl: [] for lbl in unique_labels}
for row, lbl in enumerate(labels):
    indices_by_label[lbl].append(row)

cmap = plt.colormaps["tab20"]
# The palette wraps around: with more than cmap.N speakers, colours repeat.
colors = [cmap(i % cmap.N) for i in range(len(unique_labels))]

fig, ax = plt.subplots(figsize=(10, 8))

# One scatter call per speaker so each speaker gets its own legend entry.
for color, lbl in zip(colors, unique_labels):
    rows = indices_by_label[lbl]
    ax.scatter(
        embeddings_2d[rows, 0],
        embeddings_2d[rows, 1],
        color=color,
        label=lbl,
        alpha=0.5,
    )

ax.set_title("t-SNE of Audio Embeddings by Speaker")
ax.set_xlabel("t-SNE Dim 1")
ax.set_ylabel("t-SNE Dim 2")

ax.legend(title="Speaker")
fig.tight_layout()
plt.show()