In [4]:
import numpy as np
from scipy.spatial import distance_matrix

## GTZAN

In [5]:
train_embeddings = np.load("gtzan/gtzan_train_embeddings.npy")
train_labels = np.load("gtzan/gtzan_train_labels.npy")

unique_labels, count_labels = np.unique(train_labels, return_counts=True)
print("Train label count:")
centers = []
labels = []
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
    centers.append(np.mean(train_embeddings[train_labels == label], axis=0))
centers = np.array(centers)

Train label count:
    - 0: 46
    - 1: 48
    - 2: 45
    - 3: 42
    - 4: 47
    - 5: 43
    - 6: 44
    - 7: 41
    - 8: 43
    - 9: 44


In [7]:
val_embeddings = np.load("gtzan/gtzan_val_embeddings.npy")
val_labels = np.load("gtzan/gtzan_val_labels.npy")
unique_labels, count_labels = np.unique(val_labels, return_counts=True)
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
val_similarities = -distance_matrix(val_embeddings, centers)
val_preds = np.argmax(val_similarities, axis=1)
val_labels_idx = np.unique(val_labels, return_inverse=True)[1]
np.mean(val_preds == val_labels_idx)

    - 0: 23
    - 1: 20
    - 2: 23
    - 3: 22
    - 4: 18
    - 5: 17
    - 6: 20
    - 7: 13
    - 8: 17
    - 9: 24


0.5939086294416244

In [8]:
test_embeddings = np.load("gtzan/gtzan_test_embeddings.npy")
test_labels = np.load("gtzan/gtzan_test_labels.npy")
unique_labels, count_labels = np.unique(test_labels, return_counts=True)
print("Test label count:")
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
test_similarities = -distance_matrix(test_embeddings, centers)
test_preds = np.argmax(test_similarities, axis=1)
test_labels_idx = np.unique(test_labels, return_inverse=True)[1]
np.mean(test_preds == test_labels_idx)

Test label count:
    - 0: 31
    - 1: 31
    - 2: 30
    - 3: 29
    - 4: 27
    - 5: 27
    - 6: 27
    - 7: 30
    - 8: 26
    - 9: 32


0.5896551724137931

## VocalSet - Singer

In [9]:
train_embeddings = np.load("vocalset-singer/vocalset-singer_train_embeddings.npy")
train_labels = np.load("vocalset-singer/vocalset-singer_train_labels.npy")

unique_labels, count_labels = np.unique(train_labels, return_counts=True)
print("Train label count:")
centers = []
labels = []
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
    centers.append(np.mean(train_embeddings[train_labels == label], axis=0))
centers = np.array(centers)

Train label count:
    - 0: 87
    - 1: 91
    - 2: 86
    - 3: 87
    - 4: 88
    - 5: 75
    - 6: 86
    - 7: 87
    - 8: 87
    - 9: 90
    - 10: 87
    - 11: 85
    - 12: 88
    - 13: 87
    - 14: 87
    - 15: 87
    - 16: 85
    - 17: 87
    - 18: 87
    - 19: 87


In [10]:
val_embeddings = np.load("vocalset-singer/vocalset-singer_val_embeddings.npy")
val_labels = np.load("vocalset-singer/vocalset-singer_val_labels.npy")
unique_labels, count_labels = np.unique(val_labels, return_counts=True)
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
val_similarities = -distance_matrix(val_embeddings, centers)
val_preds = np.argmax(val_similarities, axis=1)
val_labels_idx = np.unique(val_labels, return_inverse=True)[1]
np.mean(val_preds == val_labels_idx)

    - 0: 59
    - 1: 60
    - 2: 57
    - 3: 58
    - 4: 58
    - 5: 50
    - 6: 57
    - 7: 59
    - 8: 59
    - 9: 60
    - 10: 59
    - 11: 57
    - 12: 58
    - 13: 59
    - 14: 59
    - 15: 59
    - 16: 57
    - 17: 59
    - 18: 59
    - 19: 59


0.49827882960413084

In [11]:
test_embeddings = np.load("vocalset-singer/vocalset-singer_test_embeddings.npy")
test_labels = np.load("vocalset-singer/vocalset-singer_test_labels.npy")
unique_labels, count_labels = np.unique(test_labels, return_counts=True)
print("Test label count:")
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
test_similarities = -distance_matrix(test_embeddings, centers)
test_preds = np.argmax(test_similarities, axis=1)
test_labels_idx = np.unique(test_labels, return_inverse=True)[1]
np.mean(test_preds == test_labels_idx)

Test label count:
    - 0: 36
    - 1: 38
    - 2: 36
    - 3: 36
    - 4: 37
    - 5: 31
    - 6: 36
    - 7: 36
    - 8: 36
    - 9: 37
    - 10: 36
    - 11: 36
    - 12: 37
    - 13: 36
    - 14: 36
    - 15: 36
    - 16: 36
    - 17: 36
    - 18: 36
    - 19: 36


0.525

## VocalSet - Tech

In [12]:
train_embeddings = np.load("vocalset-tech/vocalset-tech_train_embeddings.npy")
train_labels = np.load("vocalset-tech/vocalset-tech_train_labels.npy")

unique_labels, count_labels = np.unique(train_labels, return_counts=True)
print("Train label count:")
centers = []
labels = []
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
    centers.append(np.mean(train_embeddings[train_labels == label], axis=0))
centers = np.array(centers)

Train label count:
    - 0: 147
    - 1: 205
    - 2: 118
    - 3: 114
    - 4: 114
    - 5: 11
    - 6: 57
    - 7: 57
    - 8: 57
    - 9: 111


In [13]:
val_embeddings = np.load("vocalset-tech/vocalset-tech_val_embeddings.npy")
val_labels = np.load("vocalset-tech/vocalset-tech_val_labels.npy")
unique_labels, count_labels = np.unique(val_labels, return_counts=True)
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
val_similarities = -distance_matrix(val_embeddings, centers)
val_preds = np.argmax(val_similarities, axis=1)
val_labels_idx = np.unique(val_labels, return_inverse=True)[1]
np.mean(val_preds == val_labels_idx)

    - 0: 21
    - 1: 29
    - 2: 17
    - 3: 16
    - 4: 16
    - 5: 2
    - 6: 8
    - 7: 8
    - 8: 8
    - 9: 16


0.6382978723404256

In [14]:
test_embeddings = np.load("vocalset-tech/vocalset-tech_test_embeddings.npy")
test_labels = np.load("vocalset-tech/vocalset-tech_test_labels.npy")
unique_labels, count_labels = np.unique(test_labels, return_counts=True)
print("Test label count:")
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
test_similarities = -distance_matrix(test_embeddings, centers)
test_preds = np.argmax(test_similarities, axis=1)
test_labels_idx = np.unique(test_labels, return_inverse=True)[1]
np.mean(test_preds == test_labels_idx)

Test label count:
    - 0: 61
    - 1: 91
    - 2: 50
    - 3: 50
    - 4: 52
    - 5: 5
    - 6: 25
    - 7: 25
    - 8: 25
    - 9: 51


0.6252873563218391

## NSynth

In [15]:
train_embeddings = np.load("nsynth/nsynth_train_embeddings.npy")
train_labels = np.load("nsynth/nsynth_train_labels.npy")

unique_labels, count_labels = np.unique(train_labels, return_counts=True)
print("Train label count:")
centers = []
labels = []
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
    centers.append(np.mean(train_embeddings[train_labels == label], axis=0))
centers = np.array(centers)

Train label count:
    - 0: 5000
    - 1: 5000
    - 2: 5000
    - 3: 4998
    - 4: 4998
    - 5: 4998
    - 6: 5000
    - 7: 5000
    - 8: 5000
    - 9: 5000
    - 10: 5000


In [16]:
val_embeddings = np.load("nsynth/nsynth_val_embeddings.npy")
val_labels = np.load("nsynth/nsynth_val_labels.npy")
unique_labels, count_labels = np.unique(val_labels, return_counts=True)
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
val_similarities = -distance_matrix(val_embeddings, centers)
val_preds = np.argmax(val_similarities, axis=1)
val_labels_idx = np.unique(val_labels, return_inverse=True)[1]
np.mean(val_preds == val_labels_idx)

    - 0: 2638
    - 1: 886
    - 2: 470
    - 3: 2081
    - 4: 2404
    - 5: 663
    - 6: 1598
    - 7: 720
    - 8: 814
    - 10: 404


0.29563022558763213

In [17]:
test_embeddings = np.load("nsynth/nsynth_test_embeddings.npy")
test_labels = np.load("nsynth/nsynth_test_labels.npy")
unique_labels, count_labels = np.unique(test_labels, return_counts=True)
print("Test label count:")
for label, count in zip(unique_labels, count_labels, strict=False):
    print(f"    - {label}: {count}")
test_similarities = -distance_matrix(test_embeddings, centers)
test_preds = np.argmax(test_similarities, axis=1)
test_labels_idx = np.unique(test_labels, return_inverse=True)[1]
np.mean(test_preds == test_labels_idx)

Test label count:
    - 0: 843
    - 1: 269
    - 2: 180
    - 3: 652
    - 4: 766
    - 5: 202
    - 6: 502
    - 7: 235
    - 8: 306
    - 10: 141


0.296630859375