In [1]:
import boto3
import json
import numpy as np
import io
import IPython.display
import scipy.io.wavfile as sciwav
from collections import Counter
from s3fs.core import S3FileSystem

from sklearn.cluster import KMeans

In [3]:
# Name of the S3 bucket that later cells read from.
bucket = "music-ml-gigioli"

In [16]:
s3 = S3FileSystem()

# Build the prefix from `bucket` so the bucket name lives in one place.
embeddings_prefix = 's3://{}/data/nsynth/nsynth-train/embeddings/'.format(bucket)

# Load every entry whose path contains 'acoustic'.
# fnames[i] is the path that embeddings[i] was read from.
embeddings, fnames = [], []
for f in s3.walk(embeddings_prefix):
    if 'acoustic' in f:
        # np.load reads a .npy file fully into memory, so the remote handle
        # can be closed as soon as the array is returned.
        with s3.open(f) as fh:
            embedding = np.load(fh)
        # Append to both lists only after a successful load so they stay aligned.
        fnames.append(f)
        embeddings.append(embedding)

In [17]:
# Sanity check: how many embedding arrays were loaded?
n_embeddings = len(embeddings)
n_embeddings

3002

In [18]:
# Dimensions of the first loaded embedding.
np.shape(embeddings[0])

(125, 16)

In [19]:
# Three per-file summaries of each 2-D embedding:
#   time_features   - mean over axis 0 (one value per column)
#   mfcc_features   - mean over axis 1 (one value per row)
#   concat_features - the two vectors above joined end to end, in that order
time_features = [np.mean(matrix, axis=0) for matrix in embeddings]
mfcc_features = [np.mean(matrix, axis=1) for matrix in embeddings]
concat_features = [
    np.concatenate([column_means, row_means])
    for column_means, row_means in zip(time_features, mfcc_features)
]

In [20]:
# Combine the per-file arrays into one array with a new leading axis.
# np.stack raises right here if any file has a different shape; np.array could
# instead yield a ragged/object array (NumPy-version dependent) that only fails
# later, at the reshape or inside KMeans, with a much less helpful error.
embeddings = np.stack(embeddings)

In [21]:
# Flatten each embedding into a single row: keep the first axis, collapse the rest.
vectors = np.reshape(embeddings, (len(embeddings), -1))

In [22]:
# k-means starts from random centroids, so without a fixed seed the cluster ids
# change from run to run, and later cells filter on literal ids (0, 2, 5).
# Pinning random_state makes a Restart & Run All reproduce the same assignment.
kmeans = KMeans(n_clusters=10, random_state=42)

In [23]:
# Fit the model on the flattened embeddings and keep the cluster id of each row.
clusters = kmeans.fit(vectors).labels_

In [24]:
# Pair every file name with its cluster id and order the pairs by cluster id.
# sorted() is stable, so files keep their relative order within a cluster.
results = sorted(zip(fnames, clusters), key=lambda pair: pair[1])

In [25]:
# Cluster sizes: number of files assigned to each cluster id.
# Counting straight from the pairs avoids transposing the whole list and,
# unlike list(zip(*results))[1], does not raise IndexError when results is empty.
Counter(label for _fname, label in results)

Counter({0: 548,
         1: 481,
         2: 85,
         3: 433,
         4: 352,
         5: 204,
         6: 179,
         7: 206,
         8: 309,
         9: 205})

In [26]:
# First five (file name, cluster id) pairs assigned to cluster 2.
[(fname, label) for fname, label in results if label == 2][:5]

[('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_005-068-025_embeddings.npy',
  2),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_005-071-025_embeddings.npy',
  2),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_005-081-100_embeddings.npy',
  2),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_005-086-025_embeddings.npy',
  2),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_031-086-050_embeddings.npy',
  2)]

In [37]:
# First ten (file name, cluster id) pairs assigned to cluster 0.
[(fname, label) for fname, label in results if label == 0][:10]

[('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/bass_acoustic_000-060-127_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-031-025_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-034-050_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-037-025_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-038-075_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-042-100_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-043-025_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-066-025_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/embeddings/brass_acoustic_002-078-127_embeddings.npy',
  0),
 ('music-ml-gigioli/data/nsynth/nsynth-train/em

In [35]:
# Fetch one clip from S3 and play it inline. Its embedding file appears in the
# cluster-2 listing shown earlier in this notebook.
obj = boto3.resource('s3').Object(bucket, 'data/nsynth/nsynth-train/audio/brass_acoustic_005-086-025.wav')
# Read the whole object into memory so scipy can parse it as a WAV file.
sample_rate, X = sciwav.read(io.BytesIO(obj.get()['Body'].read()))
X = X.astype(np.float32)
# Play at the rate stored in the WAV header instead of a hard-coded 16000,
# so the clip is not sped up or slowed down if the file uses another rate.
IPython.display.Audio(X, rate=sample_rate)

In [39]:
# Fetch one clip from S3 and play it inline. Its embedding file appears in the
# cluster-0 listing shown earlier in this notebook.
obj = boto3.resource('s3').Object(bucket, 'data/nsynth/nsynth-train/audio/brass_acoustic_002-078-127.wav')
# Read the whole object into memory so scipy can parse it as a WAV file.
sample_rate, X = sciwav.read(io.BytesIO(obj.get()['Body'].read()))
X = X.astype(np.float32)
# Play at the rate stored in the WAV header instead of a hard-coded 16000,
# so the clip is not sped up or slowed down if the file uses another rate.
IPython.display.Audio(X, rate=sample_rate)

In [70]:
# Every (file name, cluster id) pair assigned to cluster 5.
# NOTE(review): the saved output under this cell lists electronic/synthetic
# files and paths without the bucket prefix, which the 'acoustic' filter in the
# loading cell would not produce; it appears to come from a different run,
# so re-execute before relying on it.
[(fname, label) for fname, label in results if label == 5]

[('data/nsynth/nsynth-train/embeddings/bass_electronic_000-022-100_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_electronic_000-023-025_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_electronic_007-022-050_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_electronic_022-034-075_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_004-076-050_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_005-026-025_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_005-039-127_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_017-070-127_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_017-087-127_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_018-046-075_embeddings.npy',
  5),
 ('data/nsynth/nsynth-train/embeddings/bass_synthetic_018-098-100_embeddings.npy',
  5),
 ('data/nsynth/ns