In [1]:
import librosa
from MusicVectorizer import MusicVectorizer
import numpy as np
import glob
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
classical_paths = glob.glob("data/classical/*.mp3")
classical_songs = [librosa.load(song_path, sr=16000)[0] for song_path in classical_paths]

hiphop_paths = glob.glob("data/90s-rap/*.mp3")
hiphop_songs = [librosa.load(song_path, sr=16000)[0] for song_path in hiphop_paths]

In [3]:
mv = MusicVectorizer()

In [5]:
classical_trajs = list()

for song in tqdm(classical_songs):
    classical_trajs.append(mv.trajectorize_song(song, 16000))

100%|██████████| 22/22 [05:49<00:00, 15.89s/it]


In [10]:
hiphop_trajs = list()

for song in tqdm(hiphop_songs):
    hiphop_trajs.append(mv.trajectorize_song(song, 16000))

100%|██████████| 22/22 [05:25<00:00, 14.79s/it]


In [78]:
from sklearn.cluster import KMeans
from statistics import mode

# get endpoints
classical_endpoints = np.array([np.sum(traj, axis=0) for traj in classical_trajs])  # collapses time axis
hiphop_endpoints = np.array([np.sum(traj, axis=0) for traj in hiphop_trajs])  # n_data x n_layers x n_features

# try for each possible layer

n_train = 18
for layer in range(13):
    # get data
    train_data = np.concatenate((classical_endpoints[:n_train, layer, :], hiphop_endpoints[:n_train, layer, :]))
    test_data = np.concatenate((classical_endpoints[n_train:, layer, :], hiphop_endpoints[n_train:, layer, :]))
    
    # print()
    # print(endpoints.shape)
    # make/fit model
    model = KMeans(n_clusters=2, random_state=2, max_iter=1000, n_init='auto')
    model.fit(train_data)

    # test model
    # calculate train and test accuracies
    train_outputs = model.predict(train_data)
    train_classical = train_outputs[:n_train]
    train_hiphop = train_outputs[n_train:]
    train_acc = (train_classical.tolist().count(mode(train_classical)) + train_hiphop.tolist().count(mode(train_hiphop))) / (2 * n_train)

    test_outputs = model.predict(test_data)
    test_classical = test_outputs[:len(classical_endpoints) - n_train]
    test_hiphop = test_outputs[len(classical_endpoints) - n_train:]
    test_acc = (test_classical.tolist().count(mode(test_classical)) + test_hiphop.tolist().count(mode(test_hiphop))) / (2 * (len(classical_endpoints) - n_train))
    
    print(test_classical, test_hiphop)
    print(f"Layer {layer}: Train accuracy: {train_acc}, Test accuracy: {test_acc}")
    print()
    # print(f"{outputs[:22-n_train]}, {outputs[22-n_train:]}")
    # print(model.predict(test_data))

[0 1 0 1] [1 1 1 1]
Layer 0: Train accuracy: 0.8055555555555556, Test accuracy: 0.75

[0 1 0 1] [1 1 1 1]
Layer 1: Train accuracy: 0.8333333333333334, Test accuracy: 0.75

[0 1 0 1] [1 1 1 1]
Layer 2: Train accuracy: 0.8055555555555556, Test accuracy: 0.75

[1 1 0 1] [1 1 1 1]
Layer 3: Train accuracy: 0.7777777777777778, Test accuracy: 0.875

[1 1 0 1] [1 1 1 1]
Layer 4: Train accuracy: 0.7777777777777778, Test accuracy: 0.875

[1 1 0 1] [1 1 1 1]
Layer 5: Train accuracy: 0.7777777777777778, Test accuracy: 0.875

[1 1 1 1] [1 1 1 1]
Layer 6: Train accuracy: 0.7777777777777778, Test accuracy: 1.0

[1 1 1 1] [1 1 1 1]
Layer 7: Train accuracy: 0.7777777777777778, Test accuracy: 1.0

[1 1 1 1] [1 1 1 1]
Layer 8: Train accuracy: 0.7777777777777778, Test accuracy: 1.0

[1 1 1 1] [1 1 1 1]
Layer 9: Train accuracy: 0.75, Test accuracy: 1.0

[1 1 1 1] [1 1 1 1]
Layer 10: Train accuracy: 0.75, Test accuracy: 1.0

[1 1 0 1] [1 1 1 1]
Layer 11: Train accuracy: 0.75, Test accuracy: 0.875

[0 0 0 0]

In [71]:
np.sum(classical_trajs[0][:, :, :], axis=0).shape

(13, 768)

In [46]:
classical_endpoints = np.array([np.sum(traj, axis=0) for traj in classical_trajs])
hiphop_endpoints = np.array([np.sum(traj, axis=0) for traj in hiphop_trajs])

In [47]:
classical_endpoints.shape

(22, 13, 768)

In [1]:
fucntions = ["Break", "Bridge", "Chorus", "Coda", "Development", "Exposition", "Fade-out", "Head", "Instrumental", "Interlude", "Intro", "Main_Theme", "No_function", "Outro", "Post-chorus", "Post-verse", "Pre-Chorus", "Pre-Verse", "Recap", "Secondary_Theme", "Solo", "Theme", "Transition", "Variation", "Verse"]

In [3]:
len(fucntions)

25