# Baseline with MSD timbre data

We use MSD timbre data as a baseline, aggregating through time and keeping the following statistics (in this order):

* Mean
* Max
* Variance
* L2-norm

Since the MSD timbre data is a set of 12 coefficients per time frame, and we are using four statistics, each aggregated song will be represented as a $12 \times 4 = 48$-dimensional vector.

In [45]:
import h5py
import os
import numpy as np
import pandas as pd

# Root directory under which the per-track MSD .h5 files are stored.
MSD_DIR = "/mnt/shared/deep_learning/onieto/msd/msd/orig_data/"

# Subset name -> tab-separated index file listing the items of that subset.
SUBSET_DICT = {
    "test": "../data/items_index_test_multi2deA2.tsv",
    "train": "../data/items_index_train_multi2deA2.tsv",
    "val": "../data/items_index_val_multi2deA2.tsv",
}

In [44]:
def path_from_trackid(trackid):
    """
    Builds the relative path of the HDF5 file of a track.

    The characters at positions 2, 3 and 4 of the trackid
    (counting from 0) name three nested directories, and the
    file itself is the trackid plus the '.h5' extension, e.g.
    trackid TRABC1839DQL4H... gives the path:
    A/B/C/TRABC1839DQL4H....h5
    """
    components = (trackid[2], trackid[3], trackid[4], trackid + '.h5')
    return os.path.join(*components)

def aggregate_track(track_uid):
    """
    Aggregates the timbre data of a single track through time.

    Opens the HDF5 file of the track (located under MSD_DIR through
    path_from_trackid), reads its "analysis/segments_timbre" dataset
    and reduces it along axis 0 with four statistics.

    Parameters
    ----------
    track_uid : str
        Track id used to locate the HDF5 file.

    Returns
    -------
    agg : np.ndarray
        The mean, max, variance and L2-norm taken along axis 0,
        concatenated in this order.

    Errors raised by h5py while opening or reading the file propagate
    to the caller.
    """
    track_path = os.path.join(MSD_DIR, path_from_trackid(track_uid))

    # The context manager closes the file even if reading fails, so looping
    # over a whole subset does not accumulate open file handles.
    with h5py.File(track_path, "r") as f:
        # [()] loads the whole dataset into memory once; the reductions
        # below then work on the array instead of re-reading the file.
        timbre = f["analysis"]["segments_timbre"][()]

    # Aggregate Mean, Max, Variance, and L2-Norm (in this order)
    return np.concatenate((
        np.mean(timbre, axis=0),
        np.max(timbre, axis=0),
        np.var(timbre, axis=0),
        np.linalg.norm(timbre, axis=0, ord=2),
    ))

In [None]:
# Main loop: aggregate every track listed in each subset index and save the
# resulting matrix (one row per track) to "msd_agg_<subset>.npy".
for key, index_path in SUBSET_DICT.items():
    df = pd.read_csv(index_path, sep="\t", header=None)
    # Column 0 holds the track ids; iterating it directly avoids building a
    # Series for every row, which is what iterrows() does.
    aggs = [aggregate_track(track_uid) for track_uid in df[0]]
    np.save("msd_agg_{}.npy".format(key), np.asarray(aggs))