# Baseline with MSD timbre data

We use MSD timbre data as a baseline, aggregating through time and keeping the following statistics (in this order):

* Mean
* Max
* Variance
* L2-norm

Since the MSD timbre data is a set of 12 coefficients per time frame, and we are using four statistics, each aggregated song will be represented as a $12 \times 4 = 48$ dimensional vector.

In [12]:
import h5py
import os
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm

# Root directory holding the original MSD HDF5 (.h5) track files.
MSD_DIR = "/mnt/shared/deep_learning/onieto/msd/msd/orig_data/"

# Tab-separated index file for each data split; the first column holds the
# track IDs. Key order (test, train, val) is the order the splits are processed.
SUBSET_DICT = dict(
    test="../data/items_index_test_model_929-pred_16_multi2deT.tsv",
    train="../data/items_index_train_model_929-pred_16_multi2deT.tsv",
    val="../data/items_index_val_model_929-pred_16_multi2deT.tsv",
)

In [8]:
def path_from_trackid(trackid):
    """
    Build the relative path of a track's .h5 file from its trackid.

    The three directory levels are the characters at positions 2, 3
    and 4 of the trackid (0-based), so a song with
    trackid: TRABC1839DQL4H... ends up at:
    A/B/C/TRABC1839DQL4H....h5
    """
    # One directory level per character at index 2, 3 and 4.
    levels = [trackid[pos] for pos in (2, 3, 4)]
    return os.path.join(*levels, trackid + '.h5')

def aggregate_track(trackid):
    """
    Aggregate the timbre data of one track through time.

    Opens the track's .h5 file under MSD_DIR (located with
    path_from_trackid), reads the analysis/segments_timbre dataset and
    reduces it along axis 0.

    Parameters
    ----------
    trackid : str
        Track identifier used to locate the .h5 file.

    Returns
    -------
    np.ndarray
        1-D array with the mean, max, variance and L2-norm of each
        timbre coefficient, concatenated in that order.

    Raises
    ------
    OSError
        Presumably raised by h5py when the file is missing or unreadable;
        the calling loop relies on this to record missing tracks.
    """
    h5_path = os.path.join(MSD_DIR, path_from_trackid(trackid))

    # The context manager closes the file handle even if reading fails;
    # previously one handle was leaked for every track processed.
    with h5py.File(h5_path, "r") as f:
        # Materialize the dataset once so the four reductions below work on
        # an in-memory array instead of each re-reading the file.
        timbre = f["analysis"]["segments_timbre"][()]

    # Aggregate Mean, Max, Variance, and L2-Norm (in this order)
    return np.concatenate((
        np.mean(timbre, axis=0),
        np.max(timbre, axis=0),
        np.var(timbre, axis=0),
        np.linalg.norm(timbre, axis=0, ord=2),
    ))

In [9]:
# Main loop: aggregate every track of each split and save one matrix per split
# to msd_agg_<split>.npy. Track IDs whose file cannot be opened are collected
# in `missing_tracks`.
N_FEATURES = 48  # 12 timbre coefficients x 4 statistics

missing_tracks = []
for key in SUBSET_DICT.keys():
    # Fixed: the format string was never applied (key was passed as a second
    # print argument), so the output read "Computing {}... test".
    print("Computing {}...".format(key))
    df = pd.read_csv(SUBSET_DICT[key], sep="\t", header=None)
    aggs = []
    # Column 0 holds the track IDs; iterating it directly avoids building a
    # Series per row with iterrows().
    for trackid in tqdm(df[0], total=len(df)):
        try:
            aggs.append(aggregate_track(trackid))
        except OSError:
            # File could not be opened: remember the ID and insert an all-zero
            # row so the saved matrix stays aligned with the index file.
            missing_tracks.append(trackid)
            aggs.append(np.zeros(N_FEATURES))
    np.save("msd_agg_{}.npy".format(key), np.asarray(aggs))

  0%|          | 9/15863 [00:00<03:09, 83.55it/s]

Computing {}... test


100%|██████████| 15863/15863 [15:06<00:00, 17.50it/s] 


Computing {}... train


100%|██████████| 115839/115839 [1:56:20<00:00, 16.59it/s]  


Computing {}... val


100%|██████████| 15657/15657 [15:41<00:00, 16.64it/s]


In [55]:
# Peek at the first rows of the test split index file.
df = pd.read_csv(SUBSET_DICT["test"], header=None, sep="\t")
df.head(5)

Unnamed: 0,0
0,TRBGAEE128F145E8B3
1,TRZVVWH128F9316856
2,TRWZDKB128F149A30F
3,TRGJUTR128F14872C4
4,TRGGMIX128F4263396


In [17]:
# Save the IDs of the tracks that could not be read, one per line
# (no trailing newline).
with open("msd_agg_missing_trackids.txt", "w") as f:
    joined_ids = '\n'.join(missing_tracks)
    f.write(joined_ids)