In [3]:
import numpy as np
import scipy.io as sio
import os
import numpy as np

In [None]:
def load_mat_features(feature_dir, variable_name):
    """Load one variable from every ``.mat`` file found under ``feature_dir``.

    The directory tree is walked recursively. Each ``.mat`` file contributes
    one entry keyed by its song name: the file name without its extension,
    cut at the first underscore (``song1_cens.mat`` -> ``song1``). The key is
    taken from the file name alone, so neither the platform's path separator
    nor underscores in directory names can affect it.

    Parameters
    ----------
    feature_dir : str
        Root directory to search.
    variable_name : str
        Name of the variable to read from each file. It is unwrapped with
        ``.item()``, so it must hold exactly one element.

    Returns
    -------
    dict
        Maps song name to the unwrapped variable. Empty when ``feature_dir``
        does not exist or contains no ``.mat`` files (``os.walk`` does not
        raise for a missing directory). If two files map to the same song
        name, the one visited last wins.

    Raises
    ------
    KeyError
        If a file does not contain ``variable_name``.
    ValueError
        If the stored variable holds more than one element.
    """
    features = {}

    for root, _dirs, files in os.walk(feature_dir):
        for file_name in files:
            if not file_name.endswith('.mat'):
                continue

            mat = sio.loadmat(os.path.join(root, file_name))
            song_key = os.path.splitext(file_name)[0].split("_", 1)[0]
            features[song_key] = mat[variable_name].item()

    return features


song_cens_data = load_mat_features('CENS', 'XCENS')
song_mfcc_data = load_mat_features('MFCCs', 'XMFCC')
song_hpcp_data = load_mat_features('HPCPs', 'XHPCP')

In [None]:
def aggregate_feature(feature, time_steps):
    """Collapse a frame-indexed feature into one row per time step.

    Parameters
    ----------
    feature : array-like
        Feature values indexed by frame along axis 0.
    time_steps : iterable of dict
        Each entry provides integer ``"frame_start"`` and ``"frame_end"``
        keys; the end frame is exclusive.

    Returns
    -------
    numpy.ndarray
        The per-step summaries stacked vertically with ``np.vstack``, in the
        order of ``time_steps``.
    """
    def summarize(first, last):
        # A step spanning at least one frame is averaged over its frames;
        # otherwise the single frame at `first` stands in for the step.
        return feature[first:last].mean(axis=0) if last > first else feature[first]

    rows = [summarize(ts["frame_start"], ts["frame_end"]) for ts in time_steps]
    return np.vstack(rows)

In [None]:
# Build one beat-synchronous feature matrix per song from the .mat files under
# 'Beats'. This cell reads song_mfcc_data, song_hpcp_data and song_cens_data
# and calls aggregate_feature, so those must already be defined.
song_time_step_data = {}

for root, dirs, files in os.walk('Beats'):

    for file in files:
        if not file.endswith('.mat'):
            continue

        file_path = os.path.join(root, file)
        mat = sio.loadmat(file_path)

        # Song key: the file name without its extension, cut at the first
        # underscore. Using the bare file name keeps the key independent of
        # the path separator and of underscores in directory names.
        saved_name = os.path.splitext(file)[0].split("_", 1)[0]

        fs = mat['Fs'].item()
        hop_size = mat['hopSize'].item()
        # beats0 is used both as frame indices into the features and, scaled
        # by hop_size / fs, as beat times.
        # NOTE(review): the code below needs at least two beats (one
        # interval) per song — confirm the beat files guarantee this.
        beats0 = mat['beats0'].squeeze()
        beat_times_in_sec = beats0 * hop_size / fs
        beat_durations = np.diff(beat_times_in_sec)

        # Per-interval tempo columns: 60 / duration, its natural log, and its
        # change from the previous interval (0 for the first interval).
        tempo_inst = 60.0 / beat_durations
        tempo_features = np.column_stack([
            tempo_inst,
            np.log(tempo_inst),
            np.diff(tempo_inst, prepend=tempo_inst[0])
        ])

        # One time step per pair of consecutive beats.
        time_steps = [
            {
                "t": t,
                "frame_start": int(beats0[t]),
                "frame_end": int(beats0[t + 1]),
                "time_start_in_sec": beat_times_in_sec[t],
                "time_end_in_sec": beat_times_in_sec[t + 1],
                "duration_in_sec": beat_durations[t]
            }
            for t in range(len(beats0) - 1)
        ]

        mfcc_beat = aggregate_feature(song_mfcc_data[saved_name], time_steps)
        hpcp_beat = aggregate_feature(song_hpcp_data[saved_name], time_steps)
        cens_beat = aggregate_feature(song_cens_data[saved_name], time_steps)

        # Columns, left to right: MFCC, HPCP, CENS, tempo features; one row
        # per beat interval.
        song_time_step_data[saved_name] = np.hstack([
            mfcc_beat,
            hpcp_beat,
            cens_beat,
            tempo_features
        ])