In [None]:
import json
import os
import pickle
from glob import glob
from time import time

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and later
# releases dropped the old names, so use whichever name this install provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

### Load Data and Preprocessing

In [None]:
# Collect every entry that sits directly inside the looperman data directory.
# NOTE(review): glob gives no ordering guarantee, so the order of data_list
# (and of everything derived from it) follows the filesystem listing.
folder_path = '../../data/looperman/'
data_list = glob('{}*'.format(folder_path))

In [None]:
# Read the per-clip metadata; the feature loop looks up 'bpm' and 'sec_wav'
# in it by clip id.
with open('./data/meta_info.json', mode='r') as f:
    meta_info = json.loads(f.read())

In [None]:
# Build, for every clip, the bar-to-bar correlation matrix of its mel spectra.
temp_full_cov = []  # one (len_bar, len_bar) correlation matrix per clip
temp_half_cov = []  # the strict upper triangle of each matrix, flattened

start_time = time()
for data in data_list:
    # load data (librosa resamples to the requested 8 kHz)
    wav, sr = librosa.load(data, sr=8000)
    # Clip id = file name without directory or extension. os.path copes with
    # the platform's path separator and any extension length, unlike
    # splitting on '/' and dropping the last four characters.
    doc_idx = os.path.splitext(os.path.basename(data))[0]

    # meta-info
    bpm = meta_info[doc_idx]['bpm']
    sec_wav = meta_info[doc_idx]['sec_wav']

    sec_per_beat = 60 / bpm
    sec_per_bar = sec_per_beat * 4  # four beats per bar

    # sanity check: the clip must last exactly eight bars
    assert sec_wav == sec_per_bar * 8

    len_bar = int(sec_wav / sec_per_bar)         # number of bars in the clip
    len_beat = int(sec_wav / sec_per_beat)       # number of beats in the clip
    len_per_beat = int(wav.shape[0] / len_beat)  # samples per beat
    len_per_bar = len_per_beat * 4               # samples per bar

    # get features: one mel spectrum per bar (128 is librosa's default n_mels)
    mel_wav = np.zeros((128, len_bar))
    for i in range(len_bar):
        start_idx = i * len_per_bar
        end_idx = (i + 1) * len_per_bar
        window_wav = wav[start_idx:end_idx]

        # hop_length > n_fft leaves a single STFT frame per bar.
        # The audio is passed as y= because librosa >= 0.10 only accepts
        # keyword arguments here, and sr is forwarded so the mel filter bank
        # matches the 8 kHz signal instead of librosa's 22050 Hz default.
        # NOTE(review): with librosa's default center=True that frame is
        # centered on the bar's first sample, so it presumably sees padding
        # plus only the first half of the bar - confirm whether center=False
        # was intended.
        M = librosa.feature.melspectrogram(
            y=window_wav, sr=sr, n_fft=len_per_bar, hop_length=len_per_bar + 1)

        mel_wav[:, i] = M.squeeze()

    # np.corrcoef treats rows as variables, so transpose to correlate bars
    mel_cov = np.corrcoef(mel_wav.T)
    temp_full_cov.append(mel_cov)

    # keep only the entries above the diagonal (the matrix is symmetric)
    mel_cov = mel_cov[np.triu_indices(len_bar, k=1)]
    temp_half_cov.append(mel_cov)

    print('I am on %d (%0.3f sec)' % (int(doc_idx), time()-start_time))
    start_time = time()

wav_full_cov = np.stack(temp_full_cov)
wav_half_cov = np.stack(temp_half_cov)

print('\nwav_full_cov shape :', wav_full_cov.shape)
print('wav_half_cov shape :', wav_half_cov.shape)

In [None]:
# plot samples
cmap = 'viridis'

# plot parameters
CHAR_FONT_SIZE = 15
NUM_FONT_SIZE = 12
WIDTH = HEIGHT = 7

# plotting: element-wise standard deviation of the per-clip correlation
# matrices, taken across clips (axis 0 of wav_full_cov)
fig, ax = plt.subplots(figsize=(WIDTH, HEIGHT))
image = ax.imshow(np.std(wav_full_cov, axis=0), cmap=cmap)
ax.tick_params(axis='both', labelsize=NUM_FONT_SIZE)
fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
# Switch the grid off explicitly: grid(None) only toggles the current state,
# so the outcome depended on which style happened to be active.
ax.grid(False)
fig.savefig('./images/std_cov.png', dpi=1000, bbox_inches='tight', pad_inches=0)
plt.show()

In [None]:
# save pickle: persist the flattened upper-triangle correlations
save_path = './data/wav_cov.pkl'
with open(save_path, 'wb') as f:
    pickle.dump(wav_half_cov, f, protocol=pickle.HIGHEST_PROTOCOL)
# Report success only after the dump has finished and the file is closed;
# printing first would claim success even if pickling failed.
print('file saved!')