In [2]:
import os
import re
import sys
import warnings
from collections import defaultdict

import librosa
import numpy
import tqdm

In [10]:
# Directory holding the source recordings (read with os.listdir by the loader).
DATA_PATH = 'data/SMD_Western-Music/mp3/'

# Directory that receives one feature text file per composition.
FEATURES_DIR = './features/SMD_Western-Music'
os.makedirs(FEATURES_DIR, exist_ok=True)

# Sampling rate handed to librosa for decoding and feature extraction.
# (A stale `SAMPLING_RATE = 10` used to precede this line and was always
# overwritten; it has been dropped so the effective value is unambiguous.)
SAMPLING_RATE = 16000

In [5]:
def load_SMD_Western_Music_data(data_path, sr):
    """Decode every recording in ``data_path`` and group it by composition.

    The composition key is the part of the file name that precedes its
    second underscore (for example ``Brahms_Op108-03``); everything after
    that underscore distinguishes individual recordings of the same piece.

    Parameters
    ----------
    data_path : str
        Directory whose entries are audio files readable by ``librosa.load``.
    sr : int or float
        Target sampling rate of the returned signals.

    Returns
    -------
    collections.defaultdict
        Maps composition key -> list of ``(file_name, y)`` tuples, where
        ``y`` is the signal returned by librosa at rate ``sr``.

    Notes
    -----
    * Directory entries whose name does not follow the naming scheme are
      skipped with a warning instead of aborting the whole (slow) load.
    * Files are processed in sorted order so that the order of keys and of
      recordings within a key is reproducible across machines.
    """
    pat = re.compile(r'([^_]*_[^_]*)_.*')

    # Rates below this threshold are reached in two steps: decode at
    # ``mid_sr`` and then resample down to ``sr``.
    # NOTE(review): presumably because decoding directly at very low rates
    # is unreliable — confirm the original motivation.
    mid_sr = 100

    group_composes = defaultdict(list)
    for file_name in tqdm.tqdm(sorted(os.listdir(data_path))):
        # Validate the name first so stray files never reach the decoder.
        match = pat.search(file_name)
        if match is None:
            warnings.warn(
                'Skipping {!r}: name does not match <composer>_<work>_<rest>'.format(file_name)
            )
            continue

        path = os.path.join(data_path, file_name)
        if sr < mid_sr:
            y, _ = librosa.load(path, sr=mid_sr)
            # Keyword arguments: recent librosa releases no longer accept
            # the sampling rates positionally.
            y = librosa.resample(y, orig_sr=mid_sr, target_sr=sr)
        else:
            y, _ = librosa.load(path, sr=sr)
        group_composes[match.group(1)].append((file_name, y))
    return group_composes

In [6]:
# Decode all recordings once and keep them in memory, grouped by composition.
# This is the slow step of the notebook (about ten minutes for 42 files in the
# captured run below).
group_composes = load_SMD_Western_Music_data(DATA_PATH, SAMPLING_RATE)

100%|██████████| 42/42 [10:22<00:00, 14.83s/it]


In [7]:
# Sanity check: list each composition with the number of recordings loaded for it.
for composition, recordings in group_composes.items():
    print(composition, len(recordings))

Brahms_Op108-03 2
Ravel_PianoTrio-01 3
Debussy_L140-02 4
Chopin_Op060 2
Ravel_PianoTrio-02 2
Debussy_L140-03 4
Debussy_L140-01 4
Mozart_KV448-01 2
Brahms_Op034b-03 2
Martin_Trio-02 2
Brahms_Op108-02 3
Ravel_PianoTrio-04 2
Ravel_PianoTrio-03 2
Rachmaninoff_Op005-04 2
Rachmaninoff_Op005-01 2
Brahms_Op108-04 2
Brahms_Op108-01 2


In [8]:
# Short-time analysis geometry, specified in milliseconds.
window = 200  # ms, length of one analysis frame
shift = 100  # ms, step between the starts of consecutive frames

# The same quantities expressed in whole samples at SAMPLING_RATE.
n_fft = int(window * SAMPLING_RATE / 1000.0)
hop_length = int(shift * SAMPLING_RATE / 1000.0)

In [12]:
# Print arrays in full and on wide lines: the text files written below must
# contain every value, so NumPy may not abbreviate with "...".
# `threshold=numpy.nan` raises ValueError on current NumPy releases;
# `sys.maxsize` is the replacement recommended by the NumPy documentation.
numpy.set_printoptions(threshold=sys.maxsize, linewidth=1000)

# One text file per composition; the chromagrams of all its recordings are
# printed into that file one after another.
for name, compositions in tqdm.tqdm(group_composes.items()):
    path = os.path.join(FEATURES_DIR, '{}.txt'.format(name))
    with open(path, 'w') as f:
        for rel_name, y in compositions:
            # `y` is passed by keyword: recent librosa releases reject a
            # positional signal argument.
            features = librosa.feature.chroma_stft(y=y, sr=SAMPLING_RATE, n_fft=n_fft, hop_length=hop_length)
            # Transpose before printing so the first axis of the librosa
            # result becomes the columns of the written matrix.
            print(features.T, file=f)

100%|██████████| 17/17 [00:44<00:00,  2.63s/it]
