In [1]:
import sys
import logging
import os
import json
import random
from tqdm import tqdm
import warnings

import numpy as np

import librosa
from librosa.feature import mfcc
import scdata

import torch

# Put the parent directory on the import path so that the `aural_travels`
# package imported below can be resolved from this notebook's location.
sys.path.append('..')

# Keep cell output readable: suppress Python warnings and let INFO-level log
# records through on the root logger.
warnings.filterwarnings('ignore')
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Must stay after the sys.path tweak above.
from aural_travels.data import soundcloud

# Location of your local copy of the SoundCloud dataset. The directory should
# contain the `scdata.json` file and the `audio` directory.
# Set the SCDATA_DIR environment variable to point at your copy; the path below
# is only the fallback used when the variable is not set.
DATA_DIR = os.environ.get('SCDATA_DIR', '/home/leo/src/scdata')
AUDIO_DIR = os.path.join(DATA_DIR, 'audio')

# Seed every global RNG in scope so that the random subset drawn later in the
# notebook is reproducible. NumPy is seeded as well in case any code called
# from here draws from its global generator (not visible in this notebook).
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

In [6]:
# Estimate the per-coefficient mean and standard deviation of the MFCC features
# over a random subset of the training split. The dataset is created with
# normalize_mfcc=False, presumably so that the raw, un-normalized features are
# what gets measured here.
dataset = soundcloud.CoverGenerationDataset(DATA_DIR, 'training', normalize_mfcc=False)
print(dataset.num_samples())

# Upper bound on the number of dataset items to draw. The request is capped at
# the dataset size because random.sample raises ValueError when asked for more
# items than the population holds.
NUM_SAMPLED_ITEMS = 1000
samples = random.sample(range(len(dataset)), k=min(NUM_SAMPLED_ITEMS, len(dataset)))

# Collect element 0 of every sampled item (the MFCC tensor). The loop variable
# is deliberately NOT called `mfcc`: that name is the function imported from
# librosa.feature at the top of the notebook and rebinding it would break the
# cells further down that call it.
mfcc_chunks = []
for idx in tqdm(samples):
    item_mfcc = dataset[idx][0]
    mfcc_chunks.append(item_mfcc)

# Stack all per-item tensors along dim 0 and reduce over that dimension, which
# leaves one mean / std value per MFCC coefficient.
mfccs = torch.cat(mfcc_chunks)
print(mfccs.size())

print(f'MFCC_MEAN={torch.mean(mfccs, dim=0)}')
print(f'MFCC_STD={torch.std(mfccs, dim=0)}')

  0%|          | 0/1000 [00:00<?, ?it/s]

42


100%|██████████| 1000/1000 [16:57<00:00,  1.02s/it]

torch.Size([42000, 20])
MFCC_MEAN=tensor([-129.6565,   93.0169,    6.5002,   22.2244,    4.3236,   10.2308,
          -0.5253,    6.3963,   -2.1541,    4.7143,   -2.3346,    3.7656,
          -2.2059,    2.7291,   -2.3556,    2.6430,   -2.8859,    2.2499,
          -2.0655,    2.2221])
MFCC_STD=tensor([147.8568,  55.7701,  34.7229,  22.2324,  17.8979,  16.1369,  14.1328,
         13.1724,  12.0060,  11.6405,  10.6034,  10.4883,   9.8023,   9.4330,
          9.2720,   9.0574,   8.7890,   8.8789,   8.7943,   8.8655])





In [None]:
# Inspect one dataset item in full; element 0 of an item is what the MFCC
# statistics cell above collects.
dataset[6]

In [None]:
# Sanity check: MFCCs of a single all-zero 1024-sample frame
# (n_fft == hop_length == signal length and center=False).
# The function is called through its qualified name so the cell does not depend
# on the bare `mfcc` name still referring to the librosa function, and the
# signal is passed as `y=` because recent librosa releases accept it by keyword
# only.
librosa.feature.mfcc(y=np.zeros(1024), n_fft=1024, center=False, hop_length=1024)

In [None]:
# Load the track metadata. The top-level JSON value is an object; only its
# values (one record per track) are kept, as a list so they can be indexed.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale encoding.
with open(os.path.join(DATA_DIR, 'scdata.json'), encoding='utf-8') as f:
    tracks = list(json.load(f).values())

In [None]:
# Pick one track, print its genre and duration, then display its cover image
# as the cell result.
track_idx = 1
track = tracks[track_idx]
for field in ('genre', 'duration'):
    print(track[field])

soundcloud.load_image(DATA_DIR, track['id'])

In [None]:
# Load the selected track's audio file. `x` is the tuple returned by
# librosa.load: x[0] is the audio signal and x[1] its sampling rate.
track_id = tracks[track_idx]['id']
audio_path = scdata.get_audio_path(AUDIO_DIR, track_id)
x = librosa.load(audio_path)

In [None]:
# Shape of the loaded audio signal (element 0 of the librosa.load result).
x[0].shape

In [None]:
# Sampling rate of the loaded audio (element 1 of the librosa.load result);
# it is passed as `sr` to the MFCC computation below.
x[1]

In [None]:
# Length of the loaded clip in seconds: number of audio samples divided by the
# sampling rate.
len(x[0]) / x[1]

In [None]:
# MFCCs of the whole loaded clip, using librosa's default analysis parameters.
# The function is called through its qualified name so the cell does not depend
# on the bare `mfcc` name still referring to the librosa function, and the
# signal is passed as `y=` because recent librosa releases accept it by keyword
# only.
m = librosa.feature.mfcc(y=x[0], sr=x[1])

In [None]:
# Shape of the MFCC matrix; presumably (n_mfcc, n_frames), which is why the
# statistics below reduce over axis=1 — confirm against the librosa docs.
m.shape

In [None]:
# Mean of each row of the MFCC matrix for this single track (reduction over
# axis 1), for comparison with the dataset-wide MFCC_MEAN printed above.
m.mean(axis=1)

In [None]:
# Standard deviation of each row of the MFCC matrix for this single track
# (reduction over axis 1), for comparison with the dataset-wide MFCC_STD
# printed above.
m.std(axis=1)