In [1]:
import os
import random
from collections import Counter, defaultdict

import librosa
import numpy as np
import pandas as pd
from torch.utils import data
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
# Sampling rate in Hz. Every audio file is resampled to this rate when loaded,
# and the dataset multiplies it by the crop length in seconds to get the number
# of samples per crop.
SAMPLE_RATE = 22050

In [3]:
class DataSetAudio(data.Dataset):
    """Map-style dataset yielding ``(mfcc_sequence, label_id)`` pairs.

    Audio files are discovered by walking ``dset_path``; the name of every
    sub-directory becomes a class and receives an integer id. Each item is a
    crop of ``seq_lengh`` seconds taken from one file, converted to 13 MFCCs
    and transposed so that time is the first axis.

    Args:
        dset_path: Root directory containing one sub-directory per class.
        max_lengh: Value reported by ``__len__``. Indices wrap around the file
            list, so this may be larger than the number of files.
        seq_lengh: Length of each crop in seconds.
        is_train: When True the crop position is drawn from the global
            ``random`` module. When False it is derived from the item index,
            so the same index always yields the same crop.
    """

    # Base seed for the deterministic (is_train=False) crop positions.
    _EVAL_SEED = 72

    def __init__(self, dset_path, max_lengh=10000, seq_lengh=20, is_train=True):
        super().__init__()
        self.dset_path, self.labels = self.extract(dset_path)
        self.max_lengh = max_lengh
        self.seq_lengh = seq_lengh
        self.is_train = is_train
        # Crop length expressed in samples.
        self.steps = int(SAMPLE_RATE * seq_lengh)

    def __len__(self):
        """Return the nominal dataset size (``max_lengh``), not the file count."""
        return self.max_lengh

    def __iter__(self):
        """Yield every item in index order.

        Determinism for evaluation is handled per item in ``__getitem__``
        (a DataLoader indexes the dataset and never calls ``__iter__``), so no
        global RNG state is touched here.
        """
        return (self[idx] for idx in range(len(self)))

    def __getitem__(self, idx):
        """Load one file, crop it and return ``(mfcc.T, label_id)``."""
        # max_lengh may exceed the number of files, so wrap the index.
        audio_path = self.dset_path[idx % len(self.dset_path)]
        # The class is the name of the directory holding the file; this matches
        # how extract() names labels and does not depend on the root's depth.
        label = os.path.basename(os.path.dirname(audio_path))
        signal, sr = librosa.load(audio_path, sr=SAMPLE_RATE)

        start = self._crop_start(len(signal), idx)
        segment = signal[start: start + self.steps]
        if len(segment) < self.steps:
            # Clip shorter than the crop: zero-pad at the end so every item has
            # the same number of frames and batches can still be stacked.
            segment = np.pad(segment, (0, self.steps - len(segment)))

        mfcc = librosa.feature.mfcc(y=segment,
                                    sr=sr,
                                    n_fft=2048,
                                    n_mfcc=13,
                                    hop_length=512)
        return mfcc.T, self.labels[label]

    def _crop_start(self, n_samples, idx):
        """Choose the first sample of the crop for a clip of ``n_samples``."""
        if n_samples <= self.steps:
            # Nothing to choose from (randrange would raise on an empty range).
            return 0
        if self.is_train:
            rng = random
        else:
            # A private generator seeded by the index gives the same crop for
            # the same item regardless of access order or global RNG state.
            rng = random.Random(self._EVAL_SEED + int(idx))
        return rng.randrange(0, n_samples - self.steps)

    def extract(self, dir):
        """Collect file paths and build the class-name -> id mapping.

        Args:
            dir: Root directory to walk (``str`` or path-like).

        Returns:
            ``(file_list, labels)`` where ``file_list`` holds the path of every
            file found below the root's sub-directories and ``labels`` maps each
            sub-directory name to ``walk position - 1``. The ids therefore
            follow the directory listing order, which is not sorted.
        """
        root = os.fspath(dir)
        file_list = []
        labels = defaultdict()
        for i, (dirpath, dirnames, filenames) in enumerate(os.walk(root)):
            # Skip the root itself (compare by value, not identity); only its
            # sub-directories are classes.
            if dirpath == root:
                continue
            labels[os.path.basename(dirpath)] = i - 1
            for file in filenames:
                file_list.append(os.path.join(dirpath, file))
        return file_list, labels

In [4]:
# Training set: 20-second crops, with the dataset's default nominal length.
data_path = 'Data/genres_original'
dataset = DataSetAudio(dset_path=data_path, seq_lengh=20)
# NOTE(review): no RNG seed is set anywhere in the notebook as shown, so the
# shuffle order and the random crop positions differ from run to run.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [7]:
def gen_training_samples(dloader):
    """Drain a data loader and stack everything it yields.

    Args:
        dloader: Iterable producing ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays obtained by concatenating all input
        batches and all label batches along the first axis.
    """
    feature_batches, label_batches = [], []
    # tqdm only adds a progress bar; batches are consumed in loader order.
    for inputs, labels in tqdm(dloader):
        feature_batches.append(inputs)
        label_batches.append(labels)

    stacked_features = np.concatenate(feature_batches, axis=0)
    stacked_labels = np.concatenate(label_batches, axis=0)
    return stacked_features, stacked_labels

In [6]:
# Run feature extraction over the whole training loader. This is slow because
# every item decodes an audio file and computes its MFCCs.
X_train, Y_train = gen_training_samples(dataloader)

100%|██████████| 313/313 [05:20<00:00,  1.02s/it]


In [7]:
# Folder-name -> integer id mapping built by DataSetAudio.extract for this dataset.
dataset.labels

defaultdict(None,
            {'hiphop': 0,
             'classical': 1,
             'blues': 2,
             'metal': 3,
             'jazz': 4,
             'country': 5,
             'pop': 6,
             'rock': 7,
             'disco': 8,
             'reggae': 9})

In [8]:
# Sanity check: each genre id should occur roughly equally often.
# (Counter is imported in the notebook's import cell so that the later
# Counter(...) cells do not depend on this cell having been run.)
Counter(Y_train)

Counter({8: 1000,
         9: 1000,
         1: 1000,
         3: 1000,
         2: 1000,
         4: 990,
         0: 1010,
         5: 1000,
         7: 1000,
         6: 1000})

In [9]:
# Persist the extracted training features and labels as .npy files in the
# current working directory.
np.save('X_train.npy',X_train)
np.save('Y_train.npy',Y_train)

In [10]:
# Test set: 2000 items drawn with is_train=False.
# NOTE(review): this reads the same directory (data_path) as the training set
# built earlier, so the test crops come from the same audio files. Confirm
# that this overlap is intended.
dsetest = DataSetAudio(dset_path=data_path,max_lengh=2000, is_train=False)
dloadertest = DataLoader(dsetest, batch_size=32, shuffle=True)
X_test, Y_test = gen_training_samples(dloadertest)

100%|██████████| 63/63 [01:02<00:00,  1.01it/s]


In [11]:
# Class balance of the test labels.
Counter(Y_test)


Counter({0: 202,
         4: 198,
         3: 200,
         6: 200,
         2: 200,
         9: 200,
         7: 200,
         8: 200,
         5: 200,
         1: 200})

In [12]:
# Persist the extracted test features and labels as .npy files.
np.save('X_test.npy',X_test)
np.save('Y_test.npy',Y_test)

In [4]:
# Second corpus, used below as the validation set: 1000 items, with the crop
# length left at the dataset's default.
# NOTE(review): this rebinds data_path, so re-running an earlier cell that uses
# data_path after this one would point it at this folder instead.
data_path = 'Data/fma_data'
dataset2 = DataSetAudio(dset_path=data_path, max_lengh= 1000)
dtest = DataLoader(dataset2, batch_size=32, shuffle=True)

In [5]:
# Folder-name -> integer id mapping for the second corpus. Both the names and
# the ids differ from the first dataset's mapping, hence the relabelling below.
dataset2.labels

defaultdict(None,
            {'Disco': 0,
             'Metal': 1,
             'Country': 2,
             'Classical': 3,
             'Reggae-Dub': 4,
             'Blues': 5,
             'Pop': 6,
             'Hip-Hop': 7,
             'Rock': 8,
             'Jazz': 9})

In [8]:
# Extract features for the validation corpus. Y_val still uses dataset2's own ids.
X_val, Y_val = gen_training_samples(dtest)

 38%|███▊      | 12/32 [00:45<01:14,  3.75s/it][src/libmpg123/layer3.c:INT123_do_layer3():1841] error: dequantization failed!
100%|██████████| 32/32 [01:57<00:00,  3.66s/it]


In [10]:
# Class balance of the validation labels (before relabelling).
Counter(Y_val)

Counter({0: 100,
         4: 100,
         5: 100,
         6: 100,
         7: 100,
         2: 100,
         8: 100,
         3: 100,
         1: 100,
         9: 100})

Relabel the validation labels so that each genre gets the same integer id as in the first dataset's label mapping.

In [11]:
# Translate the validation label ids (dataset2.labels) into the ids used by the
# first dataset (dataset.labels), so that a genre has the same integer in every
# saved array.
#
# The two corpora spell the genre folders differently, so the correspondence is
# declared by name. The integer ids are then looked up in the mappings the
# datasets actually built: they follow directory-listing order and can differ
# between machines, so they must not be hard-coded. Requires both `dataset`
# and `dataset2` from the cells above.
VAL_TO_TRAIN_GENRE = {
    'Disco': 'disco',
    'Metal': 'metal',
    'Country': 'country',
    'Classical': 'classical',
    'Reggae-Dub': 'reggae',
    'Blues': 'blues',
    'Pop': 'pop',
    'Hip-Hop': 'hiphop',
    'Rock': 'rock',
    'Jazz': 'jazz',
}
val_id_to_train_id = {
    val_id: dataset.labels[VAL_TO_TRAIN_GENRE[genre]]
    for genre, val_id in dataset2.labels.items()
}
# A dict lookup raises KeyError on an unexpected id instead of silently
# dropping the sample, which would leave the labels misaligned with X_val.
Y = [val_id_to_train_id[int(i)] for i in Y_val]
Y_val_ = np.array(Y)

In [12]:
# Class balance after relabelling; the counts per class should be unchanged.
Counter(Y_val_)

Counter({8: 100,
         9: 100,
         2: 100,
         6: 100,
         0: 100,
         5: 100,
         7: 100,
         1: 100,
         3: 100,
         4: 100})

In [None]:
# Persist the validation features together with the relabelled ids (Y_val_,
# not the original Y_val).
np.save('X_val.npy',X_val)
np.save('Y_val.npy',Y_val_)