In [15]:
import os
import random
from collections import Counter, defaultdict

import librosa
import numpy as np
import pandas as pd
from torch.utils import data
from torch.utils.data import DataLoader
from tqdm import tqdm

In [16]:
# Sampling rate in Hz that every audio file is resampled to when it is loaded.
SAMPLE_RATE = 22_050

In [17]:
class DataSetAudio(data.Dataset):
    """Map-style dataset serving MFCC features of random excerpts of audio files.

    ``dset_path`` must contain one sub-directory per class (e.g. one per
    genre); every file found below a class sub-directory is treated as an
    audio clip of that class. Each item is computed on the fly from a randomly
    positioned excerpt of ``steps`` samples of one clip.

    Args:
        dset_path: Root directory holding one sub-directory per class.
        max_lengh: Number of items reported by ``len()``. Indices wrap around
            the list of files, so it may exceed the number of files.
        seq_lengh: Number of equal parts a ``CLIP_SECONDS``-long clip is
            divided into; one part is the length of an excerpt.
        is_train: When ``False`` the excerpt position is derived from the item
            index, so the same index always yields the same excerpt.
        sample_rate: Sampling rate in Hz used to load the audio. Defaults to
            the module-level ``SAMPLE_RATE``.

    Attributes:
        dset_path: List of audio file paths (note: not the root directory).
        labels: Mapping from class directory name to integer label.
        steps: Excerpt length in samples.

    Raises:
        ValueError: If no files are found below ``dset_path``.
    """

    # Clip duration in seconds assumed when sizing an excerpt.
    # NOTE(review): presumably the nominal length of the dataset's clips — confirm.
    CLIP_SECONDS = 30
    # Excerpts never start before this sample index.
    MIN_START = 50
    # Base seed for the reproducible excerpts used when ``is_train`` is False.
    VALIDATION_SEED = 72

    def __init__(self, dset_path, max_lengh=10000, seq_lengh=15, is_train=True,
                 sample_rate=None):
        # The original ``super(DataSetAudio).__init__()`` created an unbound
        # super object and never ran the parent initialiser.
        super().__init__()
        self.root_dir = os.fspath(dset_path)
        self.dset_path, self.labels = self.extract(self.root_dir)
        if not self.dset_path:
            raise ValueError(
                "no audio files found in class sub-directories of %r" % (self.root_dir,)
            )
        self.max_lengh = max_lengh
        self.seq_lengh = seq_lengh
        self.is_train = is_train
        self.sample_rate = SAMPLE_RATE if sample_rate is None else sample_rate
        self.steps = int(self.sample_rate * self.CLIP_SECONDS / seq_lengh)

    def __len__(self):
        """Return the configured number of items (not the number of files)."""
        return self.max_lengh

    def __iter__(self):
        """Yield every item in index order.

        Returning ``self`` (as before) was not a valid iterator because the
        class defines no ``__next__``.
        """
        return (self[index] for index in range(len(self)))

    def __getitem__(self, idx):
        """Return ``(features, label)`` for one random excerpt.

        ``features`` is the transposed output of ``librosa.feature.mfcc`` with
        13 coefficients; ``label`` is the integer id of the clip's class.

        Raises:
            ValueError: If the clip is too short to cut an excerpt from.
        """
        # Wrap around the real number of files instead of a hard-coded count.
        audio_path = self.dset_path[idx % len(self.dset_path)]
        signal, sr = librosa.load(audio_path, sr=self.sample_rate)

        last_start = len(signal) - self.steps
        if last_start <= self.MIN_START:
            raise ValueError(
                "%r has %d samples, too short for an excerpt of %d samples"
                % (audio_path, len(signal), self.steps)
            )

        # DataLoader indexes map-style datasets through __getitem__ and never
        # calls __iter__, so reproducible validation sampling has to happen
        # here: a private generator keyed by the index leaves the global
        # random state untouched.
        if self.is_train:
            rng = random
        else:
            rng = random.Random(self.VALIDATION_SEED + int(idx))
        start = rng.randrange(self.MIN_START, last_start)

        mfcc = librosa.feature.mfcc(y=signal[start:start + self.steps],
                                    sr=sr,
                                    n_fft=2048,
                                    n_mfcc=13,
                                    hop_length=512)
        return mfcc.T, self.labels[self._genre_of(audio_path)]

    def _genre_of(self, audio_path):
        """Return the class directory name (first component below the root)."""
        relative = os.path.relpath(audio_path, self.root_dir)
        return relative.split(os.sep)[0]

    def extract(self, dir):
        """Collect the audio files and the class-name-to-label mapping.

        Class names are the immediate sub-directories of ``dir``; they are
        sorted so that label ids do not depend on the order in which the file
        system lists directories. Files directly inside ``dir`` are ignored.

        Returns:
            ``(file_list, labels)``: the sorted file paths and a dict mapping
            each class name to ``0..n_classes-1``. Both are empty when ``dir``
            is not an existing directory.
        """
        root = os.fspath(dir)
        if not os.path.isdir(root):
            return [], {}

        class_names = sorted(
            entry for entry in os.listdir(root)
            if os.path.isdir(os.path.join(root, entry))
        )
        labels = {name: index for index, name in enumerate(class_names)}

        file_list = []
        for name in class_names:
            for dirpath, dirnames, filenames in os.walk(os.path.join(root, name)):
                dirnames.sort()  # in-place sort makes the traversal order deterministic
                file_list.extend(
                    os.path.join(dirpath, filename) for filename in sorted(filenames)
                )
        return file_list, labels
                    

In [18]:
# Root folder that holds one sub-directory of audio clips per class.
data_path = 'Data/genres_original'

# Training dataset with the default number of items, served in shuffled batches of 32.
dataset = DataSetAudio(data_path)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)

In [19]:
def gen_training_samples(dloader):
    """Drain a loader and stack all of its batches into two arrays.

    Args:
        dloader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(x, y)``: the inputs and the labels of every batch, each
        concatenated along the first axis with ``np.concatenate``.
    """
    feature_batches, label_batches = [], []
    for inputs, labels in tqdm(dloader):
        feature_batches.append(inputs)
        label_batches.append(labels)

    return (np.concatenate(feature_batches, axis=0),
            np.concatenate(label_batches, axis=0))

In [20]:
# Run one full pass over the training loader and keep every batch in memory
# as a pair of stacked arrays (features, labels).
X_train, Y_train = gen_training_samples(dataloader)

100%|██████████| 313/313 [02:57<00:00,  1.76it/s]


In [21]:
# Sanity check: count how many samples each class label received; the counts
# should be (nearly) equal across classes.
Counter(Y_train)

Counter({3: 1000,
         5: 1000,
         8: 1000,
         7: 1000,
         0: 1010,
         1: 1000,
         9: 1000,
         4: 990,
         2: 1000,
         6: 1000})

In [23]:
# Inspect the dimensions of the stacked training features.
# NOTE(review): the saved output below shows `numpy.ndarray`, which is not a
# shape tuple, so it appears to be stale — re-run this cell.
X_train.shape

numpy.ndarray

In [24]:
# Persist the training arrays as .npy files in the current working directory.
np.save(file='X_train.npy', arr=X_train)
np.save(file='Y_train.npy', arr=Y_train)

In [25]:
# Evaluation samples: 2000 items from a dataset flagged as non-training
# (is_train=False) instead of silently reusing the training default.
# NOTE(review): these excerpts are cut from the same files under `data_path`
# as the training samples, so this is not a held-out split — consider
# partitioning the files between train and test.
dsetest = DataSetAudio(dset_path=data_path, max_lengh=2000, is_train=False)
dloadertest = DataLoader(dsetest, batch_size=32, shuffle=True)
X_test, Y_test = gen_training_samples(dloadertest)

100%|██████████| 63/63 [00:34<00:00,  1.81it/s]


In [26]:
# Persist the evaluation arrays as .npy files in the current working directory.
np.save(file='X_test.npy', arr=X_test)
np.save(file='Y_test.npy', arr=Y_test)