In [None]:
# Colab-only: mount Google Drive at /content/drive so the paths used in this
# notebook (all under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import math
import os
import librosa
from tqdm import tqdm
import matplotlib.pyplot as plt
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Root folder walked with os.walk during feature extraction; the name of each
# sub-folder is used as the genre label.
music_path = r'/content/drive/MyDrive/Thesis/Datasets/GTZAN Dataset/genres_original'

n_mfcc 	= number of MFCCs to return<br>
n_fft 		= length of FFT (Fast Fourier Transform) Window (default 2048)<br>
hop_length	= samples between successive frames (default 512)<br>
ff_dim 		= feed forward dimension (output Conv1D)<br>
mlp_units 	= output size of each dense layer in the MLP block (an array; it is looped over to build multiple layers)<br>
mlp_dropout 	= dropout rate for the dropout layers in the MLP block<br>
batch_size 	= 16 (input length 31955 / batch_size 16 = 1997.19 -> ceil = 1998 steps shown in the fit progress bar)

In [None]:
# Audio format: every track is loaded at SAMPLE_RATE Hz and treated as
# DURATION seconds long.
SAMPLE_RATE = 22050
DURATION = 30
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

# Each track is cut into `num_segments` equal pieces of `sample_ps` samples.
num_segments = 10
hop_length = 512
sample_ps = SAMPLES_PER_TRACK // num_segments
# Number of MFCC frames expected per segment. librosa centres (pads) frames by
# default, which yields 1 + n // hop_length frames for n samples. The former
# ceil(n / hop_length) only agrees when n is not a multiple of hop_length.
expected_vects_ps = 1 + sample_ps // hop_length
n_fft = 2048
n_mfcc = 13

In [None]:
def add_noise(data, noise_factor):
    """Return a copy of `data` with scaled Gaussian noise added.

    Args:
        data: NumPy array of samples (any shape, may be empty).
        noise_factor: Multiplier applied to standard-normal noise before it
            is added to `data`.

    Returns:
        A new array with the same shape and dtype as `data`.
    """
    noise = np.random.standard_normal(data.shape)
    add_noise_data = data + noise_factor * noise
    # Cast back to the input dtype. `data.dtype` (instead of the Python type of
    # the first element) also works for empty and multi-dimensional arrays.
    return add_noise_data.astype(data.dtype)

In [None]:
def shift_time(data, sampling_rate, shift_max):
    """Shift `data` in time by a random offset and silence the vacated samples.

    The offset is drawn uniformly from [0, sampling_rate * shift_max) samples
    and is applied forwards or backwards with equal probability.

    Args:
        data: 1-D NumPy array of audio samples.
        sampling_rate: Samples per second of `data`.
        shift_max: Upper bound of the shift, in seconds.

    Returns:
        A new array of the same shape as `data`. Samples that wrapped around
        the end of the array are set to 0.
    """
    shift = np.random.randint(sampling_rate * shift_max)

    direction = np.random.randint(0, 2)
    if direction == 1:
        shift = -shift

    shifted_data = np.roll(data, shift)
    # Silence the samples that wrapped around: the head for a forward shift,
    # the tail for a backward shift.
    if shift > 0:
        shifted_data[:shift] = 0
    elif shift < 0:
        shifted_data[shift:] = 0
    # shift == 0 moves nothing, so nothing is silenced. (Slicing with [0:]
    # here would wipe the entire signal.)
    return shifted_data

In [None]:
def segment_mfcc(segment, sr):
    """Compute the MFCCs of one audio segment.

    Returns the transpose of librosa.feature.mfcc's output, i.e. one row per
    frame and one column per coefficient.
    """
    # The audio is passed as `y=`: librosa 0.10+ makes it keyword-only and
    # rejects the positional form; older releases accept the keyword too.
    return librosa.feature.mfcc(y=segment,
                                sr=sr,
                                n_fft=n_fft,
                                n_mfcc=n_mfcc,
                                hop_length=hop_length).T


# mapping: genre names, indexed by label id
# mfcc:    one (frames x n_mfcc) nested list per stored segment
# targets: label id of each entry in "mfcc"
data = {
    "mapping": [],
    "mfcc"   : [],
    "targets" : [],
}

for dirpath, dirnames, filenames in os.walk(music_path):
    # Skip the root folder itself. Strings are compared by value; `is not`
    # only tests object identity and is not a reliable path comparison.
    if dirpath != music_path:
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # The label id is the position of the genre in data["mapping"], so
        # targets and mapping stay aligned whatever os.walk yields.
        label = len(data["mapping"]) - 1

        print(f"Processing : {label} {semantic_label}")

        for f in filenames:
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            # Three augmented copies of the full track.
            noise_signal = add_noise(signal, 0.1)
            shift_signal = shift_time(signal, SAMPLE_RATE, 1)
            pitch_signal = librosa.effects.pitch_shift(signal, sr=SAMPLE_RATE, n_steps=4)
            variants = (signal, noise_signal, shift_signal, pitch_signal)

            for s in range(num_segments):
                start_sample = sample_ps * s
                finish_sample = start_sample + sample_ps

                mfcc, noise_mfcc, shift_mfcc, pitch_mfcc = (
                    segment_mfcc(variant[start_sample:finish_sample], sr)
                    for variant in variants
                )

                # Keep only full-length segments; the frame count of the
                # original signal decides for all four variants.
                if len(mfcc) == expected_vects_ps:
                    for features in (mfcc, noise_mfcc, shift_mfcc, pitch_mfcc):
                        data["mfcc"].append(features.tolist())
                        data["targets"].append(label)
                else:
                    print(f"Skipped : {label} {f}, len mfcc : {len(mfcc)}, expected : {expected_vects_ps}")

Processing : 0 blues
Processing : 1 country
Skipped : 1 country.00007.wav, len mfcc : 129, expected : 130
Processing : 2 disco
Skipped : 2 disco.00014.wav, len mfcc : 129, expected : 130
Processing : 3 rock
Processing : 4 hiphop
Skipped : 4 hiphop.00032.wav, len mfcc : 127, expected : 130
Processing : 5 pop
Processing : 6 classical
Skipped : 6 classical.00051.wav, len mfcc : 129, expected : 130
Processing : 7 reggae
Processing : 8 jazz
Processing : 9 metal


In [None]:
# Number of stored feature matrices and labels, one per line; the two must match.
print(len(data['mfcc']), len(data['targets']), sep='\n')

39944
39944


In [None]:
import h5py

In [None]:
# Write features and labels to a single HDF5 file. The context manager closes
# the file even if one of the dataset writes raises.
with h5py.File(r'/content/drive/MyDrive/Thesis/Datasets/music_dataset_augmented.h5', 'w') as hf:
    hf.create_dataset('inputs', data=data['mfcc'])
    hf.create_dataset('targets', data=data['targets'])

In [None]:
import pickle
# Persist the genre names from data['mapping']; their list positions are the
# label ids stored in data['targets'].
with open('/content/drive/MyDrive/Thesis/Datasets/data_mapping.pkl', 'wb') as f:
    pickle.dump(data['mapping'], f)

In [None]:
# Read the mapping back to check the round trip. Despite its name,
# `loaded_dict` holds the list that was pickled from data['mapping'].
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this notebook or another trusted source.
with open('/content/drive/MyDrive/Thesis/Datasets/data_mapping.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

# Bare expression so the notebook displays the loaded value.
loaded_dict

['blues',
 'country',
 'disco',
 'rock',
 'hiphop',
 'pop',
 'classical',
 'reggae',
 'jazz',
 'metal']

In [None]:
def load_data(dataset_path):
    """Load the feature and label arrays from an HDF5 dataset file.

    Args:
        dataset_path: Path to an HDF5 file containing the datasets
            'inputs' and 'targets'.

    Returns:
        Tuple (inputs, targets) of in-memory NumPy arrays.

    Raises:
        KeyError: If either dataset is missing from the file.
    """
    # The context manager closes the file even when reading fails.
    with h5py.File(dataset_path, 'r') as hf:
        # Index access raises KeyError for a missing dataset; `.get` would
        # return None, which np.array silently turns into a 0-d object array.
        inputs = np.array(hf['inputs'])
        targets = np.array(hf['targets'])

    return inputs, targets

In [None]:
# Reload the saved dataset from Drive and print both array shapes as a sanity check.
inputs, targets = load_data(r'/content/drive/MyDrive/Thesis/Datasets/music_dataset_augmented.h5')
print(inputs.shape, targets.shape)

(9986, 130, 13) (9986,)
(19972, 130, 13) (19972,)
(19972, 130, 13) (19972,)
(19972, 130, 13) (19972,)
(39944, 130, 13) (39944,)
