In [1]:
import warnings
warnings.filterwarnings("ignore", category = FutureWarning)

import os
import keras
#import h5py
import librosa
import itertools
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict


from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import Dropout
from keras.models import Model
from keras.applications.vgg16 import VGG16

Using TensorFlow backend.


In [2]:
"""
@description: Method to split a song into multiple songs using overlapping windows
"""
def splitsongs(X, y, window = 0.1, overlap = 0.5):
    # Empty lists to hold our results
    temp_X = []
    temp_y = []

    # Get the input song array size
    xshape = X.shape[0]
    chunk = int(xshape*window)
    offset = int(chunk*(1.-overlap))
    
    # Split the song and create new ones on windows
    spsong = [X[i:i+chunk] for i in range(0, xshape - chunk + offset, offset)]
    for s in spsong:
        temp_X.append(s)
        temp_y.append(y)

    return np.array(temp_X), np.array(temp_y)

In [3]:

"""
@description: Method to convert a list of songs to a np array of melspectrograms
"""
def to_melspectrogram(songs, n_fft = 1024, hop_length = 512):
    # Transformation function
    melspec = lambda x: librosa.feature.melspectrogram(x, n_fft = n_fft,
        hop_length = hop_length)[:,:,np.newaxis]

    # map transformation of input songs to melspectrogram using log-scale
    tsongs = map(melspec, songs)
    return np.array(list(tsongs))

In [7]:

def read_data(src_dir, genres, song_samples, spec_format, debug = True):    
    # Empty array of dicts with the processed features from all files
    arr_specs = []
    arr_genres = []

    # Read files from the folders
    for x,_ in genres.items():
        folder = src_dir + x
        
        for root, subdirs, files in os.walk(folder):
            for file in files:
                # Read the audio file
                file_name = folder + "/" + file
                signal, sr = librosa.load(file_name)
                signal = signal[:song_samples]
                
                # Debug process
                if debug:
                    print("Reading file: {}".format(file_name))
                
                # Convert to dataset of spectograms/melspectograms
                signals, y = splitsongs(signal, genres[x])
                
                # Convert to "spec" representation
                specs = spec_format(signals)
                
                # Save files
                arr_genres.extend(y)
                arr_specs.extend(specs)
                
                
    return np.array(arr_specs), np.array(arr_genres)

In [12]:
# Parameters
gtzan_dir = './Data/genres/'
song_samples = 660000
genres = {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 
          'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}

In [None]:
# Read the data
X, y = read_data(gtzan_dir, genres, song_samples, to_melspectrogram, debug=True)
np.save('x_gtzan_npy.npy', X)
np.save('y_gtzan_npy.npy', y)

Reading file: ./Data/genres/blues/blues.00088.wav
Reading file: ./Data/genres/blues/blues.00091.wav
Reading file: ./Data/genres/blues/blues.00023.wav
Reading file: ./Data/genres/blues/blues.00024.wav
Reading file: ./Data/genres/blues/blues.00070.wav
Reading file: ./Data/genres/blues/blues.00053.wav
Reading file: ./Data/genres/blues/blues.00085.wav
Reading file: ./Data/genres/blues/blues.00033.wav
Reading file: ./Data/genres/blues/blues.00019.wav
Reading file: ./Data/genres/blues/blues.00031.wav
Reading file: ./Data/genres/blues/blues.00020.wav
Reading file: ./Data/genres/blues/blues.00001.wav
Reading file: ./Data/genres/blues/blues.00046.wav
Reading file: ./Data/genres/blues/blues.00098.wav
Reading file: ./Data/genres/blues/blues.00017.wav
Reading file: ./Data/genres/blues/blues.00009.wav
Reading file: ./Data/genres/blues/blues.00040.wav
Reading file: ./Data/genres/blues/blues.00074.wav
Reading file: ./Data/genres/blues/blues.00086.wav
Reading file: ./Data/genres/blues/blues.00068.wav


In [10]:

X = np.load('x_gtzan_npy.npy')
y = np.load('y_gtzan_npy.npy')

In [11]:
X

array([], dtype=float64)