In [1]:
import os
import sys
import timeit
import numpy as np
from pydub import AudioSegment
import pydub
from matplotlib import pylab


In [2]:
GENRE_DIR = "/home/mike/Desktop/EE379K/FinalProject/genres"
GENRE_LIST = ("blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock")

In [7]:
def convert_dataset_to_wav():
    """
        Converts all files of the GTZAN dataset
        to the WAV (uncompressed) format.
    """
    start = timeit.default_timer()

    for subdir, dirs, files in os.walk(GENRE_DIR):
        print("Converting au to wav " + subdir)
        for file in files:
            path = subdir+'/'+file
            
            if path.endswith("au"):
                song = AudioSegment.from_file(path,"au")
                song = song[:30000]
                song.export(path[:-3]+".wav",format='wav')

    stop = timeit.default_timer()
    print ("Conversion time = ", (stop - start))
    
convert_dataset_to_wav()

Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/rock
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/hiphop
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/pop
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/disco
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/country
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/jazz
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/metal
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/reggae
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/classical
Converting au to wav /home/mike/Desktop/EE379K/FinalProject/genres/blues
('Conversion time = ', 0.0446619987487793)


In [6]:
for subdir, dirs, files in os.walk(GENRE_DIR):
        for file in files:
            path = subdir+'/'+file
            if path.endswith("wav"):
                os.remove(path)

In [4]:
import os
import glob
import sys
import numpy as np
import scipy
import scipy.io.wavfile

def create_npy(fn):
    """
        Creates the MFCC features. 
    """    
    sample_rate, X = scipy.io.wavfile.read(fn)
    #X[X==0] = 1
    base_fn, ext = os.path.splitext(fn)
    #print(base_fn + ext)
    np.save(base_fn + ext, X)


def read_npy(train_fraction, genre_list=GENRE_LIST, base_dir=GENRE_DIR):
    """
        Reads the MFCC features from disk and
        returns them in a numpy array.
    """
    X = []
    y = []
    X_test = []
    y_test = []
    num_songs = 100*train_fraction
    for label, genre in enumerate(genre_list):
        i = 0
        for fn in glob.glob(os.path.join(base_dir, genre, "*.npy")):
            npy = np.load(fn)
            npy.resize(661500)

            if(i < num_songs):
                X.append(npy)
                y.append(label)
            else:
                X_test.append(npy)
                y_test.append(label)  
            
            i += 1

    return np.array(X), np.array(y), np.array(X_test), np.array(y_test)



In [36]:
start = timeit.default_timer()
for subdir, dirs, files in os.walk(GENRE_DIR):
    traverse = list(set(dirs).intersection( set(GENRE_LIST) ))
    break
print("Working with these genres --> ", traverse)
print("Starting npy generation") 
for subdir, dirs, files in os.walk(GENRE_DIR):
    print("Converting " + subdir)
    for file in files:
        path = subdir+'/'+file
        if path.endswith("wav"):
            tmp = subdir[subdir.rfind('/',0)+1:]
            if tmp in traverse:
                create_npy(path)

stop = timeit.default_timer()
print("Total npy generation and feature writing time (s) = ", (stop - start))

('Working with these genres --> ', ['reggae', 'classical', 'country', 'jazz', 'metal', 'pop', 'disco', 'hiphop', 'rock', 'blues'])
Starting npy generation
Converting /home/mike/Desktop/EE379K/FinalProject/genres
Converting /home/mike/Desktop/EE379K/FinalProject/genres/rock
Converting /home/mike/Desktop/EE379K/FinalProject/genres/hiphop
Converting /home/mike/Desktop/EE379K/FinalProject/genres/pop
Converting /home/mike/Desktop/EE379K/FinalProject/genres/disco
Converting /home/mike/Desktop/EE379K/FinalProject/genres/country
Converting /home/mike/Desktop/EE379K/FinalProject/genres/jazz
Converting /home/mike/Desktop/EE379K/FinalProject/genres/metal
Converting /home/mike/Desktop/EE379K/FinalProject/genres/reggae
Converting /home/mike/Desktop/EE379K/FinalProject/genres/classical
Converting /home/mike/Desktop/EE379K/FinalProject/genres/blues
('Total npy generation and feature writing time (s) = ', 27.151424884796143)


In [9]:
X, y, X_test, y_test = read_npy(train_fraction=0.9, genre_list=('rock', 'classical'))
print(X.shape)
print(y.shape)
print(X_test.shape)
print(y_test.shape)



(180, 661500)
(180,)
(20, 661500)
(20,)


In [11]:
from keras.utils.np_utils import to_categorical
import tensorflow

y = to_categorical(y, num_classes=None)
y_test = to_categorical(y, num_classes=None)
print(y.shape)

Using TensorFlow backend.


(180, 2)


In [27]:
X = X.reshape((X.shape[0], X.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
print(X.shape)
print(X_test.shape)

(180, 661500, 1)
(20, 661500, 1)


In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.preprocessing import sequence


model = Sequential()
model.add(LSTM(10, input_shape=(661500, 1)))
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X, y, nb_epoch=3, batch_size=1)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_15 (LSTM)               (None, 10)                480       
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 22        
Total params: 502.0
Trainable params: 502
Non-trainable params: 0.0
_________________________________________________________________
None
Epoch 1/3
