In [1]:
import tensorflow as tf
import sklearn
import keras
from keras.layers import Dense, Dropout, Conv2D, Flatten
from keras.models import Sequential
import sklearn.metrics
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import os
import h5py
import pickle
from collections import deque
import IPython
import time
import random
tf.__version__


Using TensorFlow backend.


'1.1.0'

In [2]:
if os.name == "nt":
    root = "E:/dataset"
else:
    root = "/media/ritchie46/5E061D60061D3A8B/dataset"

genres = ["hiphop", "edm_dance", "classical", "metal", "jazz"]

shape = (10,2584)

In [3]:
with open(os.path.join(root, "mfcc_scaler.pkl"), "rb") as f:
    scl = pickle.load(f)

In [4]:
if os.name == "nt":
    dset_f = h5py.File(os.path.join(r"C:\Users\vik\Desktop\empty", "mfcc_dset.hdf5"), "r")
else:
    dset_f = h5py.File(os.path.join(r"/home/ritchie46/Documents", "mfcc_dset.hdf5"), "r")

In [5]:
a = np.array(dset_f["labels"])
n_classes = np.unique(dset_f["labels"]).size

def create_class_weights(class_distru):
    total = sum(class_distru.values())
    new = dict()
    for i in classes:
        new[i] = total / classes[i]
        
    return new
    

classes = dict()

for i in range(n_classes):
    classes[i] = np.count_nonzero(a == i)
    
class_weights = create_class_weights(classes)
total = len(a)
print(n_classes, total, "\n", genres, "\n", class_weights)
del a


5 8366 
 ['hiphop', 'edm_dance', 'classical', 'metal', 'jazz'] 
 {0: 6.925496688741722, 1: 3.355796229442439, 2: 3.1239731142643765, 3: 5.325270528325907, 4: 20.110576923076923}


In [6]:
shuffle_seed = np.arange(0, total)
np.random.shuffle(shuffle_seed)

In [91]:

def generate_batch(batch_size, seed, test=False):  
    while True:
        if not test:
            print("newloop")
        data = deque()
        labels = deque()

        if test:
            seed_n = seed[6800:-3]
        else:
            seed_n = seed[:6800]
        c = 0
        np.random.shuffle(seed_n)
        
        for i in seed_n:
            x = dset_f["data"][i]
            y = dset_f["labels"][i]
            y = np.eye(n_classes)[y].reshape(n_classes)
            x = scl.transform(x.reshape(1, x.size)).reshape(10, x.size // 10)
            
            data.append(x)
            labels.append(y)

            if len(data) > batch_size:
                data.popleft()
                labels.popleft()            

            if (c + 1) % batch_size == 0:
#                 print('yield')
                yield np.array(data), np.array(labels)

            c += 1

            
def augment(batch_size, test=False):
    """
    Augments the data passed through from the data generator.
    :param bath_size: (int) Maximum batch size is 1000. Choose the batch size so that
                            1000 % batch_size == 0.
    :param test: (bool): Augment testing data.
    """
    n_yield = 320
    if test:
        gen = generate_batch(n_yield, shuffle_seed, True)
    else:
        gen = generate_batch(n_yield, shuffle_seed)
    while True:
        batch = next(gen)
        x = batch[0]
        y = batch[1]

        # Return every nth column of the array. This is losing information.
        n = 4
        
        # Last column of the time signal
        edge = x.shape[2] - n
        
        data = np.zeros((n_yield * n, 10, edge // n))
        labels = deque()
       
        for i in range(x.shape[0]):
            for j in range(n):
                for k in range(x[i].shape[0]):
                    data[i][k] = x[i][k][j: edge + j][::n]
                    labels.append(y[i])
        
        labels = np.array(labels)
#       print(data.shape[0])
        seed_n = np.arange(0, data.shape[0])
        np.random.shuffle(seed_n)
        data = data[seed_n]
        labels = labels[seed_n]

        for i in range(data.shape[0] // batch_size):
            return_data = data[i * batch_size: (i + 1) * batch_size]

            yield return_data.reshape(batch_size, 10, 645, 1), labels[i * batch_size: (i + 1) * batch_size]

                    
# #gen = augment(40, 100)
# next(gen)[0].shape

In [85]:
with tf.device('/gpu:0'):
    model = Sequential()
    model.add(Conv2D(40, (10, 2), strides=2, padding="same", activation="relu", input_shape=(10, 645, 1)))
    model.add(Flatten())
    model.add(Dense(400, activation="relu", 
                    kernel_regularizer=keras.regularizers.l2(0.01)))
    # model.add(Dropout(0.2))
    model.add(Dense(n_classes, activation="softmax"))

    model.compile(optimizer=keras.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.01),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

In [92]:
gen = augment(40)
gen_test = augment(20, True)
epoch = 0

In [None]:
model.compile(optimizer=keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.01),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
n = 20
model.fit_generator(gen, 509, epochs=epoch + n, verbose=1, initial_epoch=epoch, 
                   validation_data=gen_test,
                   validation_steps=20,
                   class_weight=class_weights,
                   callbacks=[#keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=10, patience=200, verbose=0, mode='auto'),
                             keras.callbacks.TensorBoard(log_dir=root + '/Graph/run_conv', histogram_freq=0,  
          write_graph=False, write_images=False)])
epoch += n

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20

In [32]:
#gen_test = augment(1600 * 100, True)
gen_test2 = augment(100, True) 
a = next(gen_test2)
print(a[0].shape)
true = model.predict_classes(a[0])
#["hiphop", "edm_dance", "classical", "metal", "jazz"]
print("\n",sklearn.metrics.classification_report(true, np.argmax(a[1], 1)))
print(sklearn.metrics.accuracy_score(true, np.argmax(a[1], 1)))
del gen_test2
del a
del true

(1000, 1250)
  32/1000 [..............................] - ETA: 0s
              precision    recall  f1-score   support

          0       0.01      0.01      0.01       149
          1       0.00      0.00      0.00       108
          2       1.00      0.75      0.86       400
          3       0.77      1.00      0.87       232
          4       0.00      0.00      0.00       111

avg / total       0.58      0.53      0.55      1000

0.534


  'precision', 'predicted', average, warn_for)


In [25]:

import librosa
import scipy
audio = "mp3"
genres_d = os.listdir(os.path.join(root, audio))
for i in range(10):
    label = random.randint(0, len(genres) - 1)
    genre_current = genres[label]
    if genre_current in genres:
        nrs = os.listdir(os.path.join(root, audio, genre_current))
        choice = random.randint(0, len(nrs) - 1)
        a = IPython.display.Audio(filename=os.path.join(root, audio, genre_current, nrs[choice]))
        wav = os.path.join(root, "wav", genre_current, nrs[choice].replace("mp3", "wav"))
        sr, ampl = scipy.io.wavfile.read(wav)
        if ampl.shape[0] == 1323000:
            mfcc = np.load(os.path.join(root, "mfcc", genre_current, nrs[choice].replace("mp3", "wav.npy")))
            mfcc = mfcc.reshape((1, 10 * 2584))
            mfcc = scl.transform(mfcc)
            print(mfcc.shape[0])
            mfcc = mfcc[:,:25000][:,::20]
            label = np.eye(6)[label].reshape(1, 6)
            print(os.path.join(root, audio, genre_current, nrs[choice]))
            
            p = np.argmax(model.predict(mfcc, 1))
            print("Neural nets predection:", genres[p])
            IPython.display.display(a)


1
/media/ritchie46/5E061D60061D3A8B/dataset/mp3/classical/Luigi_Boccherini-Boccherini__String_Quintet_in_E_Major__G__275__III__Minuetto.mp3
Neural nets predection: classical


1
/media/ritchie46/5E061D60061D3A8B/dataset/mp3/classical/Orlande_de_Lassus-Psalmi_Davidis_poenitentiales__Psalmus_Primus_Poenitentialis__Domine__ne_in_furore_tuo_arguas_me_.mp3
Neural nets predection: classical


1
/media/ritchie46/5E061D60061D3A8B/dataset/mp3/hiphop/Boef-Habiba.mp3
Neural nets predection: hiphop


In [168]:
model.save(os.path.join(root, "mlp_360.h5"))