In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
import os
import librosa as lbrs
import wave as wv
import glob
import audioread as ar
import binascii
import sys
import random

  from ._conv import register_converters as _register_converters


In [2]:
genres_path = os.path.expanduser("~/Downloads/genres/*/*.au")

In [3]:
genre_map = {
  'pop': 0,
  'metal': 1,
  'disco': 2,
  'blues': 3,
  'reggae': 4,
  'classical': 5,
  'rock': 6,
  'hiphop': 7,
  'country': 8,
  'jazz': 9
}

sample_length = 3072

In [4]:
def indices_to_one_hot(dta):
  one_hot = np.zeros((len(dta), len(np.unique(dta))), np.uint8)
  one_hot[np.arange(len(dta)), dta] = 1
  return one_hot

In [5]:
def read_audio_file(file):
    file_data = np.zeros(sample_length, np.uint8)
    with ar.audio_open(audio_file_path) as audio_file:
      count = 0
      sys.stdout.write('.')
      for buf in audio_file:
        for sample in buf:
          if count >= sample_length:
            break
          file_data[count] = sample
          count = count + 1
        if count >= sample_length:
            break
    return file_data

In [6]:
def getbatch(alldata, alllabels, batch_size = 16):
    count = 0
    newdata = alldata.copy()
    arraylength = np.ma.size(newdata, axis=0)
    while count < arraylength/batch_size:
        randstart = random.randint(0, arraylength-batch_size-1)
        count += 1
        x = newdata[randstart:randstart+batch_size]
        yield (x, alllabels[randstart:randstart+batch_size])

In [7]:
labels = np.ndarray(len(glob.glob(genres_path)), np.uint8)
data = np.ndarray((len(glob.glob(genres_path)), sample_length))

count = 0

for num, audio_file_path in enumerate(glob.glob(genres_path), start=0):
    genre = audio_file_path.split('/')[-2]
    labels[num] = genre_map[genre]
    data[num] = read_audio_file(audio_file_path)

labels = indices_to_one_hot(labels)

print(data.shape)

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [8]:
print(labels.shape)

(1000, 10)


In [9]:
print(data.shape)

(1000, 3072)


In [10]:
data = data.copy() / data.max()

In [11]:
train_test_mask = np.random.rand(1000,) > 0.2
train_test_mask = np.full(1000, False)
train_test_mask[:800] = True
np.random.shuffle(train_test_mask)

train_data = data[train_test_mask,:,]
test_data = data[~train_test_mask,:,]

train_labels = labels[train_test_mask]
test_labels = labels[~train_test_mask]

print(train_data.shape)
print(test_data.shape)

(800, 3072)
(200, 3072)


In [12]:
ninput = sample_length
noutput = len(genre_map)

nepochs = 10
batch_size = len(train_data) / 10
learningrate = 0.001
nhidden1 = 512
nhidden2 = 256
nhidden3 = 128

In [13]:
X = tf.placeholder(tf.float32, [None, ninput])
Y = tf.placeholder(tf.float32, [None, noutput])

In [14]:
weights = \
{
  'h1': tf.Variable(tf.random_normal([ninput, nhidden1])),
  'h2': tf.Variable(tf.random_normal([nhidden1, nhidden2])),
  'h3': tf.Variable(tf.random_normal([nhidden2, nhidden3])),
  'out': tf.Variable(tf.random_normal([nhidden3, noutput]))
}

biases = \
{
  'b1': tf.Variable(tf.random_normal([nhidden1])),
  'b2': tf.Variable(tf.random_normal([nhidden2])),
  'b3': tf.Variable(tf.random_normal([nhidden3])),
  'out': tf.Variable(tf.random_normal([noutput]))
}

In [15]:
def multiperceptron(x):
    l1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    l2 = tf.nn.sigmoid(tf.add(tf.matmul(l1, weights['h2']), biases['b2']))
    l3 = tf.nn.sigmoid(tf.add(tf.matmul(l2, weights['h3']), biases['b3']))
    outl = tf.nn.sigmoid(tf.add(tf.matmul(l3, weights['out']), biases['out']))
    return outl
    
model = multiperceptron(X)

In [16]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = model, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learningrate)
train_min = optimizer.minimize(loss)

correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

In [17]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(nepochs):
        for batch_x, batch_y in getbatch(train_data, train_labels, 512):
            # Use training data for optimization
            sess.run(train_min, feed_dict={X:batch_x, Y:batch_y})

        # Validate after every epoch
        arraylength = len(train_data)
        randstart = random.randint(0, arraylength-batch_size-1)
        batch_x = train_data.copy()[randstart:randstart+512]
        batch_y = train_labels.copy()[randstart:randstart+512]
        losscalc, accuracycalc = sess.run([loss, accuracy], feed_dict={X:batch_x, Y:batch_y})
        print("Epoch: %d, Loss: %0.4f, Accuracy: %0.4f"%(epoch, losscalc, accuracycalc))
            
    # When the training is complete and you are happy with the result
    testing_x = test_data.reshape((test_data.shape[0], sample_length))
    testing_y = test_labels

    accuracycalc = sess.run(accuracy, feed_dict={X: testing_x, Y: testing_y})
    print("Testing accuracy: %0.4f"%(accuracycalc))

Epoch: 0, Loss: 2.3029, Accuracy: 0.2054
Epoch: 1, Loss: 2.2312, Accuracy: 0.2172
Epoch: 2, Loss: 2.1983, Accuracy: 0.2241
Epoch: 3, Loss: 2.1363, Accuracy: 0.2004
Epoch: 4, Loss: 1.9644, Accuracy: 0.2246
Epoch: 5, Loss: 2.0155, Accuracy: 0.2031
Epoch: 6, Loss: 2.1552, Accuracy: 0.2540
Epoch: 7, Loss: 1.8896, Accuracy: 0.2891
Epoch: 8, Loss: 2.4856, Accuracy: 0.0229
Epoch: 9, Loss: 1.8358, Accuracy: 0.3555
Testing accuracy: 0.0900
