In [33]:
import glob
import os
import matplotlib
# to define plot backends, pick one of those: Agg, Qt4Agg, TkAgg
matplotlib.use('TkAgg')
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import precision_recall_fscore_support  

######################### Helper Methods ################################

def loadMusics(filePath):
	"""Decode every audio file listed in ``filePath`` and return the waveforms.

	``filePath`` is an iterable of paths. Each one is read with ``librosa.load``
	and only the sample array is kept; the sample rate that ``librosa.load``
	also returns is discarded. The result is a list in the same order as the
	input (an empty input gives an empty list).
	"""
	return [librosa.load(trackPath)[0] for trackPath in filePath]

def featureExtraction(fileName):
	"""Summarise one audio file as five time-averaged feature vectors.

	The file is decoded with ``librosa.load`` and five librosa features are
	computed from it. Each feature matrix is transposed and averaged over
	axis 0, which leaves one value per feature coefficient.

	Args:
		fileName: Path of the audio file to analyse.

	Returns:
		A tuple ``(mfcc, chroma, mel, contrast, tonnetz)`` of 1-D arrays.
		``mfcc`` has 40 entries (``n_mfcc=40``). parseAudio stacks the five
		vectors into a 193-wide row, so the remaining lengths rely on
		librosa's default settings.
	"""
	raw, rate = librosa.load(fileName)
	# Magnitude spectrogram, shared by the chroma and spectral-contrast features.
	stft = np.abs(librosa.stft(raw))
	mfcc = np.mean(librosa.feature.mfcc(y=raw, sr=rate, n_mfcc=40).T, axis=0)
	chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=rate).T, axis=0)
	# The signal must be passed as ``y=``: librosa >= 0.10 makes it keyword-only
	# and rejects a positional waveform with a TypeError.
	mel = np.mean(librosa.feature.melspectrogram(y=raw, sr=rate).T, axis=0)
	contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=rate).T, axis=0)
	# Tonnetz is computed on the harmonic component of the signal only.
	tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(raw), sr=rate).T, axis=0)
	return mfcc, chroma, mel, contrast, tonnetz

# Takes parent directory name, subdirectories within parent directory, and file extension as input. 
def parseAudio(parentDirectory, subDirectories, fileExtension="*.au"):
	"""Build the feature matrix and integer labels for a directory tree of audio.

	Every file matching ``parentDirectory/<subDir>/<fileExtension>`` is passed
	to ``featureExtraction`` and its five feature vectors are concatenated
	into one row.

	Args:
		parentDirectory: Directory that contains one sub-directory per genre.
		subDirectories: Names of the genre sub-directories to scan, in the
			order their files should appear in the output.
		fileExtension: Glob pattern for the audio files inside each sub-directory.

	Returns:
		``(features, labels)``. ``features`` is a float64 array with one row
		per file; it has shape ``(0, 193)`` when no file matches. ``labels`` is
		a 1-D integer array aligned with the rows.
	"""
	# pop = 1, jazz = 2, metal = 3; every other directory name (i.e. "rock") = 0
	genreToLabel = {"pop": 1, "jazz": 2, "metal": 3}
	rows, labels = [], []
	for subDir in subDirectories:
		label = genreToLabel.get(subDir, 0)
		for fn in glob.glob(os.path.join(parentDirectory, subDir, fileExtension)):
			rows.append(np.hstack(featureExtraction(fn)))
			labels.append(label)
	# Rows are collected in a list and stacked once; growing an array with
	# vstack/append inside the loop re-copies everything on every file.
	if rows:
		features = np.asarray(np.vstack(rows), dtype=np.float64)
	else:
		features = np.empty((0, 193))
	# The builtin int replaces the np.int alias, which NumPy 1.24 removed.
	return features, np.array(labels, dtype=int)

# Genre folders to scan; parseAudio maps them to labels pop=1, jazz=2, metal=3, rock=0.
subDirectories = ["pop", "jazz", "metal", "rock"]

# Root folders of the training and test splits (each is expected to hold one sub-folder per genre).
training = "C:\\Users\\Ameya\\Desktop\\projectsem4\\trainzip"
test = "C:\\Users\\Ameya\\Desktop\\projectsem4\\testzip"

# Feature extraction for the training split; the printed labels are grouped by genre
# in subDirectories order, i.e. all 1s, then 2s, then 3s, then 0s.
trainingFeatures, trainingLabels = parseAudio(parentDirectory=training, subDirectories=subDirectories)
print(trainingLabels)
# The test split is parsed the same way, so its labels follow the same 1, 2, 3, 0 grouping.
testFeatures, testLabels = parseAudio(parentDirectory=test, subDirectories=subDirectories)


	
def oneHotEncoder(labels, nClasses=None):
	"""Convert a vector of integer class labels into a one-hot matrix.

	Args:
		labels: 1-D sequence of integer class indices.
		nClasses: Number of columns in the result. When omitted, the width is
			the larger of the number of distinct labels and ``max(labels) + 1``.
			For labels that cover ``0..k-1`` this is ``k``. It also stays wide
			enough when some class index is missing from ``labels``, where
			counting distinct values alone would make the column index
			overflow.

	Returns:
		A float array of shape ``(len(labels), nClasses)`` with a single 1 per
		row in the column given by that row's label. Empty input with no
		explicit ``nClasses`` gives shape ``(0, 0)``.
	"""
	labels = np.asarray(labels, dtype=int)
	n = len(labels)
	if nClasses is None:
		if n:
			nClasses = max(len(np.unique(labels)), int(labels.max()) + 1)
		else:
			nClasses = 0
	encoder = np.zeros((n, nClasses))
	encoder[np.arange(n), labels] = 1
	return encoder

# Replace the integer label vectors with their one-hot matrices.
# NOTE: this rebinds the same names, so the cell is not safe to run twice on a live kernel.
trainingLabels = oneHotEncoder(trainingLabels)
testLabels = oneHotEncoder(testLabels)

# --- Training configuration ---
epochs = 5000
learningRate = 0.01

# --- Network dimensions ---
# trainingFeatures has one row per audio file and one column per feature (193 columns).
nDim = trainingFeatures.shape[1]  # number of feature columns
nClasses = 4
nHiddenUnitsOne = 280
nHiddenUnitsTwo = 300

# Standard deviation shared by every random-normal weight/bias initialiser below.
sd = 1 / np.sqrt(nDim)

# --- Graph inputs ---
X = tf.placeholder(tf.float32, shape=[None, nDim])      # feature rows; nDim values per example
Y = tf.placeholder(tf.float32, shape=[None, nClasses])  # one-hot genre labels; nClasses columns

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [34]:
X

<tf.Tensor 'Placeholder_4:0' shape=(?, 193) dtype=float32>

In [35]:
Y

<tf.Tensor 'Placeholder_5:0' shape=(?, 4) dtype=float32>

In [36]:
# Hidden layer 1: nDim inputs -> nHiddenUnitsOne units, tanh activation.
W1 = tf.Variable(
    tf.random_normal(shape=[nDim, nHiddenUnitsOne], mean=0, stddev=sd)
)
b1 = tf.Variable(
    tf.random_normal(shape=[nHiddenUnitsOne], mean=0, stddev=sd)
)
h1 = tf.nn.tanh(tf.matmul(X, W1) + b1)

In [37]:
W1

<tf.Variable 'Variable_12:0' shape=(193, 280) dtype=float32_ref>

In [38]:
b1

<tf.Variable 'Variable_13:0' shape=(280,) dtype=float32_ref>

In [39]:
h1

<tf.Tensor 'Tanh_2:0' shape=(?, 280) dtype=float32>

In [40]:
# Hidden layer 2: nHiddenUnitsOne inputs -> nHiddenUnitsTwo units, sigmoid activation.
W2 = tf.Variable(
    tf.random_normal(shape=[nHiddenUnitsOne, nHiddenUnitsTwo], mean=0, stddev=sd)
)
b2 = tf.Variable(
    tf.random_normal(shape=[nHiddenUnitsTwo], mean=0, stddev=sd)
)
h2 = tf.nn.sigmoid(tf.matmul(h1, W2) + b2)

In [41]:
# Output layer: nHiddenUnitsTwo inputs -> nClasses scores, turned into probabilities by softmax.
W = tf.Variable(
    tf.random_normal(shape=[nHiddenUnitsTwo, nClasses], mean=0, stddev=sd)
)
b = tf.Variable(
    tf.random_normal(shape=[nClasses], mean=0, stddev=sd)
)
y = tf.nn.softmax(tf.matmul(h2, W) + b)

In [42]:

# Op that initialises every tf.Variable created so far (W1/b1, W2/b2, W/b).
init = tf.global_variables_initializer()

# Mean cross-entropy between the one-hot targets Y and the softmax output y.
# - `axis=1` replaces the deprecated `reduction_indices=[1]` spelling.
# - The small constant keeps log() finite if a softmax probability underflows to 0,
#   which would otherwise turn the loss into inf/NaN.
# A later cell rebuilds costFunction before the optimizer is created.
costFunction = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(y + 1e-10), axis=1))

In [43]:
costFunction

<tf.Tensor 'Mean_6:0' shape=() dtype=float32>

In [44]:
# Mean cross-entropy between the one-hot targets Y and the softmax output y.
# The small constant keeps log() finite if a softmax probability underflows to 0;
# without it a single log(0) makes the loss, and every later gradient, NaN.
costFunction = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(y + 1e-10), axis=1))
# One optimizer step = one plain gradient-descent update of all variables.
optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(costFunction)

In [45]:

# Per-example boolean: does the most probable predicted class match the one-hot target?
correctPrediction = tf.equal(
    tf.argmax(y, axis=1),
    tf.argmax(Y, axis=1),
)
# Fraction of correct predictions (booleans cast to 0.0 / 1.0, then averaged).
accuracy = tf.reduce_mean(tf.cast(correctPrediction, dtype=tf.float32))

In [46]:
# Full-batch gradient descent: each epoch feeds the entire training set once.
epochCosts = []
yTrue, yPred = None, None
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        # sess.run returns one value per fetch. The optimizer op yields None, so
        # only the second value (the loss) is recorded.
        _stepResult, cost = sess.run(
            [optimizer, costFunction],
            feed_dict={X: trainingFeatures, Y: trainingLabels},
        )
        epochCosts.append(cost)

    # Predicted class index for every test example.
    yPred = sess.run(tf.argmax(y, 1), feed_dict={X: testFeatures})

# testLabels is already a NumPy one-hot matrix, so its class indices need no graph op.
yTrue = np.argmax(testLabels, axis=1)
# One loss value per epoch, in order.
costHistory = np.array(epochCosts, dtype=float)

In [49]:
# Micro-averaged precision, recall and F-score of the test predictions.
p, r, f, s = precision_recall_fscore_support(yTrue, yPred, average='micro')
# Scale to a percentage first and round last: rounding before the multiplication
# lets float artefacts back in (e.g. 0.57 * 100 == 56.99999999999999).
print("F-Score:", round(f * 100, 1))
print("TLabels:", yTrue)
print("PLabels:", yPred)

F-Score: 75.0
TLabels: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
PLabels: [1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 2 2 1 2 2 2 2 2 2 0 0 1 0 0 2 0 3 3 3 3 3
 3 2 3 3 3 3 3 3 3 3 3 0 3 2 0 3 2 2 0 0 0 0 0 0 0 0 0]


In [48]:
from sklearn.metrics import confusion_matrix
# Rows are true classes, columns are predicted classes.
# Passing the label list explicitly pins the row/column order to 0..nClasses-1
# (0 = rock, 1 = pop, 2 = jazz, 3 = metal) and keeps the matrix nClasses x nClasses
# even if some genre never occurs in yTrue or yPred.
m = confusion_matrix(yTrue, yPred, labels=list(range(nClasses)))
print(m)

[[11  0  3  2]
 [ 0 13  3  0]
 [ 5  2  9  0]
 [ 0  0  1 15]]


In [50]:
# Re-print the micro F-score as a percentage. `f` comes from the metrics cell above.
# Round after scaling so the printed value carries no float artefacts.
print("F-Score:", round(f * 100, 1))

F-Score: 75.0
