In [8]:
%matplotlib inline
import os
import sys
import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display

import utils

# BPNN CLASSES

In [9]:
#functions of non-linear activations
def f_sigmoid(X, deriv=False):
    """Element-wise logistic sigmoid, or its derivative.

    Args:
        X: scalar or NumPy array of pre-activation values.
        deriv: when truthy, return the derivative of the sigmoid evaluated
            at ``X`` instead of the sigmoid itself.

    Returns:
        ``1 / (1 + exp(-X))``, or ``sigmoid(X) * (1 - sigmoid(X))`` when
        ``deriv`` is set. The result has the same shape as ``X``.
    """
    sig = 1 / (1 + np.exp(-X))
    if deriv:
        # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
        return sig * (1 - sig)
    return sig


def f_softmax(X):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        X: array of shape ``(n_rows, n_classes)``; each row is normalised
            independently (the reduction runs over ``axis=1``).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    X = np.asarray(X)
    # Subtracting the per-row maximum leaves the softmax unchanged but keeps
    # np.exp from overflowing to inf (and the result from becoming NaN) when
    # the scores are large.
    shifted = X - np.max(X, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)


#def f_relu(X):
 #   return np.maximum(0,X)


In [10]:
def exit_with_err(err_str):
    """Write ``err_str`` to standard error and terminate with exit status 1.

    Args:
        err_str: message to report.

    Raises:
        SystemExit: always, with code 1.
    """
    # The stream must be passed via ``file=``; as a positional argument
    # print() would write the repr of sys.stderr to stdout instead.
    print(err_str, file=sys.stderr)
    sys.exit(1)

In [11]:
# A single network layer (input, hidden, or output)
class Layer:
    """One layer of the network together with its outgoing weights.

    Attributes:
        Z:  output values of the layer, allocated as (batch_size, size[0]).
        S:  inputs to the layer (None for the input layer).
        D:  deltas of the layer (None for the input layer).
        W:  outgoing weight matrix of shape ``size`` (None for the output layer).
        Fp: derivatives of the activation function, stored transposed
            (only allocated for hidden layers).
    """

    def __init__(self, size, batch_size, is_input=False, is_output=False,
                 activation=f_sigmoid):
        """Allocate the matrices this kind of layer needs.

        Args:
            size: ``[n_units, n_units_next]``; used as the shape of ``W``.
            batch_size: number of rows pre-allocated in the per-batch matrices.
            is_input: True for the first layer (no S, D or Fp).
            is_output: True for the last layer (no W or Fp).
            activation: callable ``f(X, deriv=False)`` applied to ``S``; the
                hidden-layer path also calls it with ``deriv=True``.
        """
        n_units = size[0]
        is_hidden = not (is_input or is_output)

        self.is_input = is_input
        self.is_output = is_output
        # Externally defined non-linearity, kept so forward_propagate can call it.
        self.activation = activation

        self.Z = np.zeros((batch_size, n_units))
        self.S = None if is_input else np.zeros((batch_size, n_units))
        self.D = None if is_input else np.zeros((batch_size, n_units))
        # Small random initial weights for every layer that feeds another one.
        self.W = None if is_output else np.random.normal(size=size, scale=1E-4)
        self.Fp = np.zeros((n_units, batch_size)) if is_hidden else None

    def forward_propagate(self):
        """Compute this layer's output and return what it passes on.

        Returns:
            For the output layer, its activations ``Z``. For every other
            layer, ``Z.dot(W)``, i.e. the inputs of the following layer.
        """
        if not self.is_input:
            self.Z = self.activation(self.S)
            if self.is_output:
                return self.Z
            # Hidden layer: append a column of ones so the last row of W acts
            # as the bias, and cache the activation derivative (transposed).
            bias_column = np.ones((self.Z.shape[0], 1))
            self.Z = np.append(self.Z, bias_column, axis=1)
            self.Fp = self.activation(self.S, deriv=True).T
        # The input layer's Z is set by the caller before this is invoked.
        return self.Z.dot(self.W)


In [59]:
class MultiLayerPerceptron:
    """Feed-forward network of ``Layer`` objects trained by mini-batch backpropagation.

    Args:
        layer_config: sequence of layer sizes ``[n_inputs, n_hidden..., n_outputs]``;
            at least two entries are required.
        batch_size: number of rows used to pre-allocate each layer's matrices.
        activation_o: activation applied by the output layer.

    Raises:
        ValueError: if ``layer_config`` has fewer than two entries.
    """

    def __init__(self, layer_config, batch_size=100,activation_o = f_softmax):
        if len(layer_config) < 2:
            raise ValueError("layer_config needs at least an input and an "
                             "output size, got {0!r}".format(layer_config))

        self.layers = []
        self.num_layers = len(layer_config)
        self.minibatch_size = batch_size

        for i in range(self.num_layers-1):
            if i == 0:
                print ("Initializing input layer with size {0}.".format(layer_config[i]))
                # Here, we add an additional unit at the input for the bias
                # weight.
                self.layers.append(Layer([layer_config[i]+1, layer_config[i+1]],
                                         batch_size,
                                         is_input=True))
            else:
                print ("Initializing hidden layer with size {0}.".format(layer_config[i]))
                # Here we add an additional unit in the hidden layers for the
                # bias weight.
                self.layers.append(Layer([layer_config[i]+1, layer_config[i+1]],
                                         batch_size,
                                         activation=f_sigmoid))

        print ("Initializing output layer with size {0}.".format(layer_config[-1]))
        self.layers.append(Layer([layer_config[-1], None],
                                 batch_size,
                                 is_output=True,
                                 activation=activation_o))
        print ("Done!")

    def forward_propagate(self, data):
        """Run one batch through the network.

        Args:
            data: 2-D array with one sample per row.

        Returns:
            The output layer's activations for the batch.
        """
        # We need to be sure to add bias values to the input
        self.layers[0].Z = np.append(data, np.ones((data.shape[0], 1)), axis=1)

        for i in range(self.num_layers-1):
            self.layers[i+1].S = self.layers[i].forward_propagate()
        return self.layers[-1].forward_propagate()

    def backpropagate(self, yhat, labels):
        """Compute the deltas ``D`` of every non-input layer for the last batch.

        Args:
            yhat: network output returned by ``forward_propagate``.
            labels: target matrix with the same shape as ``yhat``.
        """
        # Output delta: prediction minus target, stored transposed
        # (units x batch). For a softmax output trained with cross-entropy
        # this is the loss gradient w.r.t. the output pre-activations.
        self.layers[-1].D = (yhat - labels).T

        # Walk the hidden layers from last to first, pulling the next layer's
        # delta back through the weights and scaling it by the derivative of
        # this layer's activation.
        for i in range(self.num_layers-2, 0, -1):
            # We do not calculate deltas for the bias values
            W_nobias = self.layers[i].W[0:-1, :]
            self.layers[i].D = W_nobias.dot(self.layers[i+1].D) * self.layers[i].Fp

    def update_weights(self, eta):
        """Apply one gradient-descent step to every weight matrix.

        Args:
            eta: learning rate, i.e. the factor by which the gradient is
                scaled before it is subtracted from the weights.
        """
        for i in range(0, self.num_layers-1):
            # The gradient of W_i is the next layer's delta times this layer's
            # output, summed over the batch by the matrix product.
            W_grad = -eta*(self.layers[i+1].D.dot(self.layers[i].Z)).T
            self.layers[i].W += W_grad

    def _error_rate(self, data, labels):
        """Return the fraction of misclassified samples over the given batches.

        Assumes every row of a label batch is one-hot, so that picking the
        entry at the predicted class yields 1 for a hit and 0 for a miss.
        Returns NaN when there are no samples to evaluate.
        """
        errs = 0
        n_samples = 0
        for b_data, b_labels in zip(data, labels):
            output = self.forward_propagate(b_data)
            yhat = np.argmax(output, axis=1)
            errs += np.sum(1-b_labels[np.arange(len(b_labels)), yhat])
            n_samples += len(b_labels)
        if n_samples == 0:
            return float("nan")
        return float(errs)/n_samples

    def evaluate(self, train_data, train_labels, test_data, test_labels,
                 num_epochs=170, eta=0.05, eval_train=False, eval_test=True):
        """Train the network and print the error rate after every epoch.

        Args:
            train_data: sequence of training batches (2-D arrays).
            train_labels: sequence of label batches matching ``train_data``.
            test_data: sequence of held-out batches.
            test_labels: sequence of label batches matching ``test_data``.
            num_epochs: number of passes over the training batches.
            eta: learning rate passed to ``update_weights``.
            eval_train: when True, report the training error each epoch.
            eval_test: when True, report the held-out error each epoch.
        """
        print ("Training for {0} epochs...".format(num_epochs))
        for t in range(0, num_epochs):
            out_str = "[{0:4d}] ".format(t)

            # One weight update per mini-batch.
            for b_data, b_labels in zip(train_data, train_labels):
                output = self.forward_propagate(b_data)
                self.backpropagate(output, b_labels)
                self.update_weights(eta=eta)

            # Sample counts are taken from the batches actually evaluated, so
            # an empty or unevaluated split no longer raises IndexError.
            if eval_train:
                out_str = ("{0} Training error: {1:.5f}".format(
                    out_str, self._error_rate(train_data, train_labels)))

            if eval_test:
                out_str = ("{0} Test error: {1:.5f}").format(
                    out_str, self._error_rate(test_data, test_labels))

            print (out_str)


In [13]:
def label_to_bit_vector(labels, nbits):
    """One-hot encode integer class labels.

    Args:
        labels: 1-D sequence (list, NumPy array or pandas Series) of integer
            class indices in ``[0, nbits)``.
        nbits: number of classes, i.e. the width of the encoding.

    Returns:
        Float array of shape ``(len(labels), nbits)`` with a single 1.0 per
        row, at the column given by the corresponding label.
    """
    # Work on the raw values so a pandas Series is read by position rather
    # than by index label.
    labels = np.asarray(labels)
    bit_vector = np.zeros((labels.shape[0], nbits))
    for row, label in enumerate(labels):
        bit_vector[row, label] = 1.0

    return bit_vector

In [17]:
def prepare_for_backprop(batch_size, Train_images, Train_labels, Valid_images, Valid_labels):
    """Split the training and validation sets into mini-batches.

    Both sets go through ``create_batches`` with ``create_bit_vector=True``,
    so the returned label batches are whatever encoding that helper produces.

    Args:
        batch_size: number of samples per batch.
        Train_images: training samples.
        Train_labels: labels matching ``Train_images``.
        Valid_images: validation samples.
        Valid_labels: labels matching ``Valid_images``.

    Returns:
        Tuple ``(train_batches, train_label_batches,
        valid_batches, valid_label_batches)``.
    """
    def _batched(samples, targets):
        return create_batches(samples, targets, batch_size,
                              create_bit_vector=True)

    print ("Creating data...")
    train_x, train_y = _batched(Train_images, Train_labels)
    valid_x, valid_y = _batched(Valid_images, Valid_labels)
    print ("Done!")

    return train_x, train_y, valid_x, valid_y



In [16]:
def genre_to_bit_vector(genres, nbits):
    """One-hot encode genre names using the module-level ``mydict`` mapping.

    Args:
        genres: 1-D sequence (list, NumPy array or pandas Series) of genre
            names; it is read by position, so a Series index is ignored.
        nbits: width of the encoding; must exceed every index in ``mydict``.

    Returns:
        Float array of shape ``(len(genres), nbits)`` where row ``r`` has a
        1.0 in column ``mydict[genres[r]]``.

    Raises:
        KeyError: if a genre is not a key of ``mydict``.
    """
    # The previous implementation guarded a "class 2" branch with
    # ``a != 0 | a != 4``. Because ``|`` binds tighter than ``!=`` this is the
    # chained comparison ``a != (0 | a) != 4``, which is always False, so the
    # branch never ran; only the plain one-hot path is kept.
    #
    # It also looked rows up with ``genres[i]``, which on an integer-indexed
    # Series is a label lookup: rows came out ordered by index value and an
    # unknown genre made the loop spin forever. Iterating over the raw values
    # keeps rows aligned with the data batch.
    genre_names = np.asarray(genres)
    bit_vector = np.zeros((genre_names.shape[0], nbits))

    for row, genre in enumerate(genre_names):
        try:
            column = mydict[genre]
        except KeyError:
            raise KeyError("genre {0!r} at position {1} is not in mydict"
                           .format(genre, row)) from None
        bit_vector[row, column] = 1.0

    return bit_vector

In [19]:
def spectogram(songID,resample=False ,sampling=44100, plot = True):
    """Compute (and optionally plot) the mel spectrogram of one track.

    Args:
        songID: track identifier passed to ``utils.get_audio_path`` to locate
            the audio file under ``./fma_small/``.
        resample: when True, resample the signal to ``sampling`` Hz first.
        sampling: target sample rate used when ``resample`` is set.
        plot: when True, draw the spectrogram in dB on a new figure.

    Returns:
        The mel spectrogram returned by ``librosa.feature.melspectrogram``,
        or ``-1`` if the audio file does not exist.
    """
    filepath = utils.get_audio_path('./fma_small/', songID)
    try:
        # sr=None keeps the file's native sample rate.
        x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast
    except FileNotFoundError:
        return -1

    if resample:
        # Sample rates are passed by keyword: recent librosa releases no
        # longer accept them positionally.
        x = librosa.resample(x, orig_sr=sr, target_sr=sampling)
        sr = sampling
    spec = librosa.feature.melspectrogram(y=x, sr=sr)

    if plot:
        plt.figure(figsize=(10, 4))
        # specshow needs the real sample rate to label the time axis, and the
        # mel filter bank built above tops out at sr / 2 (its default fmax),
        # so that is the correct upper bound for the frequency axis.
        librosa.display.specshow(librosa.power_to_db(spec, ref=np.max),
                                 sr=sr, y_axis='mel', fmax=sr / 2,
                                 x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel spectrogram')
        plt.tight_layout()
    return spec

# Data Preprocessing

In [14]:
# Genres used as classification targets in this notebook.
# Other candidates, kept for reference (presumably the top-level genres of
# the small subset — TODO confirm):
#['Hip-Hop','Rock','Pop','Folk','Experimental','International','Electronic','Instrumental']
input_genres = ['Rock','Pop','Folk','Experimental']

In [15]:
# Map every distinct genre name to a consecutive class index, numbered in
# order of first appearance in input_genres.
mydict = {}
for genre_name in input_genres:
    if genre_name not in mydict:
        mydict[genre_name] = len(mydict)
mydict

{'Rock': 0, 'Pop': 1, 'Folk': 2, 'Experimental': 3}

In [18]:
# Load the metadata tables through the project's utils.load helper.
tracks = utils.load('./fma_metadata/tracks.csv')
genres = utils.load('./fma_metadata/genres.csv')
features = utils.load('./fma_metadata/features.csv')
echonest = utils.load('./fma_metadata/echonest.csv')

# Sanity checks: features and tracks must share the same index in the same
# order, and every echonest row must refer to a known track.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

  """Entry point for launching an IPython kernel.


In [54]:
def create_batches(data, labels, batch_size, create_bit_vector=False):
    """Cut ``data`` and ``labels`` into consecutive, equally sized batches.

    Only full batches are produced: when ``batch_size`` does not divide the
    number of examples, a warning is printed and the trailing remainder is
    left out.

    Args:
        data: 2-D array with one example per row.
        labels: sequence of labels aligned with the rows of ``data``; it is
            sliced positionally.
        batch_size: number of examples per batch; must be positive.
        create_bit_vector: when True, each label batch is encoded with
            ``genre_to_bit_vector`` using ``len(input_genres)`` columns;
            otherwise the label slices are returned unchanged.

    Returns:
        Tuple ``(chunked_data, chunked_labels)`` of two equally long lists.

    Raises:
        ValueError: if ``batch_size`` is not positive (the batching loop
            would otherwise never terminate).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got {0}"
                         .format(batch_size))

    N = data.shape[0]
    print ("Batch size {0}, the number of examples {1}.".format(batch_size,N))

    if N % batch_size != 0:
        print ("Warning in create_batches(): Batch size {0} does not "
               "evenly divide the number of examples {1}; the last {2} "
               "examples are dropped.".format(batch_size, N, N % batch_size))

    chunked_data = []
    chunked_labels = []
    # Start offsets of every batch that fits completely inside the data.
    for idx in range(0, N - batch_size + 1, batch_size):
        chunked_data.append(data[idx:idx+batch_size, :])
        batch_labels = labels[idx:idx+batch_size]
        if create_bit_vector:
            batch_labels = genre_to_bit_vector(batch_labels, len(input_genres))
        chunked_labels.append(batch_labels)

    return chunked_data, chunked_labels


In [32]:
# Boolean row masks over `tracks`.
# NOTE(review): the `<=` comparison presumably relies on ('set', 'subset')
# being an ordered categorical, so that it selects the small subset and
# anything ordered below it — confirm against utils.load.
small = tracks['set', 'subset'] <= 'small'

# One mask per predefined split.
train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'

In [33]:
# Top-level genre of every track selected by the `small` mask.
# NOTE(review): this rebinds `genres`, which until here held the table loaded
# from genres.csv, and the same assignment is repeated in the next cell.
genres = tracks.loc[small, ('track', 'genre_top')]

In [34]:
# Top-level genre of every track selected by the `small` mask.
genres = tracks.loc[small, ('track', 'genre_top')]
# Keep only the tracks whose top-level genre is one of input_genres.
mini_set = genres[genres.isin(input_genres)]

In [35]:
# 'mfcc' feature columns of the tracks in the test split of the small subset.
X_test = features.loc[small & test, 'mfcc']

In [37]:
# Switch to the underlying NumPy array so rows can be addressed by position.
X_test = X_test.values

numpy.ndarray

In [38]:
# 'mfcc' feature columns of the training split of the small subset, taken
# directly as the underlying NumPy array.
Xtr = features.loc[small & train, 'mfcc'].values

In [42]:
# Top-level genre of every training track in the small subset; selected with
# the same mask as Xtr, so row k of Xtr belongs to the k-th entry of Ltr.
Ltr = tracks.loc[small & train, ('track', 'genre_top')]
assert len(Ltr) == len(Xtr), "training features and labels are misaligned"

# Restrict labels and features to the target genres with one boolean mask
# instead of a hand-maintained row counter; the mask also keeps Xtr_mini
# two-dimensional when nothing matches.
train_keep = Ltr.isin(input_genres)
Ltr_mini = Ltr[train_keep]
Xtr_mini = Xtr[train_keep.values]

In [48]:
# Top-level genre of every test track in the small subset; selected with the
# same mask as X_test, so row k of X_test belongs to the k-th entry of L_test.
L_test = tracks.loc[small & test, ('track', 'genre_top')]
assert len(L_test) == len(X_test), "test features and labels are misaligned"

# Restrict labels and features to the target genres with one boolean mask
# instead of a hand-maintained row counter; the mask also keeps X_test_mini
# two-dimensional when nothing matches.
test_keep = L_test.isin(input_genres)
L_test_mini = L_test[test_keep]
X_test_mini = X_test[test_keep.values]

In [49]:
# Sizes of the filtered test features, test labels and training features,
# one per line.
print(len(X_test_mini), len(L_test_mini), len(Xtr_mini), sep="\n")
#print(L_test_mini)

400
400
3200


In [50]:
# Per-feature statistics are estimated on the training set only and then
# applied to both sets, so no information from the test set leaks in.
mean = np.mean(Xtr_mini, axis=0)
std = np.std(Xtr_mini, axis=0)
# A constant feature has zero standard deviation; dividing by 1 instead
# leaves it at 0 after centring rather than producing NaN/inf.
std = np.where(std == 0, 1.0, std)

Xtr_mini = (Xtr_mini - mean)/std
X_test_mini = (X_test_mini - mean)/std

In [57]:
# Number of samples per mini-batch.
batch_size = 400

# Note: the "valid_*" batches are built from the test split filtered above.
train_data, train_labels, valid_data, valid_labels = prepare_for_backprop(
    batch_size, Xtr_mini, Ltr_mini, X_test_mini, L_test_mini)


Creating data...
This is : 3200
Batch size 400, the number of examples 3200.
This is : 400
Batch size 400, the number of examples 400.
Done!


# Default activation (sigmoid hidden layer, softmax output), eta = 0.001

In [62]:
# Seed NumPy's global RNG so the random weight initialisation, and therefore
# the error curve printed below, is the same on every run.
np.random.seed(0)

# The input width is read from the data rather than hard-coded; one hidden
# layer of 20 units; one output unit per target genre.
mlp = MultiLayerPerceptron(layer_config=[Xtr_mini.shape[1], 20, len(input_genres)],
                           batch_size=batch_size)

mlp.evaluate(train_data, train_labels, valid_data, valid_labels,eta=0.001,
             eval_train=True)

print("Done:)\n")

Initializing input layer with size 140.
Initializing hidden layer with size 20.
Initializing output layer with size 4.
Done!
Training for 170 epochs...
[   0]  Training error: 0.75000 Test error: 0.75000
[   1]  Training error: 0.75000 Test error: 0.75000
[   2]  Training error: 0.75000 Test error: 0.75000
[   3]  Training error: 0.75000 Test error: 0.75000
[   4]  Training error: 0.75000 Test error: 0.75000
[   5]  Training error: 0.60750 Test error: 0.60000
[   6]  Training error: 0.63469 Test error: 0.61750
[   7]  Training error: 0.60500 Test error: 0.59750
[   8]  Training error: 0.58562 Test error: 0.59750
[   9]  Training error: 0.57031 Test error: 0.59750
[  10]  Training error: 0.55781 Test error: 0.58750
[  11]  Training error: 0.54156 Test error: 0.56750
[  12]  Training error: 0.53031 Test error: 0.54750
[  13]  Training error: 0.51687 Test error: 0.55000
[  14]  Training error: 0.50875 Test error: 0.53750
[  15]  Training error: 0.49750 Test error: 0.54000
[  16]  Training

[ 157]  Training error: 0.26281 Test error: 0.52750
[ 158]  Training error: 0.26219 Test error: 0.52750
[ 159]  Training error: 0.26125 Test error: 0.52750
[ 160]  Training error: 0.26094 Test error: 0.52500
[ 161]  Training error: 0.26062 Test error: 0.52500
[ 162]  Training error: 0.25938 Test error: 0.52500
[ 163]  Training error: 0.25812 Test error: 0.52500
[ 164]  Training error: 0.25625 Test error: 0.52500
[ 165]  Training error: 0.25469 Test error: 0.53000
[ 166]  Training error: 0.25438 Test error: 0.53250
[ 167]  Training error: 0.25438 Test error: 0.53250
[ 168]  Training error: 0.25344 Test error: 0.53750
[ 169]  Training error: 0.25281 Test error: 0.53750
Done:)

