# Artificial Neural Network

In [33]:
import random
import numpy as np

In [34]:
class Network(object):

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.  For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron.  The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1.  Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        #parameters for Adam optimizer
        self.vdb = [np.zeros(b.shape) for b in self.biases]
        self.vdw = [np.zeros(w.shape) for w in self.weights]
        self.sdb = [np.zeros(b.shape) for b in self.biases]
        self.sdw = [np.zeros(w.shape) for w in self.weights]
        
        
        
    def feedforward(self, a,using_soft_max=False):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases[:-1], self.weights[:-1]):
            a = sigmoid(np.dot(w, a)+ b)
            
        b,w = (self.biases[-1],self.weights[-1])
        if using_soft_max:
            a = soft_max(np.dot(w, a)+ b)
        else:
            a = sigmoid(np.dot(w, a)+ b)
        return a
    
    def SGD(self, training_data, epochs, mini_batch_size, eta,test_data=None,using_soft_max = False,using_adam_optimizer = False):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially."""
        if test_data != None:
            n_test = len(test_data)

        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta, j, using_soft_max=using_soft_max,using_adam_optimizer=using_adam_optimizer)
            if test_data != None:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data,using_soft_max), n_test))
            else:
                print("Epoch {0} complete".format(j))
    
    
    def update_mini_batch(self, mini_batch, eta,epoch,using_soft_max=False,using_adam_optimizer = False):
        beta1 = 0.9 
        beta2 = 0.999
        epsilon = np.array([pow(10, -8)])
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y, soft_max)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        #Adam optimizer
        if using_adam_optimizer:
            self.vdw = np.array([beta1*vv+(1-beta1)*nw for vv, nw in zip(self.vdw,nabla_w)])
            self.vdb = np.array([beta1*bb+(1-beta1)*nb for bb, nb in zip(self.vdb,nabla_b)])
            self.sdw = np.array([beta2*ss+(1-beta2)*pow(nw, 2) for ss, nw in zip(self.sdw,nabla_w)])
            self.sdb = np.array([beta2*ss+(1-beta2)*pow(nw, 2) for ss, nw in zip(self.sdb,nabla_b)])
            vdw_corrected = self.vdw / (1-pow(beta1, epoch+1))
            vdb_corrected = self.vdb / (1-pow(beta1, epoch+1))
            sdw_corrected = self.sdw / (1-pow(beta2,epoch+1))
            sdb_corrected = self.sdb / (1-pow(beta2,epoch+1))
            self.weights = [w - (eta * (v_corrected / (np.sqrt(s_corrected)+ epsilon)))
                        for w, v_corrected, s_corrected in zip(self.weights,vdw_corrected,sdw_corrected)]
            self.biases = [b - (eta * (v_corrected / (np.sqrt(s_corrected)+ epsilon)))
                        for b, v_corrected, s_corrected in zip(self.biases,vdb_corrected,sdb_corrected)]
        else:     
            self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]
    
    def backprop(self, x, y, using_soft_max=False,using_cross_entropy = True):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases[:-1], self.weights[:-1]):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        w,b = (self.weights[-1],self.biases[-1])
        z = np.dot(w, activation)+b
        zs.append(z)
        if using_soft_max:
            activation = soft_max(z)
        else:
            activation = sigmoid(z)
        activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * soft_max_prime(zs[-1])
        if using_cross_entropy:
            delta = self.cost_derivative(activations[-1], y)
        else:
            delta = self.cost_derivative(activations[-1], y) * soft_max_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)
    
    def evaluate(self, test_data, soft_max):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x,using_soft_max=soft_max)), np.argmax(y)) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)
    
    

### Activation Functions

In [35]:
def soft_max(z):
    expA = np.exp(z - np.max(z))
    res = expA / expA.sum()
    return res

In [36]:
def soft_max_prime(z):
    return soft_max(z)*(1-soft_max(z))

In [37]:
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

In [38]:
def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

## Preparing data

In [39]:
%cd D:\Intelligent Systems\

D:\Intelligent Systems


In [40]:
import pandas as pd
import csv
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [41]:
supported_genres=['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']

In [42]:
#normalising all features
data = pd.read_csv('predictions_cnn.csv')
genre_list=[]

for row in  data.loc[:,'filename']:
    genre_list.append(row.split('.')[0])

minMaxScaler = MinMaxScaler()
X=minMaxScaler.fit_transform(np.array(data.iloc[:,3:-10], dtype = float))

normalised_values_table = pd.concat([data.loc[:,['filename','offset','duration']],
                           pd.DataFrame(X,columns=data.columns[3:-10]),
                           data.loc[:,supported_genres],
                           pd.DataFrame(np.array(genre_list),columns=['genre'])],
                           axis=1)
normalised_values_table.to_csv('Normalised_Features.csv',index=False)

In [43]:
#calculating desired output according to genre column in table
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
auxArray = np.array([[1],[0],[0],[0],[0],[0],[0],[0],[0],[0]])
desiredOutput = [np.roll(auxArray,x) for x in y]
desiredOutput = np.array(desiredOutput)

In [44]:
#in this part we include which features we want to give to ANN and combine them with predictions
USING_PREDICTIONS = True
data = pd.read_csv('Normalised_Features.csv')
features_selection = ['chroma_stft','spectral_centroid','spectral_bandwidth','rolloff','zero_crossing_rate', 'mfcc']
new_feature_table = pd.DataFrame()

for feature in features_selection:
    if feature != 'mfcc':
        new_feature_table = pd.concat([new_feature_table, data.loc[:,[feature]]],axis=1)
    else:
        new_feature_table = pd.concat([new_feature_table, data.filter(regex=("mfcc.*"))],axis=1)

if USING_PREDICTIONS:       
    features_and_predictions = pd.concat([new_feature_table,ndata.loc[:,supported_genres]],naxis=1)
else:
    features_and_predictions = new_feature_table   

ANN_NUM_OF_INPUT = len(features_and_predictions.columns)
ANN_NUM_OF_OUTPUT = len(supported_genres)


In [45]:
data_set=[]

for row_index,row in features_and_predictions.iterrows():
    r = np.array(row)
    data_set.append(np.reshape(r,(len(row),1)))
data_set = [(x,y) for x,y in zip(data_set,desiredOutput)]
random.shuffle(data_set)  

## Learning

In [46]:
ann = Network([ANN_NUM_OF_INPUT,20,15,ANN_NUM_OF_OUTPUT])

In [47]:
learning_dataset = data_set[:3000]
evaluation_dataset = data_set[3000:]

In [48]:
ann.SGD(learning_dataset,2000,32,0.03,test_data=evaluation_dataset, using_soft_max=True,using_adam_optimizer=True)

Epoch 0: 222 / 1000
Epoch 1: 274 / 1000
Epoch 2: 321 / 1000
Epoch 3: 358 / 1000
Epoch 4: 369 / 1000
Epoch 5: 360 / 1000
Epoch 6: 388 / 1000
Epoch 7: 383 / 1000
Epoch 8: 387 / 1000
Epoch 9: 379 / 1000
Epoch 10: 389 / 1000
Epoch 11: 399 / 1000
Epoch 12: 406 / 1000
Epoch 13: 419 / 1000
Epoch 14: 413 / 1000
Epoch 15: 430 / 1000
Epoch 16: 434 / 1000
Epoch 17: 433 / 1000
Epoch 18: 445 / 1000
Epoch 19: 433 / 1000
Epoch 20: 450 / 1000
Epoch 21: 446 / 1000
Epoch 22: 467 / 1000
Epoch 23: 455 / 1000
Epoch 24: 444 / 1000
Epoch 25: 483 / 1000
Epoch 26: 440 / 1000
Epoch 27: 463 / 1000
Epoch 28: 474 / 1000
Epoch 29: 472 / 1000
Epoch 30: 488 / 1000
Epoch 31: 489 / 1000
Epoch 32: 458 / 1000
Epoch 33: 501 / 1000
Epoch 34: 486 / 1000
Epoch 35: 473 / 1000
Epoch 36: 507 / 1000
Epoch 37: 473 / 1000
Epoch 38: 497 / 1000
Epoch 39: 511 / 1000
Epoch 40: 509 / 1000
Epoch 41: 512 / 1000
Epoch 42: 492 / 1000
Epoch 43: 518 / 1000
Epoch 44: 499 / 1000
Epoch 45: 515 / 1000
Epoch 46: 523 / 1000
Epoch 47: 520 / 1000
Ep

Epoch 378: 627 / 1000
Epoch 379: 620 / 1000
Epoch 380: 618 / 1000
Epoch 381: 601 / 1000
Epoch 382: 600 / 1000
Epoch 383: 596 / 1000
Epoch 384: 610 / 1000
Epoch 385: 589 / 1000
Epoch 386: 577 / 1000
Epoch 387: 606 / 1000
Epoch 388: 605 / 1000
Epoch 389: 598 / 1000
Epoch 390: 602 / 1000
Epoch 391: 603 / 1000
Epoch 392: 608 / 1000
Epoch 393: 620 / 1000
Epoch 394: 607 / 1000
Epoch 395: 608 / 1000
Epoch 396: 617 / 1000
Epoch 397: 613 / 1000
Epoch 398: 622 / 1000
Epoch 399: 614 / 1000
Epoch 400: 616 / 1000
Epoch 401: 621 / 1000
Epoch 402: 595 / 1000
Epoch 403: 616 / 1000
Epoch 404: 594 / 1000
Epoch 405: 599 / 1000
Epoch 406: 614 / 1000
Epoch 407: 605 / 1000
Epoch 408: 603 / 1000
Epoch 409: 618 / 1000
Epoch 410: 610 / 1000
Epoch 411: 604 / 1000
Epoch 412: 607 / 1000
Epoch 413: 615 / 1000
Epoch 414: 615 / 1000
Epoch 415: 619 / 1000
Epoch 416: 609 / 1000
Epoch 417: 634 / 1000
Epoch 418: 614 / 1000
Epoch 419: 624 / 1000
Epoch 420: 623 / 1000
Epoch 421: 618 / 1000
Epoch 422: 623 / 1000
Epoch 423:

Epoch 751: 601 / 1000
Epoch 752: 619 / 1000
Epoch 753: 617 / 1000
Epoch 754: 599 / 1000
Epoch 755: 612 / 1000
Epoch 756: 602 / 1000
Epoch 757: 605 / 1000
Epoch 758: 581 / 1000
Epoch 759: 591 / 1000
Epoch 760: 612 / 1000
Epoch 761: 624 / 1000
Epoch 762: 603 / 1000
Epoch 763: 621 / 1000
Epoch 764: 600 / 1000
Epoch 765: 612 / 1000
Epoch 766: 616 / 1000
Epoch 767: 601 / 1000
Epoch 768: 601 / 1000
Epoch 769: 601 / 1000
Epoch 770: 624 / 1000
Epoch 771: 613 / 1000
Epoch 772: 584 / 1000
Epoch 773: 603 / 1000
Epoch 774: 591 / 1000
Epoch 775: 598 / 1000
Epoch 776: 600 / 1000
Epoch 777: 629 / 1000
Epoch 778: 587 / 1000
Epoch 779: 586 / 1000
Epoch 780: 611 / 1000
Epoch 781: 588 / 1000
Epoch 782: 601 / 1000
Epoch 783: 592 / 1000
Epoch 784: 602 / 1000
Epoch 785: 604 / 1000
Epoch 786: 609 / 1000
Epoch 787: 593 / 1000
Epoch 788: 600 / 1000
Epoch 789: 601 / 1000
Epoch 790: 611 / 1000
Epoch 791: 598 / 1000
Epoch 792: 599 / 1000
Epoch 793: 594 / 1000
Epoch 794: 597 / 1000
Epoch 795: 605 / 1000
Epoch 796:

Epoch 1119: 602 / 1000
Epoch 1120: 596 / 1000
Epoch 1121: 587 / 1000
Epoch 1122: 608 / 1000
Epoch 1123: 602 / 1000
Epoch 1124: 593 / 1000
Epoch 1125: 595 / 1000
Epoch 1126: 588 / 1000
Epoch 1127: 605 / 1000
Epoch 1128: 609 / 1000
Epoch 1129: 604 / 1000
Epoch 1130: 576 / 1000
Epoch 1131: 605 / 1000
Epoch 1132: 619 / 1000
Epoch 1133: 616 / 1000
Epoch 1134: 582 / 1000
Epoch 1135: 609 / 1000
Epoch 1136: 597 / 1000
Epoch 1137: 583 / 1000
Epoch 1138: 610 / 1000
Epoch 1139: 597 / 1000
Epoch 1140: 603 / 1000
Epoch 1141: 602 / 1000
Epoch 1142: 569 / 1000
Epoch 1143: 602 / 1000
Epoch 1144: 611 / 1000
Epoch 1145: 600 / 1000
Epoch 1146: 591 / 1000
Epoch 1147: 595 / 1000
Epoch 1148: 593 / 1000
Epoch 1149: 597 / 1000
Epoch 1150: 587 / 1000
Epoch 1151: 607 / 1000
Epoch 1152: 610 / 1000
Epoch 1153: 582 / 1000
Epoch 1154: 600 / 1000
Epoch 1155: 583 / 1000
Epoch 1156: 603 / 1000
Epoch 1157: 591 / 1000
Epoch 1158: 606 / 1000
Epoch 1159: 616 / 1000
Epoch 1160: 603 / 1000
Epoch 1161: 591 / 1000
Epoch 1162:

KeyboardInterrupt: 