# Workflow:

1. Importing necessary packages
2. Feature extraction and creation of dataset
3. Train - test split
4. Feature scaling
5. Model building
6. Evaluation metrics
7. Random value prediction
8. Saving the model

## Importing necessary packages

In [64]:
import os
import tempfile

import numpy as np
from tensorflow import keras
import librosa
from matplotlib import pyplot
import flask
import pandas as pd
import soundfile as sf
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from scipy.stats import randint as sp_randint
from keras.layers import LSTM

## Feature extraction using librosa

1. Mel frequency Cepstral coefficients
2. Mel spectrogram
3. Chroma vector
4. Tonal Centroid Features

### Mel frequency cepstral coefficients  

In [8]:
#Defining a function that passses audio file as a parameter to obtain MFCC
def get_mfcc(file_path):
    
    #Converting the mp3 audio data to wav as supported by librosa
    data, samplerate = sf.read(file_path)
    sf.write("temp.wav", data, samplerate)
    
    #Passing the audio file to return MFCC
    y,sr=librosa.load("temp.wav",offset=0,duration=30)
    mfcc=np.array(librosa.feature.mfcc(y=y,sr=sr))
    return mfcc

In [27]:
#Finding shape of mfcc for one track
mfcc_shape=get_mfcc("/Users/casarulez/Projects/Genre_classification/genres/blues/blues.00085.au")
mfcc_shape.shape

(20, 1292)

### Mel Spectrogram 

In [15]:
def get_melspectrogram(file_path):
    
    #Converting the mp3 audio data to wav as supported by librosa
    data, samplerate = sf.read(file_path)
    sf.write("temp.wav", data, samplerate)
    
    #Passing the audio file to return melspectrogram
    y,sr=librosa.load("temp.wav",offset=0,duration=30)
    melspectrogram=np.array(librosa.feature.melspectrogram(y=y,sr=sr))
    return melspectrogram

In [26]:
#Finding shape of melspectrogram for one track
mel_shape=get_melspectrogram("/Users/casarulez/Projects/Genre_classification/genres/blues/blues.00085.au")
mel_shape.shape

(128, 1292)

### Chroma vector 

In [4]:
def get_chroma_vector(file_path):
    #Converting the audio data to wav as supported by librosa
    data, samplerate = sf.read(file_path)
    sf.write("temp.wav", data, samplerate)
    
    #Passing the audio file to return chroma
    y,sr=librosa.load("temp.wav",offset=0,duration=30)
    chroma=np.array(librosa.feature.chroma_stft(y=y,sr=sr))
    return chroma

In [25]:
#Finding shape of chroma for one track
chroma_shape=get_chroma_vector("/Users/casarulez/Projects/Genre_classification/genres/blues/blues.00085.au")
chroma_shape.shape

(12, 1292)

### Tonal centroid features 

In [20]:
def get_tonnetz(file_path):
    #Converting the audio data to wav as supported by librosa
    data, samplerate = sf.read(file_path)
    sf.write("temp.wav", data, samplerate)
    
    #Passing the audio file to return tonnetz
    y,sr=librosa.load("temp.wav",offset=0,duration=30)
    tonnetz=np.array(librosa.feature.tonnetz(y=y,sr=sr))
    return tonnetz

In [24]:
#Finding shape of tonnetz for one track
tonnetz_shape=get_tonnetz("/Users/casarulez/Projects/Genre_classification/genres/blues/blues.00085.au")
tonnetz_shape.shape

(6, 1292)

In [22]:
#Defining a function to extract all features
def get_features(file_path):
    
    #Converting the audio data to wav as supported by librosa
    data, samplerate = sf.read(file_path)
    sf.write("temp.wav", data, samplerate)
    
    #MFCC
    mfcc=get_mfcc("temp.wav")
    mfcc_feature=np.concatenate((mfcc.mean(axis=1),mfcc.min(axis=1),mfcc.max(axis=1)))
    
    #Melspectrogram
    melspectrogram=get_melspectrogram("temp.wav")
    melspectrogram_feature=np.concatenate((melspectrogram.mean(axis=1),melspectrogram.min(axis=1),melspectrogram.max(axis=1)))
    
    #Chroma
    chroma=get_chroma_vector("temp.wav")
    chroma_feature=np.concatenate((chroma.mean(axis=1),chroma.min(axis=1),chroma.max(axis=1)))
    
    #Tonnetz
    tntz=get_tonnetz("temp.wav")
    tntz_feature=np.concatenate((tntz.mean(axis=1),tntz.min(axis=1),tntz.max(axis=1)))
    
    #All features
    feature=np.concatenate((chroma_feature,melspectrogram_feature,mfcc_feature,tntz_feature))
    return feature

## Calculating features for the full dataset 

In [23]:
#Defining directory, genres, feature and labels
directory="/Users/casarulez/Projects/Genre_classification/genres"
genres=["blues","classical","country","disco","hiphop","jazz","metal","pop","reggae","rock"]
features=[]
labels=[]

#Iterating over data directory to calculate features
for genre in genres:
    print("Calculating features for genre:",genre)
    for file in os.listdir(directory+"/"+genre):
            file_path=directory+"/"+genre+"/"+file
            
            features.append(get_features(file_path))
            label=genres.index(genre)
            labels.append(label)
print("Feature calculation complete")

Calculating features for genre: blues
Calculating features for genre: classical
Calculating features for genre: country
Calculating features for genre: disco
Calculating features for genre: hiphop
Calculating features for genre: jazz
Calculating features for genre: metal
Calculating features for genre: pop
Calculating features for genre: reggae
Calculating features for genre: rock
Feature calculation complete


In [1]:
#Shape of features and labels list
print(len(features))
print(len(labels))

NameError: name 'features' is not defined

## Splitting the dataset into training, validation and testing 

In [31]:
#Shufling features and labels
permutations=np.random.permutation(1000)
features=np.array(features)[permutations]
labels=np.array(labels)[permutations]

#Training data
features_train=features[0:600]
labels_train=labels[0:600]

#Validation data
features_val=features[600:800]
labels_val=labels[600:800]

#Testing data
features_test=features[800:1000]
labels_test=labels[800:1000]

#Checking shape of training and testing variables
features_train.shape,features_test.shape,labels_train.shape,labels_test.shape

((600, 498), (200, 498), (600,), (200,))

## Feature scaling 

In [32]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

In [33]:
#Transforming features_train to a standard scale
features_train=sc.fit_transform(features_train)
features_train

array([[-0.56176733, -0.42827787, -0.14581086, ..., -0.39142909,
        -0.53818868, -1.23612573],
       [-0.82222578, -1.27475677,  0.45966409, ...,  0.35285786,
         0.92452212,  0.0616157 ],
       [-2.28285676, -2.12173381, -1.68483088, ...,  0.97459864,
         1.61656711,  1.41709568],
       ...,
       [-0.06061188,  0.22254303,  0.31983719, ..., -0.13270662,
         0.13017711,  0.58324677],
       [ 1.04779221,  0.57836249,  0.61825878, ..., -1.41115766,
        -0.85907654, -1.09343102],
       [-0.76351985, -0.06017888, -0.83750704, ..., -1.10068938,
        -1.01504562, -1.25932717]])

In [37]:
features_train.tolist()
print(len(features_train))

600


In [34]:
#Transforming features_test to a standard scale
features_test=sc.fit_transform(features_test)
features_test

array([[-0.61366474,  0.8921163 ,  0.25326711, ...,  0.44704962,
         1.15370791,  0.32442307],
       [ 0.56227779,  1.2009041 ,  0.13066414, ..., -0.65931983,
         0.16065284, -0.64682533],
       [-0.3574392 , -0.74213102, -2.06831547, ...,  0.88956178,
         1.03997269,  1.30688756],
       ...,
       [ 0.85566978,  0.94908678,  0.96759574, ..., -0.13600017,
        -1.12102158, -1.08732538],
       [ 0.33371084,  1.09285625,  1.85260584, ..., -0.92875516,
        -0.77030964, -0.85039718],
       [ 0.29673758, -0.03323537,  0.1809059 , ...,  0.57177147,
         0.87346106,  0.77610519]])

In [38]:
features_test.tolist()
print(len(features_test))

200


## Model building 

In [82]:
#Adding input layer, 2 dense layers and output later
inputs=keras.Input(shape=(498), name="feature")
x=keras.layers.Dense(300,activation="tanh",name="dense_1")(inputs)
x=keras.layers.Dense(200,activation="relu",name="dense_2")(x)
outputs=keras.layers.Dense(10,activation="softmax",name="predictions")(x)

#Defining model
model=keras.Model(inputs=inputs,outputs=outputs)

#Compiling model
model.compile(optimizer=keras.optimizers.RMSprop(),loss=keras.losses.SparseCategoricalCrossentropy(),metrics=[keras.metrics.SparseCategoricalAccuracy()])


#Fitting the model
model.fit(features_train.tolist(),labels_train.tolist(),verbose=1,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x29b3d76d0>

## Model evaluation 

In [83]:
#Accuracy of training data
score=model.evaluate(x=features_train.tolist(),y=labels_train.tolist())
print('Accuracy:'+str(score[1]*100)+'%')

Accuracy:100.0%


In [84]:
#Accuracy on testing data
score=model.evaluate(x=features_test.tolist(),y=labels_test.tolist())
print('Accuracy:'+str(score[1]*100)+'%')

Accuracy:69.9999988079071%


## Random value prediction 

In [None]:
#Declaring a function to predict genre of input audio
def prediction(file_path):
    feature=get_features(file_path)
    feature=feature.reshape(1,498)
    feature=sc.fit_transform(feature)
    y=model.predict(feature)
    ind=numpy.argmax(y)
    print(genres[ind])

In [None]:
#prediction("/Users/casarulez/Downloads/in.mp3")

## Saving the model 

In [None]:
model.save("genre.h5")