In [1]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import keras
import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [3]:
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'C:/Users/nigel/Desktop/NSG ML/genres/{g}'):
        songname = f'C:/Users/nigel/Desktop/NSG ML/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

<Figure size 720x720 with 0 Axes>

In [4]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [5]:
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    for filename in os.listdir(f'C:/Users/nigel/Desktop/NSG ML/genres/{g}'):
        songname = f'C:/Users/nigel/Desktop/NSG ML/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rmse(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())


In [3]:
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00000.au,0.349943,0.130225,1784.420446,2002.650192,3806.485316,0.083066,-113.596742,121.557302,-19.158825,...,8.810668,-3.667367,5.75169,-5.162761,0.750947,-1.691937,-0.409954,-2.300208,1.219928,blues
1,blues.00001.au,0.340983,0.095918,1529.835316,2038.617579,3548.820207,0.056044,-207.556796,124.006717,8.930562,...,5.376802,-2.239119,4.216963,-6.012273,0.936109,-0.716537,0.293875,-0.287431,0.531573,blues
2,blues.00002.au,0.363603,0.175573,1552.481958,1747.165985,3040.514948,0.076301,-90.754394,140.459907,-29.109965,...,5.789265,-8.905224,-1.08372,-9.218359,2.455805,-7.726901,-1.815724,-3.433434,-2.226821,blues
3,blues.00003.au,0.404779,0.141191,1070.119953,1596.333948,2185.028454,0.033309,-199.431144,150.099218,5.647594,...,6.087676,-2.47642,-1.07389,-2.874777,0.780976,-3.316932,0.637981,-0.61969,-3.408233,blues
4,blues.00004.au,0.30859,0.091563,1835.494603,1748.362448,3580.945013,0.1015,-160.266031,126.1988,-35.605448,...,-2.806385,-6.934122,-7.558619,-9.173552,-4.512166,-5.453538,-0.924162,-4.409333,-11.703781,blues


In [4]:
data.shape
np.array(data)

array([['blues.00000.au', 0.3499431970389887, 0.13022463023662567, ...,
        -2.3002080931099553, 1.219928131251096, 'blues'],
       ['blues.00001.au', 0.340983161628006, 0.09591842442750932, ...,
        -0.2874306648121582, 0.5315729228628573, 'blues'],
       ['blues.00002.au', 0.363602838496103, 0.17557303607463834, ...,
        -3.4334342717656465, -2.2268214410307183, 'blues'],
       ...,
       ['rock.00097.au', 0.4321034468964737, 0.08161668479442596, ...,
        -12.594177514097485, -2.10700255491672, 'rock'],
       ['rock.00098.au', 0.3623490158126501, 0.08388779312372208, ...,
        -5.043121274989656, -3.5855956471307313, 'rock'],
       ['rock.00099.au', 0.35819512534874665, 0.05446073040366173, ...,
        -2.0220346710460277, 1.158525253018968, 'rock']], dtype=object)

In [5]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)

In [6]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

In [7]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))
X

array([[-0.35174862, -0.01072298, -0.58330334, ..., -0.23719158,
         0.00761145,  0.60349813],
       [-0.46146578, -0.53326615, -0.93906628, ..., -0.05518978,
         0.5438236 ,  0.42403528],
       [-0.18448399,  0.68001209, -0.90741936, ..., -0.60070707,
        -0.29428464, -0.29511278],
       ...,
       [ 0.65431762, -0.75110651, -0.17418012, ...,  0.76028053,
        -2.73474414, -0.26387449],
       [-0.19983726, -0.71651358, -1.12235633, ...,  0.2717664 ,
        -0.72311185, -0.64936228],
       [-0.25070236, -1.16473892, -0.82782084, ..., -0.12506872,
         0.08171799,  0.58748963]])

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [9]:
len(y_train)

800

In [10]:
len(y_test)

200

In [11]:
X_train[0]

array([ 1.39449724,  1.47060174,  0.98545433,  0.25140643,  0.60221307,
        1.6496682 ,  1.49161158, -0.78238686, -0.72901394,  1.24332047,
       -1.68895741,  0.96325771, -0.74780876,  0.69948674, -0.39118402,
        1.37015953, -1.02585361,  2.15765235, -0.79324632,  1.91273361,
       -0.69639818,  1.43828371, -0.31295742,  0.74952739, -1.22694914,
        1.99215478])

In [12]:
x_val = X_train[:200]
partial_x_train = X_train[200:]

y_val = y_train[:200]
partial_y_train = y_train[200:]

In [15]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(partial_x_train,
          partial_y_train,
          epochs=200,
          batch_size=32,
          validation_data=(x_val, y_val))
results = model.evaluate(X_test, y_test)
print('Test loss:', results[0])
print('Test accuracy:', results[1])

Instructions for updating:
Use tf.cast instead.
Train on 600 samples, validate on 200 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200


Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200


Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200


Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
Test loss: 2.3894962787628176
Test accuracy: 0.63


In [16]:
import tkinter
from tkinter import messagebox
from tkinter import filedialog
from PIL import ImageTk,Image
from tkinter import *


main_win = tkinter.Tk()
main_win.geometry("1000x500")
main_win.sourceFile = ''
w = Label(main_win, text="AUDIO GENRE CLASSIFICATION", font=("Helvetica", 25))
w.pack()



def chooseFile():
    main_win.sourceFile = filedialog.askopenfilename(parent=main_win, initialdir= "/", title='Please select a directory')

b_chooseFile = tkinter.Button(main_win, text = "Choose File", width = 20, height = 3, command = chooseFile)
b_chooseFile.place(x = 400,y = 150)
b_chooseFile.width = 100



def task():  
    cmap = plt.get_cmap('inferno')
    songname1 = f'{main_win.sourceFile}'
    y1, sr1 = librosa.load(songname1, mono=True, duration=30)
    plt.specgram(y1, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
    plt.axis('off');
    plt.clf()

    header = 'chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
    for i in range(1, 21):
        header += f' mfcc{i}'
    header = header.split()
    file = open('data1.csv', 'w', newline='')
    with file:
        writer = csv.writer(file)
        writer.writerow(header)
    chroma_stft = librosa.feature.chroma_stft(y=y1, sr=sr1)
    rmse = librosa.feature.rmse(y=y1)
    spec_cent = librosa.feature.spectral_centroid(y=y1, sr=sr1)
    spec_bw = librosa.feature.spectral_bandwidth(y=y1, sr=sr1)
    rolloff = librosa.feature.spectral_rolloff(y=y1, sr=sr1)
    zcr = librosa.feature.zero_crossing_rate(y1)
    mfcc = librosa.feature.mfcc(y=y1, sr=sr1)
    to_append = f'{np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    file = open('data1.csv', 'a', newline='')
    with file:
        writer = csv.writer(file)
        writer.writerow(to_append.split())
    data1 = pd.read_csv('data1.csv')
    scaler = StandardScaler()
    X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))
    X1= scaler.transform(data1)
    predictions=model.predict(X1)
    if(np.argmax(predictions[0])==0):
        out = Label(main_win, text="---BLUES-----", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==1):
        out = Label(main_win, text="--CLASSICAL--", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==2):
        out = Label(main_win, text="--COUNTRY----", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==3):
        out = Label(main_win, text="----DISCO----", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==4):
        out = Label(main_win, text="---HIP-HOP---", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==5):
        out = Label(main_win, text="----JAZZ----", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==6):
        out = Label(main_win, text="---METAL---", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==7):
        out = Label(main_win, text="----POP----", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==8):
        out = Label(main_win, text="--REGGAE--", font=("Helvetica", 25))
        out.place(x = 400,y = 350)
    if(np.argmax(predictions[0])==9):
        out = Label(main_win, text="---ROCK---", font=("Helvetica", 25))
        out.place(x = 400,y = 350)

predict = tkinter.Button(main_win, text = "Predict", width = 20, height = 3, command = task)
predict.place(x = 400,y = 250)
predict.width = 100

main_win.mainloop()

  

<Figure size 432x288 with 0 Axes>

In [None]:
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:             
     json_file.write(model_json) 

# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

In [None]:
from keras.models import model_from_json
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

In [14]:
y_train

array([6, 4, 0, 2, 6, 6, 3, 9, 6, 7, 9, 7, 9, 2, 1, 3, 1, 2, 7, 8, 3, 5,
       7, 7, 6, 8, 5, 5, 9, 0, 9, 4, 7, 9, 0, 6, 2, 5, 5, 1, 1, 3, 7, 6,
       3, 6, 1, 3, 7, 0, 8, 0, 7, 4, 7, 0, 1, 5, 3, 7, 1, 9, 9, 8, 5, 8,
       4, 0, 4, 4, 0, 6, 2, 5, 1, 9, 3, 5, 1, 2, 8, 3, 9, 9, 8, 4, 2, 8,
       4, 8, 7, 3, 4, 9, 4, 2, 8, 0, 4, 4, 4, 2, 1, 0, 3, 2, 9, 4, 6, 0,
       9, 8, 4, 6, 4, 7, 3, 6, 1, 6, 3, 5, 6, 0, 0, 6, 7, 3, 7, 5, 0, 6,
       7, 4, 7, 9, 4, 9, 1, 3, 2, 7, 1, 9, 5, 6, 7, 5, 2, 7, 9, 7, 4, 0,
       1, 9, 3, 7, 4, 1, 1, 1, 2, 5, 3, 1, 7, 9, 2, 7, 1, 3, 8, 8, 8, 4,
       4, 3, 1, 3, 3, 2, 1, 4, 9, 1, 3, 3, 7, 0, 6, 1, 3, 4, 8, 8, 6, 2,
       8, 0, 8, 6, 9, 0, 8, 7, 5, 6, 8, 4, 4, 6, 5, 0, 7, 6, 7, 9, 9, 1,
       2, 2, 9, 4, 2, 9, 5, 0, 3, 5, 2, 1, 1, 3, 1, 8, 8, 3, 4, 6, 5, 6,
       4, 0, 7, 1, 9, 4, 5, 4, 6, 6, 4, 2, 0, 6, 6, 1, 4, 7, 6, 8, 2, 4,
       9, 2, 2, 6, 6, 5, 4, 6, 1, 2, 1, 5, 4, 8, 2, 5, 5, 6, 7, 7, 9, 1,
       6, 0, 8, 1, 2, 0, 2, 0, 7, 7, 7, 0, 4, 5, 2,