In [19]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
 
import pathlib
from pathlib import Path
import matplotlib.pyplot as plot
import librosa
    
# import the necessary packages
from musicrec.vgg import VGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
from datetime import datetime
import pickle
import cv2
import os
import sys

from keras.models import Sequential

In [23]:

data_folder = Path("../../../audio/testfiles/GTZAN_test/genres/")
output_folder = Path("./output/cvnn.model")
spectogram_folder = Path("./img_data/")
# Duration of songsnippet in seconds
duration = 10
# Matplotlib colormap for spectogram
spectogram_cmap = 'binary' 
# Predefined list of genres
pred_genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split() 
#imagePaths = sorted(list(spectogram_folder.list_images(args["dataset"])))
#imagePaths

imagesize_x = 256
imagesize_y = 256

In [24]:
#Get directories of all songs
songs = []
genres = []

spectograms = []

for g in data_folder.iterdir():
    genres.append(g.name)
    for i in g.iterdir():
        songs.append(i)

In [26]:
# Calculate all spectograms
cmap = plot.get_cmap(spectogram_cmap)
plot.figure(figsize=(2.5,5))
spectograms = []


# Iterate through all songs and generate their spactograms. Save them all as images.
for genre in genres:
    pathlib.Path(f'img_data/{genre}').mkdir(parents=True, exist_ok=True)     
for song in songs:
    y, sr = librosa.load(song, mono=True, duration=duration)
    plot.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
    plot.axis('off');
    spec_path = pathlib.Path(f'img_data/{song.parts[-2]}/{".".join(str(song.parts[-1]).split(".")[:2])}.png')
    spectograms.append(spec_path)
    plot.savefig(spec_path, bbox_inches='tight', pad_inches = 0)
    plot.clf()

In [27]:
# Seed for RNG
random.seed(datetime.now())

In [28]:
# Shuffle the spectograms
random.shuffle(spectograms)
spectograms

[PosixPath('img_data/reggae/reggae.00003.png'),
 PosixPath('img_data/reggae/reggae.00004.png'),
 PosixPath('img_data/hiphop/hiphop.00005.png'),
 PosixPath('img_data/blues/blues.00003.png'),
 PosixPath('img_data/hiphop/hiphop.00002.png'),
 PosixPath('img_data/rock/rock.00003.png'),
 PosixPath('img_data/hiphop/hiphop.00001.png'),
 PosixPath('img_data/reggae/reggae.00000.png'),
 PosixPath('img_data/rock/rock.00004.png'),
 PosixPath('img_data/country/country.00000.png'),
 PosixPath('img_data/blues/blues.00001.png'),
 PosixPath('img_data/metal/metal.00001.png'),
 PosixPath('img_data/hiphop/hiphop.00003.png'),
 PosixPath('img_data/pop/pop.00002.png'),
 PosixPath('img_data/jazz/jazz.00000.png'),
 PosixPath('img_data/jazz/jazz.00002.png'),
 PosixPath('img_data/blues/blues.00002.png'),
 PosixPath('img_data/metal/metal.00003.png'),
 PosixPath('img_data/pop/pop.00005.png'),
 PosixPath('img_data/country/country.00005.png'),
 PosixPath('img_data/disco/disco.00003.png'),
 PosixPath('img_data/classic

In [29]:
# Import images, convert them to grayscale (one uint8 per pixel) and load them into an array
images = []
for spec_path in spectograms:
    image = cv2.imread(str(spec_path))
    col_pixels = np.array(np.where(image != 255))
    first_col_pixel = col_pixels[:,0]
    last_col_pixel = col_pixels[:,-1]
    image = image[first_col_pixel[0]:last_col_pixel[0], first_col_pixel[1]:last_col_pixel[1]]
    #image = cv2.resize(image, (imagesize_x, imagesize_y))
    imagesize_x = image.shape[1]
    imagesize_y = image.shape[0]
    image_name = str(spec_path) + "-crop.png"
    cv2.imwrite(image_name, image)
    #image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    images.append(image)
images = np.array(images, dtype="float") / 255.0 
data = images

In [30]:
# Create array with correct labels for each spectogram
labels = []
for spec in spectograms:
    labels.append(spec.parts[-2])  
labels

['reggae',
 'reggae',
 'hiphop',
 'blues',
 'hiphop',
 'rock',
 'hiphop',
 'reggae',
 'rock',
 'country',
 'blues',
 'metal',
 'hiphop',
 'pop',
 'jazz',
 'jazz',
 'blues',
 'metal',
 'pop',
 'country',
 'disco',
 'classical',
 'reggae',
 'jazz',
 'country',
 'rock',
 'jazz',
 'hiphop',
 'classical',
 'rock',
 'classical',
 'jazz',
 'blues',
 'disco',
 'metal',
 'rock',
 'rock',
 'disco',
 'metal',
 'pop',
 'pop',
 'blues',
 'disco',
 'country',
 'classical',
 'country',
 'disco',
 'pop',
 'blues',
 'metal',
 'hiphop',
 'reggae',
 'classical',
 'metal',
 'country',
 'classical',
 'reggae',
 'jazz',
 'pop',
 'disco']

In [31]:
np.shape(data)

(60, 384, 193, 3)

In [38]:
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)
 
# convert the labels from integers to vectors (for 2-class, binary
# classification you should use Keras' to_categorical function
# instead as the scikit-learn's LabelBinarizer will not return a
# vector)
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)
np.size(trainY)
trainY

array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0, 0,

In [33]:
lb.classes_

array(['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
       'metal', 'pop', 'reggae', 'rock'], dtype='<U9')

In [34]:
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, 
                         height_shift_range=0.1, shear_range=0.2, 
                         zoom_range=0.2,horizontal_flip=True, 
                         fill_mode="nearest")
 
# initialize our VGG-like Convolutional Neural Network
model = VGGNet.build(width=imagesize_x, height=imagesize_y, depth=3, 
                          num_genres=len(lb.classes_))

In [35]:
# initialize our initial learning rate, # of epochs to train for,
# and batch size
INIT_LR = 0.01
EPOCHS = 550
BS = 32
 
# initialize the model and optimizer (you'll want to use
# binary_crossentropy for 2-class classification)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
 
# train the network
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS), 
                        validation_data=(testX, testY), 
                        steps_per_epoch=len(trainX) // BS,
                        epochs=EPOCHS)

[INFO] training network...
Epoch 1/550
Epoch 2/550
Epoch 3/550
Epoch 4/550
Epoch 5/550
Epoch 6/550
Epoch 7/550
Epoch 8/550
Epoch 9/550
Epoch 10/550
Epoch 11/550
Epoch 12/550
Epoch 13/550
Epoch 14/550
Epoch 15/550
Epoch 16/550
Epoch 17/550
Epoch 18/550
Epoch 19/550
Epoch 20/550
Epoch 21/550
Epoch 22/550
Epoch 23/550
Epoch 24/550
Epoch 25/550
Epoch 26/550
Epoch 27/550
Epoch 28/550
Epoch 29/550
Epoch 30/550
Epoch 31/550
Epoch 32/550
Epoch 33/550
Epoch 34/550
Epoch 35/550
Epoch 36/550
Epoch 37/550
Epoch 38/550
Epoch 39/550
Epoch 40/550
Epoch 41/550
Epoch 42/550
Epoch 43/550
Epoch 44/550
Epoch 45/550
Epoch 46/550
Epoch 47/550
Epoch 48/550
Epoch 49/550
Epoch 50/550
Epoch 51/550
Epoch 52/550
Epoch 53/550
Epoch 54/550
Epoch 55/550
Epoch 56/550
Epoch 57/550
Epoch 58/550
Epoch 59/550
Epoch 60/550
Epoch 61/550
Epoch 62/550
Epoch 63/550


Epoch 64/550
Epoch 65/550
Epoch 66/550
Epoch 67/550
Epoch 68/550
Epoch 69/550
Epoch 70/550
Epoch 71/550
Epoch 72/550
Epoch 73/550
Epoch 74/550
Epoch 75/550
Epoch 76/550
Epoch 77/550
Epoch 78/550
Epoch 79/550
Epoch 80/550
Epoch 81/550
Epoch 82/550
Epoch 83/550
Epoch 84/550
Epoch 85/550
Epoch 86/550
Epoch 87/550
Epoch 88/550
Epoch 89/550
Epoch 90/550
Epoch 91/550
Epoch 92/550
Epoch 93/550
Epoch 94/550
Epoch 95/550
Epoch 96/550
Epoch 97/550
Epoch 98/550
Epoch 99/550
Epoch 100/550
Epoch 101/550
Epoch 102/550
Epoch 103/550
Epoch 104/550
Epoch 105/550
Epoch 106/550
Epoch 107/550
Epoch 108/550
Epoch 109/550
Epoch 110/550
Epoch 111/550
Epoch 112/550
Epoch 113/550
Epoch 114/550
Epoch 115/550
Epoch 116/550
Epoch 117/550
Epoch 118/550
Epoch 119/550
Epoch 120/550
Epoch 121/550
Epoch 122/550
Epoch 123/550
Epoch 124/550
Epoch 125/550
Epoch 126/550
Epoch 127/550


Epoch 128/550
Epoch 129/550
Epoch 130/550
Epoch 131/550
Epoch 132/550
Epoch 133/550
Epoch 134/550
Epoch 135/550
Epoch 136/550
Epoch 137/550
Epoch 138/550
Epoch 139/550
Epoch 140/550
Epoch 141/550
Epoch 142/550
Epoch 143/550
Epoch 144/550
Epoch 145/550
Epoch 146/550
Epoch 147/550
Epoch 148/550
Epoch 149/550
Epoch 150/550
Epoch 151/550
Epoch 152/550
Epoch 153/550
Epoch 154/550
Epoch 155/550
Epoch 156/550
Epoch 157/550
Epoch 158/550
Epoch 159/550
Epoch 160/550
Epoch 161/550
Epoch 162/550
Epoch 163/550
Epoch 164/550
Epoch 165/550
Epoch 166/550
Epoch 167/550
Epoch 168/550
Epoch 169/550
Epoch 170/550
Epoch 171/550
Epoch 172/550
Epoch 173/550
Epoch 174/550
Epoch 175/550
Epoch 176/550
Epoch 177/550
Epoch 178/550
Epoch 179/550
Epoch 180/550
Epoch 181/550
Epoch 182/550
Epoch 183/550
Epoch 184/550
Epoch 185/550
Epoch 186/550
Epoch 187/550
Epoch 188/550
Epoch 189/550
Epoch 190/550


Epoch 191/550
Epoch 192/550
Epoch 193/550


KeyboardInterrupt: 

In [None]:
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
                            predictions.argmax(axis=1), 
                            target_names=lb.classes_))
 
# plot the training loss and accuracy
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["acc"], label="train_acc")
plt.plot(N, H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy (SmallVGGNet)")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
#plt.savefig(args["plot"])
 
# save the model and label binarizer to disk
#print("[INFO] serializing network and label binarizer...")
#model.save(args["model"])
#f = open(args["label_bin"], "wb")
#f.write(pickle.dumps(lb))
#f.close()