In [1]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
 
import pathlib
from pathlib import Path
import matplotlib.pyplot as plot
import librosa
    
# import the necessary packages
from musicrec.vgg import VGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
from datetime import datetime
import pickle
import cv2
import os
import sys

from keras.models import Sequential

Using TensorFlow backend.


In [2]:
np.set_printoptions(threshold=sys.maxsize)

data_folder = Path("../../../audio/testfiles/GTZAN/genres/")
output_folder = Path("./output/cvnn.model")
spectogram_folder = Path("./img_data/")
# Duration of songsnippet in seconds
duration = 10
# Matplotlib colormap for spectogram
spectogram_cmap = 'binary' 
# Predefined list of genres
pred_genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split() 
#imagePaths = sorted(list(spectogram_folder.list_images(args["dataset"])))
#imagePaths

imagesize_x = 32
imagesize_y = 32

In [3]:
#Get directories of all songs
songs = []
genres = []

spectograms = []

for g in data_folder.iterdir():
    genres.append(g.name)
    for i in g.iterdir():
        songs.append(i)

In [4]:
# Calculate all spectograms
cmap = plot.get_cmap(spectogram_cmap)
plot.figure(figsize=(10,10))
spectograms = []

# Iterate through all songs and generate their spactograms. Save them all as images.
for genre in genres:
    pathlib.Path(f'img_data/{genre}').mkdir(parents=True, exist_ok=True)     
for song in songs:
    y, sr = librosa.load(song, mono=True, duration=duration)
    plot.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
    plot.axis('off');
    spec_path = pathlib.Path(f'img_data/{song.parts[-2]}/{".".join(str(song.parts[-1]).split(".")[:2])}.png')
    spectograms.append(spec_path)
    plot.savefig(spec_path)
    plot.clf()

  Z = 10. * np.log10(spec)


In [5]:
# Seed for RNG
random.seed(datetime.now())

In [6]:
# Shuffle the spectograms
random.shuffle(spectograms)
spectograms

[PosixPath('img_data/country/country.00018.png'),
 PosixPath('img_data/jazz/jazz.00076.png'),
 PosixPath('img_data/classical/classical.00019.png'),
 PosixPath('img_data/metal/metal.00059.png'),
 PosixPath('img_data/jazz/jazz.00090.png'),
 PosixPath('img_data/blues/blues.00023.png'),
 PosixPath('img_data/pop/pop.00035.png'),
 PosixPath('img_data/metal/metal.00064.png'),
 PosixPath('img_data/country/country.00031.png'),
 PosixPath('img_data/classical/classical.00000.png'),
 PosixPath('img_data/rock/rock.00013.png'),
 PosixPath('img_data/metal/metal.00002.png'),
 PosixPath('img_data/metal/metal.00000.png'),
 PosixPath('img_data/disco/disco.00089.png'),
 PosixPath('img_data/metal/metal.00060.png'),
 PosixPath('img_data/rock/rock.00014.png'),
 PosixPath('img_data/reggae/reggae.00024.png'),
 PosixPath('img_data/rock/rock.00015.png'),
 PosixPath('img_data/blues/blues.00040.png'),
 PosixPath('img_data/pop/pop.00020.png'),
 PosixPath('img_data/country/country.00094.png'),
 PosixPath('img_data/d

In [7]:
# Import images, convert them to grayscale (one uint8 per pixel) and load them into an array
images = []
for spec_path in spectograms:
    image = cv2.imread(str(spec_path))
    col_pixels = np.array(np.where(image != 255))
    first_col_pixel = col_pixels[:,0]
    last_col_pixel = col_pixels[:,-1]
    image = image[first_col_pixel[0]:last_col_pixel[0], first_col_pixel[1]:last_col_pixel[1]]
    image = cv2.resize(image, (imagesize_x, imagesize_y))
    image_name = str(spec_path) + "-crop.png"
    cv2.imwrite(image_name, image)
    #image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    images.append(image)
images = np.array(images, dtype="float") / 255.0 
data = images

In [8]:
# Create array with correct labels for each spectogram
labels = []
for spec in spectograms:
    labels.append(spec.parts[-2])  
labels

['country',
 'jazz',
 'classical',
 'metal',
 'jazz',
 'blues',
 'pop',
 'metal',
 'country',
 'classical',
 'rock',
 'metal',
 'metal',
 'disco',
 'metal',
 'rock',
 'reggae',
 'rock',
 'blues',
 'pop',
 'country',
 'disco',
 'reggae',
 'country',
 'disco',
 'classical',
 'rock',
 'blues',
 'pop',
 'rock',
 'rock',
 'country',
 'pop',
 'jazz',
 'metal',
 'classical',
 'rock',
 'disco',
 'hiphop',
 'hiphop',
 'pop',
 'rock',
 'blues',
 'jazz',
 'country',
 'metal',
 'blues',
 'hiphop',
 'rock',
 'classical',
 'jazz',
 'metal',
 'pop',
 'jazz',
 'classical',
 'blues',
 'reggae',
 'country',
 'classical',
 'classical',
 'disco',
 'rock',
 'pop',
 'classical',
 'rock',
 'reggae',
 'pop',
 'hiphop',
 'rock',
 'hiphop',
 'hiphop',
 'disco',
 'country',
 'pop',
 'hiphop',
 'disco',
 'disco',
 'country',
 'jazz',
 'country',
 'country',
 'country',
 'rock',
 'jazz',
 'reggae',
 'classical',
 'country',
 'pop',
 'country',
 'reggae',
 'jazz',
 'blues',
 'blues',
 'rock',
 'disco',
 'classical'

In [9]:
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)
 
# convert the labels from integers to vectors (for 2-class, binary
# classification you should use Keras' to_categorical function
# instead as the scikit-learn's LabelBinarizer will not return a
# vector)
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)
np.size(trainY)

7500

In [10]:
lb.classes_

array(['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
       'metal', 'pop', 'reggae', 'rock'], dtype='<U9')

In [11]:
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, 
                         height_shift_range=0.1, shear_range=0.2, 
                         zoom_range=0.2,horizontal_flip=True, 
                         fill_mode="nearest")
 
# initialize our VGG-like Convolutional Neural Network
model = VGGNet.build(width=imagesize_x, height=imagesize_y, depth=3, 
                          classes=len(lb.classes_))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [12]:
# initialize our initial learning rate, # of epochs to train for,
# and batch size
INIT_LR = 0.01
EPOCHS = 75
BS = 32
 
# initialize the model and optimizer (you'll want to use
# binary_crossentropy for 2-class classification)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
 
# train the network
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS), 
                        validation_data=(testX, testY), 
                        steps_per_epoch=len(trainX) // BS,
                        epochs=EPOCHS)

[INFO] training network...
Instructions for updating:
Use tf.cast instead.
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75


Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [13]:
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
                            predictions.argmax(axis=1), 
                            target_names=lb.classes_))
 
# plot the training loss and accuracy
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["acc"], label="train_acc")
plt.plot(N, H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy (SmallVGGNet)")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig(args["plot"])
 
# save the model and label binarizer to disk
print("[INFO] serializing network and label binarizer...")
model.save(args["model"])
f = open(args["label_bin"], "wb")
f.write(pickle.dumps(lb))
f.close()

[INFO] evaluating network...


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

       blues       0.35      0.43      0.39        28
   classical       0.93      0.54      0.68        24
     country       1.00      0.03      0.06        31
       disco       0.24      0.15      0.19        26
      hiphop       0.19      0.52      0.27        25
        jazz       0.38      0.25      0.30        24
       metal       0.63      0.76      0.69        25
         pop       0.52      0.96      0.68        26
      reggae       0.15      0.12      0.14        24
        rock       0.00      0.00      0.00        17

   micro avg       0.38      0.38      0.38       250
   macro avg       0.44      0.38      0.34       250
weighted avg       0.46      0.38      0.34       250



NameError: name 'args' is not defined