In [73]:
import librosa
import pandas
import numpy as np
import matplotlib.pyplot as plot
import pathlib
from pathlib import Path
import os
import random
from datetime import datetime
import cv2

In [74]:
def getGenreFromPath(path):
    """Return the name of the directory that directly contains ``path``.

    Both the song files and the spectrogram images in this notebook are laid
    out as ``<root>/<genre>/<file>``, so the second-to-last path component is
    the genre.

    Parameters
    ----------
    path : pathlib.Path or str
        Path to a file that sits inside a genre directory.

    Returns
    -------
    str
        The second-to-last component of ``path``.

    Raises
    ------
    IndexError
        If ``path`` has fewer than two components.
    """
    # Path() is a no-op for Path inputs and lets callers pass plain strings too.
    return Path(path).parts[-2]
    

In [75]:
# --- Spectrogram rendering settings ---
# Length, in seconds, of the snippet loaded from each song.
duration = 10
# Name of the matplotlib colormap used when drawing the spectrograms.
spectogram_cmap = "binary"

# --- Genres ---
# Predefined list of genre names.
pred_genres = "blues classical country disco hiphop jazz metal pop reggae rock".split()

# --- Locations ---
# Dataset root; the song-collection cell walks its sub-directories.
data_folder = Path("../../audio/testfiles/GTZAN/genres/")
# Directory for the rendered spectrogram images.
spectogram_folder = Path("./img_data/")

In [76]:
#Collect the path of every song, grouped by genre directory
songs = []
genres = []

spectograms = []

# iterdir() yields entries in arbitrary, filesystem-dependent order, so sort
# for a deterministic result. Stray files and hidden entries (e.g. .DS_Store,
# .ipynb_checkpoints) are skipped so they are not mistaken for genres or songs.
for g in sorted(data_folder.iterdir()):
    if not g.is_dir() or g.name.startswith("."):
        continue
    genres.append(g.name)
    songs.extend(
        sorted(
            entry
            for entry in g.iterdir()
            if entry.is_file() and not entry.name.startswith(".")
        )
    )

In [77]:
#Render one spectrogram image per song and remember where each was saved
cmap = plot.get_cmap(spectogram_cmap)
# A single figure is reused for every song and cleared between songs.
fig = plot.figure(figsize=(10,10))
spectograms = []

# Make sure every genre has its own output directory below spectogram_folder.
for genre in genres:
    (spectogram_folder / genre).mkdir(parents=True, exist_ok=True)

for song in songs:
    # Load only the first `duration` seconds, down-mixed to mono.
    y, sr = librosa.load(song, mono=True, duration=duration)
    # NOTE(review): Fs is hard-coded to 2 instead of the loaded rate `sr`;
    # left unchanged here - confirm whether that is intended.
    plot.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB')
    plot.axis('off')
    # Keep the first two dot-separated pieces of the file name,
    # e.g. "blues.00029.<ext>" -> "blues.00029".
    image_stem = ".".join(song.name.split(".")[:2])
    spec_path = spectogram_folder / getGenreFromPath(song) / f"{image_stem}.png"
    spectograms.append(spec_path)
    plot.savefig(spec_path)
    plot.clf()

# Release the figure so it does not linger in memory or render as an empty plot.
plot.close(fig)

<Figure size 720x720 with 0 Axes>

In [78]:
#Seed for RNG
# A fixed integer seed makes the shuffle below reproducible across runs.
# Passing a datetime object to random.seed() is rejected with a TypeError on
# Python 3.11+ (only None, int, float, str, bytes and bytearray are accepted).
shuffle_seed = 42
random.seed(shuffle_seed)

In [79]:
# Randomise the order of the spectrogram paths in place, then display the list.
# The images and labels built in later cells both iterate over this same list,
# so they stay aligned with each other after the shuffle.
random.shuffle(spectograms)
spectograms

[PosixPath('img_data/reggae/reggae.00057.png'),
 PosixPath('img_data/jazz/jazz.00087.png'),
 PosixPath('img_data/reggae/reggae.00096.png'),
 PosixPath('img_data/rock/rock.00044.png'),
 PosixPath('img_data/rock/rock.00065.png'),
 PosixPath('img_data/blues/blues.00029.png'),
 PosixPath('img_data/disco/disco.00032.png'),
 PosixPath('img_data/hiphop/hiphop.00093.png'),
 PosixPath('img_data/rock/rock.00040.png'),
 PosixPath('img_data/reggae/reggae.00045.png'),
 PosixPath('img_data/jazz/jazz.00022.png'),
 PosixPath('img_data/disco/disco.00098.png'),
 PosixPath('img_data/classical/classical.00097.png'),
 PosixPath('img_data/reggae/reggae.00047.png'),
 PosixPath('img_data/country/country.00079.png'),
 PosixPath('img_data/hiphop/hiphop.00013.png'),
 PosixPath('img_data/metal/metal.00004.png'),
 PosixPath('img_data/jazz/jazz.00056.png'),
 PosixPath('img_data/pop/pop.00046.png'),
 PosixPath('img_data/metal/metal.00084.png'),
 PosixPath('img_data/rock/rock.00050.png'),
 PosixPath('img_data/classic

In [80]:
#Import images, convert them to grayscale (one uint8 per pixel) and load them into an array
# Pixel region kept from each saved figure (rows, columns).
# NOTE(review): presumably the plot area of the saved figure - confirm.
crop_rows, crop_cols = slice(86, 630), slice(90, 648)
cropped_shape = (crop_rows.stop - crop_rows.start, crop_cols.stop - crop_cols.start)

images = []
for spec_path in spectograms:
    image = cv2.imread(str(spec_path))
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable.
        raise FileNotFoundError(f"Could not read spectrogram image: {spec_path}")
    # The cropped image overwrites the file on disk, so on a re-run the file is
    # already cropped; cropping it again would cut away real data.
    if image.shape[:2] != cropped_shape:
        image = image[crop_rows, crop_cols]
        cv2.imwrite(str(spec_path), image)
    image = cv2.resize(image, (32, 32))
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    images.append(gray_image)
# Stack into a single float array and scale the 0-255 pixel values to [0, 1].
images = np.array(images, dtype="float") / 255.0
images

array([[[0.46666667, 0.46666667, 0.58431373, ..., 0.56862745,
         0.59607843, 0.54509804],
        [0.43921569, 0.6       , 0.61568627, ..., 0.58039216,
         0.64705882, 0.5372549 ],
        [0.45882353, 0.43137255, 0.64313725, ..., 0.57254902,
         0.61568627, 0.62352941],
        ...,
        [0.34901961, 0.21176471, 0.38039216, ..., 0.31372549,
         0.31764706, 0.25490196],
        [0.28235294, 0.29411765, 0.32941176, ..., 0.32941176,
         0.46666667, 0.38431373],
        [0.19215686, 0.18039216, 0.25098039, ..., 0.19607843,
         0.34901961, 0.2745098 ]],

       [[0.59607843, 0.60392157, 0.58823529, ..., 0.60784314,
         0.54509804, 0.64705882],
        [0.56470588, 0.6745098 , 0.61176471, ..., 0.54509804,
         0.54117647, 0.56470588],
        [0.61176471, 0.55294118, 0.57254902, ..., 0.64705882,
         0.51372549, 0.54117647],
        ...,
        [0.71764706, 0.6745098 , 0.59607843, ..., 0.58431373,
         0.59607843, 0.54901961],
        [0.5

In [81]:
# Label every spectrogram with the name of the directory that contains it
labels = [image_path.parts[-2] for image_path in spectograms]
labels

['reggae',
 'jazz',
 'reggae',
 'rock',
 'rock',
 'blues',
 'disco',
 'hiphop',
 'rock',
 'reggae',
 'jazz',
 'disco',
 'classical',
 'reggae',
 'country',
 'hiphop',
 'metal',
 'jazz',
 'pop',
 'metal',
 'rock',
 'classical',
 'metal',
 'metal',
 'pop',
 'jazz',
 'jazz',
 'classical',
 'jazz',
 'metal',
 'blues',
 'classical',
 'classical',
 'pop',
 'blues',
 'country',
 'disco',
 'jazz',
 'metal',
 'reggae',
 'classical',
 'classical',
 'classical',
 'reggae',
 'classical',
 'country',
 'pop',
 'reggae',
 'reggae',
 'hiphop',
 'jazz',
 'country',
 'pop',
 'country',
 'pop',
 'hiphop',
 'blues',
 'country',
 'pop',
 'reggae',
 'country',
 'metal',
 'disco',
 'reggae',
 'hiphop',
 'metal',
 'blues',
 'jazz',
 'classical',
 'blues',
 'blues',
 'rock',
 'blues',
 'hiphop',
 'jazz',
 'hiphop',
 'disco',
 'rock',
 'metal',
 'classical',
 'hiphop',
 'classical',
 'country',
 'rock',
 'hiphop',
 'hiphop',
 'metal',
 'hiphop',
 'disco',
 'hiphop',
 'rock',
 'classical',
 'blues',
 'disco',
 '