# To-mel
Script to convert input audio (the GTZAN dataset) into mel spectrograms for use as training data.
Mel spectrograms render frequency logarithmically above a given threshold.

In [45]:
# Libraries
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Index of GTZAN audio files: maps each *.au file name found anywhere under
# ../dev/ to the path it was found at (a repeated name keeps the last path seen)
gtzan = {}
for root, _subdirs, entries in os.walk("../dev/"):
    gtzan.update(
        (entry, os.path.join(root, entry))
        for entry in entries
        if entry.endswith('.au')
    )

# Loop through items, create spectrograms.
# For every (filename, path) pair in gtzan, render a mel spectrogram and write
# it to ../dev/spec/<genre>/<genre>.<track>.png
height = 128  # Number of mel bands requested from melspectrogram (n_mels)

for filename, path in gtzan.items():
    # Load audio file: y is the sample array, sr its sample rate
    y, sr = librosa.load(path)

    # Generate a mel spectrogram array
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=height, fmax=8000)

    # Convert power to decibels, using the array maximum as the reference
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Generate savepath. File names are expected to look like
    # "<genre>.<track>.au"; any other shape raises ValueError on unpacking.
    genre, track, _ = filename.split(".")
    genre_dir = "../dev/spec/" + genre
    savepath = genre_dir + "/" + genre + "." + track + ".png"

    # makedirs also creates ../dev/spec itself on a first run, and exist_ok
    # removes the check-then-create race of a separate os.path.exists test
    os.makedirs(genre_dir, exist_ok=True)

    # Render and write the spectrogram, then close the figure: clearing it is
    # not enough, an unclosed figure per track stays alive in pyplot's registry
    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
        fig.savefig(savepath)
    finally:
        plt.close(fig)