In [1]:
import librosa
import numpy as np
import skimage.io
import os
import numpy

def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale the values of ``X`` into the range ``[min, max]``.

    The smallest element of ``X`` maps to ``min`` and the largest to ``max``.
    When every element of ``X`` is equal there is no spread to stretch, so
    every output element is set to ``min`` (a plain division would be 0/0
    and fill the result with NaN).

    Args:
        X: NumPy array to rescale. It must contain at least one element,
            because ``X.min()`` / ``X.max()`` raise ``ValueError`` on an
            empty array.
        min: Lower bound of the output range.
        max: Upper bound of the output range.

    Returns:
        A new array with the same shape as ``X``; ``X`` is not modified.
    """
    lowest = X.min()
    spread = X.max() - lowest
    # Guard the constant-input case: dividing by 1 keeps the same code path
    # (and result dtype) while turning the 0/0 into a clean 0.
    if spread == 0:
        spread = 1
    X_std = (X - lowest) / spread
    X_scaled = X_std * (max - min) + min
    return X_scaled


def spectrogram_image(y, sr, out, hop_length, n_mels):
    """Write the log-mel spectrogram of ``y`` to ``out`` as an 8-bit image.

    Args:
        y: Audio samples, passed straight to
            ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``y``.
        out: Destination path handed to ``skimage.io.imsave`` (the caller in
            this file uses a ``.png`` name).
        hop_length: Number of samples between successive spectrogram frames;
            the FFT window is twice this size.
        n_mels: Number of mel bands, i.e. the number of rows in the image.
    """
    fft_size = hop_length * 2
    mel_power = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_mels=n_mels,
        n_fft=fft_size,
        hop_length=hop_length,
    )
    # The small offset keeps log() finite where a bin holds exactly 0 energy.
    log_mel = np.log(mel_power + 1e-9)

    # Stretch to 0..255 so the values fit an 8-bit greyscale image.
    pixels = scale_minmax(log_mel, 0, 255).astype(np.uint8)
    # Flip vertically so low frequencies sit at the bottom of the image.
    pixels = np.flip(pixels, axis=0)
    # Invert the greyscale: darker pixels now mean more energy.
    pixels = 255 - pixels

    skimage.io.imsave(out, pixels)

    
if __name__ == '__main__':
    # Batch-convert every audio file in the input folder to a spectrogram
    # image, writing one PNG per recording into the output folder.

    # settings
    hop_length = 4096  # number of samples per time-step in spectrogram
    n_mels = 105  # number of bins in spectrogram. Height of image
    # number of hop-sized time-steps kept from each recording
    # NOTE(review): the image may come out one column wider than this if
    # librosa pads/centres frames by default - confirm against its docs.
    time_steps = 599

    # extract a fixed length window
    start_sample = 0  # starting at beginning
    length_samples = time_steps * hop_length

    # folder holding the input recordings
    path = "C:/Users/Michalis Zeakis/Desktop/university/ptyxiaki/datasets/test_dataset_wav"

    # folder receiving the images; create it up front so saving cannot fail
    # just because the folder is missing
    out_dir = 'C:/outs/testSpects'
    os.makedirs(out_dir, exist_ok=True)

    # The loop loads files by bare name, so work from inside the input folder.
    os.chdir(path)
    for file in sorted(os.listdir()):
        # Sub-directories cannot be loaded as audio; skip them.
        if not os.path.isfile(file):
            continue

        name = os.path.splitext(file)[0]  # file name without its extension
        y, sr = librosa.load(file, sr=44100)  # load the audio at 44.1 kHz
        out = '{0}/{1}.png'.format(out_dir, name)  # output image path
        # Keep at most length_samples samples; shorter recordings are used
        # in full because slicing past the end simply stops at the end.
        window = y[start_sample:start_sample + length_samples]

        # convert to PNG
        spectrogram_image(window, sr=sr, out=out, hop_length=hop_length, n_mels=n_mels)

        # Report inside the loop so every written file is listed, and so an
        # empty folder does not hit an undefined `out`.
        print('wrote file', out)

wrote file C:/outs/testSpects/yt5s.com - Kamado Tanjirou no Uta - Orchestral Version (from _Demon Slayer_) (128 kbps).png
