# In [1]:
import librosa
import numpy
import skimage.io
import os
import torchaudio
import torchaudio.transforms as T


def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale the values of ``X`` so they span ``[min, max]``.

    Args:
        X: Array-like object exposing ``.min()`` / ``.max()`` and supporting
            element-wise arithmetic (e.g. a ``numpy.ndarray``).
        min: Value the smallest element of ``X`` is mapped to.
        max: Value the largest element of ``X`` is mapped to.

    Returns:
        An array with the same shape as ``X``. If every element of ``X`` is
        identical, all outputs equal ``min``.
    """
    lowest = X.min()
    value_range = X.max() - lowest
    if value_range == 0:
        # Constant input: dividing by the zero range would produce 0/0 = NaN
        # for every element, so map everything to the lower bound instead.
        return X * 0.0 + min
    X_std = (X - lowest) / value_range
    return X_std * (max - min) + min

def spectrogram_image(y, sr, out, hop_length, n_mels):
    """Save the log-mel spectrogram of ``y`` as an 8-bit image at ``out``.

    Args:
        y: Audio signal handed to ``librosa.feature.melspectrogram``.
        sr: Sample rate of ``y``.
        out: Destination path passed to ``skimage.io.imsave``.
        hop_length: Samples per spectrogram time-step; the FFT size is twice
            this value.
        n_mels: Number of mel bins.
    """
    mel_power = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_mels=n_mels,
        n_fft=hop_length*2,
        hop_length=hop_length,
    )
    # the small offset keeps log() finite where the power is exactly zero
    log_mel = numpy.log(mel_power + 1e-9)

    # squeeze into the 8-bit range expected for the saved image
    pixels = scale_minmax(log_mel, 0, 255).astype(numpy.uint8)

    # flip vertically so low frequencies sit at the bottom of the image,
    # then invert so that black means more energy
    pixels = 255 - numpy.flip(pixels, axis=0)

    skimage.io.imsave(out, pixels)

if __name__ == '__main__':
    # Batch-convert every audio file in `path` into a log-mel spectrogram PNG
    # written to `out_dir`, using torchaudio for loading and the transform.

    # settings
    hop_length = 4096  # number of samples per time-step in spectrogram
    n_mels = 105  # number of bins in spectrogram. Height of image
    time_steps = 599  # number of time-steps. Width of image

    # extract a fixed length window
    start_sample = 0  # starting at beginning
    length_samples = time_steps * hop_length

    path = "C:/Users/Michalis Zeakis/Desktop/university/ptyxiaki/datasets/our_wavs"
    out_dir = 'C:/outs/torch_spects'
    # imsave does not create missing directories
    os.makedirs(out_dir, exist_ok=True)

    os.chdir(path)
    for file in os.listdir():
        # sub-directories cannot be loaded as audio
        if not os.path.isfile(file):
            continue
        name, _ext = os.path.splitext(file)
        out = '{0}/{1}.png'.format(out_dir, name)

        waveform, sr = torchaudio.load(file)
        # torchaudio.load returns a (channels, samples) tensor, so the time
        # window has to be taken along the second axis
        window = waveform[:, start_sample:start_sample + length_samples]

        # build the mel filterbank for the file's real sample rate rather
        # than assuming 44100 Hz
        spect = T.MelSpectrogram(sample_rate=sr, n_fft=hop_length * 2,
                                 n_mels=n_mels, hop_length=hop_length)
        melspec = spect(window)

        # first channel only; small offset avoids log(0)
        log_mel = numpy.log(melspec[0].numpy() + 1e-9)

        # convert explicitly to 8-bit instead of relying on the image
        # writer's implicit float conversion
        img = scale_minmax(log_mel, 0, 255).astype(numpy.uint8)
        skimage.io.imsave(out, img)

        print('wrote file', out)



# Output: wrote file C:/outs/torch_spects/Your_Song.png
