In [2]:
import sys
import os
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt

In [2]:
def extractSTFTs(path, genre=None, print_message=True, *, sample_rate=44100,
                 duration=15, n_fft=2048, number_of_samples_per_genre=100):
    """
    Extract magnitude STFTs from every ``.mp3`` file directly inside ``path``.

    Each file must be named ``<track number>.mp3``. Its spectrogram is stored
    in row ``track number - 1`` of the returned array. Spectrograms with more
    frames than the frame budget are truncated; shorter ones, and rows for
    which no file was found, stay filled with ``np.nan``.

    Parameters
    ----------
    path : str
        Folder to scan (not recursive).
    genre : optional
        Unused. Kept only so existing calls keep working.
    print_message : bool
        When true, print progress lines for every analysed file. When false
        the function prints nothing.
    sample_rate : int, keyword-only
        Rate passed to ``librosa.load``, so that the 25 ms window and 10 ms
        hop below are computed for the rate the audio is actually decoded at.
    duration : float, keyword-only
        Seconds of audio kept per track; fixes the number of STFT frames
        (15 s at the default rate and hop gives 1500 frames).
    n_fft : int, keyword-only
        FFT size; the output has ``1 + n_fft // 2`` frequency bins.
    number_of_samples_per_genre : int, keyword-only
        Number of rows in the output, i.e. the highest accepted track number.

    Returns
    -------
    numpy.ndarray
        float64 array of shape
        ``(number_of_samples_per_genre, 1 + n_fft // 2, n_frames)``.
        With the defaults this is (100, 1025, 1500), roughly 1.2 GB.

    Raises
    ------
    ValueError
        If the part of an ``.mp3`` file name before the extension is not an
        integer.
    IndexError
        If a track number is outside ``1..number_of_samples_per_genre``.
        Numbers <= 0 are rejected explicitly because they would otherwise
        wrap around and silently overwrite a row at the end of the array.
    """
    # 25 ms analysis window and 10 ms hop, expressed in samples.
    win_length = int(0.025*sample_rate)
    hop_length = int(0.010*sample_rate)

    n_bins = 1 + n_fft // 2
    n_frames = int(round(duration * sample_rate / hop_length))

    STFTs = np.full((number_of_samples_per_genre, n_bins, n_frames), np.nan)

    # The context manager closes the directory handle even if a file fails.
    with os.scandir(path) as entries:
        for file in entries:
            if not file.name.endswith(".mp3"):
                continue

            if print_message:
                print("analyzing ", file.name, file.path)

            # File names are "<track number>.mp3"; strip the 4-char extension.
            int_track = int(file.name[:-4])
            if not 1 <= int_track <= number_of_samples_per_genre:
                raise IndexError(
                    f"track number {int_track} from {file.name!r} is outside "
                    f"1..{number_of_samples_per_genre}")

            # Load at the rate the window/hop were derived from, and do not
            # overwrite sample_rate with whatever the file happens to use.
            sound, _ = librosa.load(file.path, sr=sample_rate, mono=True)

            stft = np.absolute(librosa.stft(
                sound, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window='hann'))

            # Copy at most n_frames columns; the remainder of the row is NaN.
            kept = min(stft.shape[1], n_frames)
            row = STFTs[int_track - 1]
            row[:] = np.nan
            row[:, :kept] = stft[:, :kept]

            if print_message:
                print(int_track, row.shape)

    return STFTs

In [3]:
genres = ["classical", "electronic", "pop", "rock"]

# Load the pre-computed per-genre STFT stacks from the working directory.
STFT = []

for genre in genres:
    STFT.append(np.load(f"stfts_{genre}.npy"))

# Join the genres along the first (track) axis, in the order of `genres`.
# For equally shaped 3-D inputs this yields the same array as stacking to 4-D
# and collapsing the first two axes, but np.concatenate copies the data once
# instead of converting the whole list to an array on every np.shape call.
STFT_reshaped = np.concatenate(STFT, axis=0)

# Writes STFT.npz; the array is stored under numpy's default key "arr_0".
np.savez_compressed('STFT', STFT_reshaped)