# EEE4113F Machine Learning Project - Spectrogram Generation

## Necessary Imports

In [2]:
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp

import scipy.signal as spsig
import scipy.signal.windows as spwin

import random
import os
import librosa
import librosa.display

import csv

## Generate Spectrograms
Convert each audio recording (e.g. a .wav or .mp3 file) into a spectrogram image saved as a .png file.

In [2]:
# Create spectrograms
def generateSpectrograms(source_dir, out_dir, window, hop, sound_types=("Piano",)):
    """Render one greyscale spectrogram PNG per audio file, grouped by sound type.

    For every sound type, audio files (.wav, .mp3, .flac) are read from
    ``source_dir/<sound_type>`` and the images are written to
    ``out_dir/<sound_type>`` using the same base name with a ``.png``
    extension. Any ``.png`` already present in the output folder is deleted
    first. Each file is loaded as 48 kHz mono, the 1-second excerpt with the
    highest RMS is selected, and its power spectral density (in dB) is drawn
    without axes.

    Parameters
    ----------
    source_dir : str
        Folder that contains one sub-folder of audio files per sound type.
    out_dir : str
        Folder that receives one sub-folder of spectrograms per sound type
        (created if missing).
    window : numpy.ndarray
        Window samples handed to ``scipy.signal.ShortTimeFFT``.
    hop : int
        Hop size in samples handed to ``scipy.signal.ShortTimeFFT``.
    sound_types : iterable of str, optional
        Names of the sub-folders to process. Defaults to ``("Piano",)``.

    Raises
    ------
    FileNotFoundError
        If ``source_dir/<sound_type>`` is not an existing directory. This is
        checked before any existing spectrogram is deleted.
    """
    sample_rate = 48000  # librosa resamples every file to this rate
    output_time = 1  # seconds of audio shown in each spectrogram
    audio_extensions = (".wav", ".mp3", ".flac")

    # iterate over each of the sound types
    for sound_type in sound_types:

        sound_dir = os.path.join(source_dir, sound_type)
        spec_dir = os.path.join(out_dir, sound_type)

        # Fail before touching the output folder, so a wrong source path
        # cannot wipe previously generated spectrograms.
        if not os.path.isdir(sound_dir):
            raise FileNotFoundError("Sound directory not found: " + sound_dir)

        os.makedirs(spec_dir, exist_ok=True)

        # remove all existing spectrograms
        for spec_file in os.listdir(spec_dir):
            if spec_file.endswith(".png"):
                os.remove(os.path.join(spec_dir, spec_file))

        # iterate over each file of this sound type (sorted for a stable order)
        for filename in sorted(os.listdir(sound_dir)):

            # keep audio files only
            if not filename.endswith(audio_extensions):
                continue

            # load sound file
            # librosa is convenient because it resamples and downmixes to mono automatically
            y_orig, fs = librosa.load(os.path.join(sound_dir, filename), mono=True, sr=sample_rate)

            # number of samples in the output excerpt and in the loaded file
            output_len = int(output_time * fs)
            input_len = np.shape(y_orig)[0]

            # skip if the loaded file is too short to analyse
            if input_len < output_len:
                print(filename, "is too short, skipping conversion")
                continue

            # find the excerpt with the highest RMS value
            jump = output_len // 2  # spacing between candidate excerpts
            rms_best = 0
            rms_best_start = 0

            # "+ 1" so that the last aligned start (input_len - output_len) is also tried
            for start in range(0, input_len - output_len + 1, jump):
                end = start + output_len
                rms = np.sqrt(np.mean(np.square(y_orig[start:end])))
                if rms > rms_best:
                    rms_best = rms
                    rms_best_start = start

            # excerpt with the best RMS value
            y = y_orig[rms_best_start:rms_best_start + output_len]

            # Short-Time Fourier Transform scaled as a power spectral density
            SFT = spsig.ShortTimeFFT(window, hop=hop, fs=fs, mfft=4096, scale_to='psd')

            Sx2 = SFT.spectrogram(y)  # absolute square of the STFT

            Sx_dB = 10 * np.log10(np.fmax(Sx2, 5e-9))  # limit range to ~-83dB

            output_file = os.path.join(spec_dir, os.path.splitext(filename)[0] + ".png")

            fig, ax = plt.subplots(figsize=(6., 4.))
            try:
                ax.imshow(Sx_dB, origin='lower', aspect='auto', extent=SFT.extent(output_len), cmap='gray')
                ax.axis("off")
                fig.savefig(output_file, bbox_inches='tight', pad_inches=0)
            finally:
                # always release this figure, even if drawing or saving failed
                plt.close(fig)

In [4]:
# One 1024-sample analysis window per window type.
rectangular = np.ones(1024)
hann = spwin.hann(1024)
blackman = spwin.blackman(1024)

# For each window type, convert the training set and then the test set,
# using a hop of 512 samples.
for spec_folder, spec_window in (
    ("Rectangular", rectangular),
    ("Hann", hann),
    ("Blackman", blackman),
):
    for spec_dataset in ("Training-data", "Test-data"):
        generateSpectrograms(
            f"../{spec_dataset}/Sounds",
            f"../{spec_dataset}/{spec_folder}",
            spec_window,
            512,
        )

In [3]:
# Create arrays of ground truth data

def generateGroundTruthCSV(specdir, groundTruthCSVName,
                           sound_types=("Snare", "Trumpet", "Violin", "Piano")):
    """Write a CSV that maps every spectrogram PNG to its sound-type label.

    Each ``.png`` found in ``specdir/<sound_type>`` becomes one row
    ``[path, sound_type]``. Rows are sorted by path and every field is quoted.

    Parameters
    ----------
    specdir : str
        Folder that contains one sub-folder of spectrograms per sound type.
    groundTruthCSVName : str
        Path of the CSV file to write (overwritten if it exists).
    sound_types : iterable of str, optional
        Names of the sub-folders to scan; each name is also used as the label.
        Defaults to ``("Snare", "Trumpet", "Violin", "Piano")``.

    Raises
    ------
    FileNotFoundError
        If one of the ``specdir/<sound_type>`` folders does not exist.
    """
    rows = []

    for sound_type in sound_types:

        # Plain "/" concatenation is kept so the paths written to the CSV
        # stay exactly as they were.
        spec_dir = specdir + "/" + sound_type

        # collect every spectrogram of this sound type
        for filename in os.listdir(spec_dir):
            if filename.endswith(".png"):
                rows.append([os.path.join(spec_dir, filename), sound_type])

    rows.sort(key=lambda row: row[0])

    # newline='' lets the csv module control line endings; without it
    # Windows would write "\r\r\n" after every row.
    with open(groundTruthCSVName, 'w', newline='') as csv_file:
        csv.writer(csv_file, quoting=csv.QUOTE_ALL).writerows(rows)

In [5]:
# Write one ground-truth CSV per (dataset, window type) spectrogram folder.
# The CSV sits next to the folder and is named after it in lower case.
for truth_dataset, truth_folder in [
    ("Training-data", "Rectangular"),
    ("Training-data", "Blackman"),
    ("Test-data", "Rectangular"),
    ("Test-data", "Blackman"),
    ("Training-data", "Hann"),
    ("Test-data", "Hann"),
]:
    generateGroundTruthCSV(
        f"../{truth_dataset}/{truth_folder}",
        f"../{truth_dataset}/{truth_folder.lower()}.csv",
    )



In [4]:
# Ground-truth CSVs for the MFCC images of both datasets.
# NOTE(review): no cell visible here writes the MFCC folders; presumably they
# are produced elsewhere. Confirm they exist before running this cell.
for mfcc_dataset in ("Training-data", "Test-data"):
    generateGroundTruthCSV(f"../{mfcc_dataset}/MFCC", f"../{mfcc_dataset}/mfcc.csv")