In [None]:
import os
import time

import librosa
import librosa.display
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pywt
import tensorflow as tf
from scipy.io import wavfile

# Train data

In [None]:
def get_instrument_data(dir):
    """Gather the mel spectrograms of every entry in one instrument folder.

    Each name returned by ``os.listdir(dir)`` is joined to ``dir`` with a
    ``'/'`` and handed to ``get_mel``; the spectrograms it returns are
    collected into one flat sequence.

    Args:
        dir: Path of the folder whose entries are read.

    Returns:
        ``np.ndarray`` built from all spectrograms, in listing order.
    """
    spectrograms = []
    for entry_name in os.listdir(dir):
        entry_path = dir + '/' + entry_name
        # get_mel yields several spectrograms per file; keep them flat.
        spectrograms += get_mel(entry_path)
    return np.asarray(spectrograms)

def get_instrument_scalogram_data(dir):
    """Gather one scalogram per entry of an instrument folder.

    Each name returned by ``os.listdir(dir)`` is joined to ``dir`` with a
    ``'/'`` and handed to ``get_scalogram``.

    Args:
        dir: Path of the folder whose entries are read.

    Returns:
        ``np.ndarray`` whose first axis indexes the files, i.e. one 2-D
        scalogram per file.
    """
    scalograms = []
    for file in os.listdir(dir):
        # get_scalogram returns a single 2-D array, so it must be appended
        # as one sample. extend() would iterate over its rows and store every
        # scale row as a separate 1-D "sample".
        scalograms.append(get_scalogram(dir + '/' + file))
    return np.asarray(scalograms)

def read_train_data(dir, scalogram=False):
    """Load the features of every instrument sub-directory of ``dir``.

    Each immediate sub-directory is treated as one instrument: its name is
    the label and its contents are converted by ``get_instrument_data`` or,
    when ``scalogram`` is true, by ``get_instrument_scalogram_data``.
    Entries of ``dir`` that are not directories are ignored.

    Args:
        dir: Root folder containing one sub-directory per instrument.
        scalogram: Select scalogram features instead of mel spectrograms.

    Returns:
        Tuple ``(data, labels)``: ``data`` is the per-instrument arrays
        concatenated along axis 0, ``labels`` is a list holding the
        sub-directory name once for every sample of that instrument.

    Raises:
        ValueError: If ``dir`` contains no sub-directory at all.
    """
    data = []
    labels = []
    # The context manager releases the directory handle even if feature
    # extraction raises part-way through.
    with os.scandir(dir) as entries:
        for instrument in entries:
            if not instrument.is_dir():
                continue
            if scalogram:
                instrument_data = get_instrument_scalogram_data(instrument.path)
            else:
                instrument_data = get_instrument_data(instrument.path)
            data.append(instrument_data)
            labels.extend([instrument.name] * len(instrument_data))
    if not data:
        # np.concatenate([]) would raise the same exception type with a
        # message that does not mention the directory.
        raise ValueError('No instrument sub-directories found in {!r}'.format(dir))
    return np.concatenate(data, axis=0), labels

# Test data

In [None]:
def get_label(file):
    """Return the first line of a text file without trailing whitespace.

    Args:
        file: Path of the text file to read.

    Returns:
        The first line with trailing whitespace (including the newline)
        stripped; an empty string when the file is empty.
    """
    with open(file) as handle:
        first_line = handle.readline()
    return first_line.rstrip()

def read_test_data(dir, scalogram=False):
    """Load the features and labels of every clip below ``dir``.

    Every immediate sub-directory of ``dir`` is scanned. A ``.txt`` file
    sets the current label (via ``get_label``); a ``.wav`` file is converted
    by ``get_test_mel`` or, when ``scalogram`` is true, by
    ``get_test_scalogram``, and every feature array it yields receives the
    current label. Other files are ignored.

    Args:
        dir: Root folder containing the sub-directories to scan.
        scalogram: Select scalogram features instead of mel spectrograms.

    Returns:
        Tuple ``(data, labels)``: ``data`` is an ``np.ndarray`` of all
        feature arrays, ``labels`` a list with one label per feature array.
    """
    data = []
    labels = []
    label = ''
    # os.walk / os.listdir return names in arbitrary order. Sorting makes the
    # output reproducible and, more importantly, guarantees that 'name.txt'
    # is visited right before 'name.wav', so each clip gets its own label
    # instead of whichever label file happened to be listed last.
    # NOTE(review): assumes each .wav shares its base name with its label
    # file — confirm against the dataset layout.
    parts = sorted(next(os.walk(dir))[1])
    for part in parts:
        part_path = dir + '/' + part
        for file in sorted(os.listdir(part_path)):
            file_path = part_path + '/' + file
            if file.endswith('.txt'):
                label = get_label(file_path)
            elif file.endswith('.wav'):
                if scalogram:
                    features = get_test_scalogram(file_path)
                else:
                    features = get_test_mel(file_path)
                # One label per feature array produced from this clip.
                data.extend(features)
                labels.extend([label] * len(features))
    return np.asarray(data), labels

# From .wav to mel spectrogram

In [None]:
def get_mel(filename, plot=False, test=False):
    """Cut an audio file into windows and return one dB mel spectrogram each.

    The file is loaded at its native sample rate (``sr=None``). Windows start
    every ``sample_rate`` samples and stop one sample short of the next
    window's start, so a full window holds ``sample_rate - 1`` samples. Each
    window goes through STFT -> magnitude -> mel scale -> decibels
    (``ref=np.min``).

    Args:
        filename: Path of the audio file handed to ``librosa.load``.
        plot: When true, show the waveform once and, for every window, the
            STFT, the mel-scaled spectrogram and the final dB spectrogram.
        test: When true, produce one window fewer (rounded duration in
            seconds minus one).

    Returns:
        List of mel spectrograms, one per window, in time order.
    """
    signal, rate = librosa.load(filename, sr=None)

    if plot:
        # waveshow labels the x-axis in seconds using the sample rate.
        plt.figure(figsize=(14, 5))
        librosa.display.waveshow(signal, sr=rate)
        plt.show()

    # Number of windows = duration rounded to whole seconds (minus one in
    # test mode).
    n_windows = round(signal.shape[0] / rate)
    if test:
        n_windows -= 1

    spectrograms = []
    for index in range(n_windows):
        start = index * rate
        stop = start + rate - 1
        window = signal[start:stop]

        # Short-time Fourier transform of the window.
        stft_matrix = librosa.stft(window)
        if plot:
            librosa.display.specshow(stft_matrix)
            plt.show()

        # Keep the magnitude only and map it onto the mel scale.
        magnitude, _ = librosa.magphase(stft_matrix)
        mel_scaled = librosa.feature.melspectrogram(S=magnitude, sr=rate)
        if plot:
            librosa.display.specshow(mel_scaled)
            plt.show()

        # Convert to decibels to obtain the final mel spectrogram.
        mel_db = librosa.amplitude_to_db(mel_scaled, ref=np.min)
        spectrograms.append(mel_db)
        if plot:
            librosa.display.specshow(mel_db, sr=rate, x_axis='time', y_axis='mel')
            plt.colorbar(format='%+2.0f dB')
            plt.show()

    return spectrograms

def get_test_mel(file):
    """Return the mel spectrograms of a test clip as a new list.

    Thin wrapper around ``get_mel`` with plotting disabled and ``test=True``.

    Args:
        file: Path of the audio file.

    Returns:
        A fresh list containing the spectrograms produced by ``get_mel``.
    """
    return list(get_mel(file, plot=False, test=True))



# From .wav to scalogram

In [None]:
def get_scalogram(filename, plot=False):
    """Compute the Morlet-wavelet scalogram of a whole ``.wav`` file.

    The audio is read with ``scipy.io.wavfile``, mixed down to mono when it
    has several channels, peak-normalised to ``[-1, 1]`` and transformed
    with ``pywt.cwt`` over the scales ``1..128``.

    Args:
        filename: Path of the ``.wav`` file.
        plot: When true, draw the coefficient matrix with ``imshow``.

    Returns:
        The coefficient array returned by ``pywt.cwt`` (one row per scale).
    """
    _sample_rate, data = wavfile.read(filename)

    # Work on the actual samples. The previous code replaced them with
    # np.arange(len(data)), so every scalogram described a linear ramp
    # instead of the recording.
    data = np.asarray(data, dtype=np.float64)
    if data.ndim > 1:
        # wavfile.read returns (n_samples, n_channels) for multi-channel
        # audio; cwt works along the last axis, so reduce to mono first.
        data = data.mean(axis=1)

    # Peak normalisation; silent or empty input is left untouched to avoid a
    # division by zero.
    peak = np.max(np.abs(data)) if data.size else 0.0
    if peak > 0:
        data = data / peak

    # Scalogram over scales 1..128 with the Morlet wavelet.
    coef, _freqs = pywt.cwt(data, np.arange(1, 129), 'morl')

    if plot:
        fig, ax = plt.subplots()
        ax.imshow(coef, cmap='coolwarm', aspect='auto')
    return coef

def get_test_scalogram(filename, plot=False):
    """Compute Morlet-wavelet scalograms for consecutive 3-second windows.

    The audio is read with ``scipy.io.wavfile``, mixed down to mono when it
    has several channels and peak-normalised to ``[-1, 1]``. It is then cut
    into non-overlapping 3-second windows and each window is transformed
    with ``pywt.cwt`` over the scales ``1..128``.

    Args:
        filename: Path of the ``.wav`` file.
        plot: When true, draw every coefficient matrix with ``imshow``.

    Returns:
        List with one coefficient array per complete window, in time order.
    """
    sample_rate, data = wavfile.read(filename)

    # Work on the actual samples. The previous code replaced them with
    # np.arange(len(data)), i.e. a linear ramp.
    data = np.asarray(data, dtype=np.float64)
    if data.ndim > 1:
        # (n_samples, n_channels) -> mono, so cwt runs along time.
        data = data.mean(axis=1)

    # Peak normalisation; silent or empty input is left untouched.
    peak = np.max(np.abs(data)) if data.size else 0.0
    if peak > 0:
        data = data / peak

    window_seconds = 3
    seconds = round(data.shape[0] / sample_rate)
    n_windows = int(seconds / window_seconds)
    # The "- 1" follows the windowing convention used by get_mel in this
    # notebook. NOTE(review): presumably this matches the sample count of
    # the training clips — confirm.
    window_len = window_seconds * sample_rate - 1

    coefs = []
    for i in range(n_windows):
        # Windows are laid end to end. The previous start of i * sample_rate
        # made them overlap and cover only the first few seconds.
        start = i * window_seconds * sample_rate
        audio = data[start:start + window_len]
        if audio.shape[0] < window_len:
            # Rounding the duration up can leave a truncated last window;
            # dropping it keeps all scalograms the same width.
            break

        # Scalogram over scales 1..128 with the Morlet wavelet.
        coef, _freqs = pywt.cwt(audio, np.arange(1, 129), 'morl')
        coefs.append(coef)

        if plot:
            fig, ax = plt.subplots()
            ax.imshow(coef, cmap='coolwarm', aspect='auto')
    return coefs

# Utility functions

In [None]:
def convert_labels(labels):
    """Map instrument codes to their integer class index.

    The index is the position of the code in the fixed list
    ``cel, cla, flu, gac, gel, org, pia, sax, tru, vio, voi``. Labels that
    are not one of these codes are returned unchanged.

    Args:
        labels: Iterable of labels.

    Returns:
        New list with every known code replaced by its index.
    """
    instruments = ['cel', 'cla', 'flu', 'gac', 'gel', 'org', 'pia', 'sax', 'tru', 'vio', 'voi']
    index_of = {code: position for position, code in enumerate(instruments)}
    converted = []
    for label in labels:
        # Only strings can match a code; anything else passes through.
        if isinstance(label, str) and label in index_of:
            converted.append(index_of[label])
        else:
            converted.append(label)
    return converted


def time_passed(t):
    """Print the seconds elapsed since ``t`` and return the current time.

    Args:
        t: Reference timestamp as returned by ``time.time()``.

    Returns:
        The timestamp taken inside this call, so the caller can chain
        measurements with ``t = time_passed(t)``.
    """
    current = time.time()
    elapsed = current - t
    print('Time passed: {} seconds.'.format(elapsed))
    return current

# Process data

In [None]:
# Build the mel-spectrogram training set and cache it as .npy files.
t = time.time()

# NOTE(review): this cell reads from and writes to '../dataset', whereas the
# other processing cells in this notebook use './dataset' — confirm which
# location is intended.
train_dir = '../dataset/IRMAS-TrainingData'

X_train, y_train = read_train_data(train_dir)
# Instrument codes -> integer class indices (see convert_labels).
y_train = convert_labels(y_train)
# 11 = number of instrument codes listed in convert_labels.
y_train = tf.keras.utils.to_categorical(y_train, 11)
np.save('../dataset/X_train_data.npy', X_train)
np.save('../dataset/y_train_data.npy', y_train)

# Prints the elapsed time and restarts the timer for the next cell.
t = time_passed(t)

Time passed: 1.2632086277008057 seconds.


In [None]:
# Build the mel-spectrogram test set and cache it as .npy files.
# Relies on `t` from the previous processing cell for the timing printout.
test_dir = './dataset/IRMAS-TestingData'

X_test, y_test = read_test_data(test_dir)
# Instrument codes -> integer class indices (see convert_labels).
y_test = convert_labels(y_test)
# 11 = number of instrument codes listed in convert_labels.
y_test = tf.keras.utils.to_categorical(y_test, 11)
# The file name must start with an ASCII 'X'. It previously started with the
# look-alike Greek capital Chi, so loading 'X_test_data.npy' by its expected
# name would not find the file.
np.save('./dataset/X_test_data.npy', X_test)
np.save('./dataset/y_test_data.npy', y_test)

t = time_passed(t)

In [None]:
# Build the scalogram training set and cache it as .npy files.
# NOTE: rebinds train_dir, X_train and y_train, replacing the mel-spectrogram
# values created by the earlier training cell; relies on `t` from a previous
# cell for the timing printout.
train_dir = './dataset/IRMAS-TrainingData'

X_train, y_train = read_train_data(train_dir, scalogram=True)
# Instrument codes -> integer class indices (see convert_labels).
y_train = convert_labels(y_train)
# 11 = number of instrument codes listed in convert_labels.
y_train = tf.keras.utils.to_categorical(y_train, 11)
np.save('./dataset/X_train_scalogram_data.npy', X_train)
np.save('./dataset/y_train_scalogram_data.npy', y_train)

# Prints the elapsed time and restarts the timer for the next cell.
t = time_passed(t)

In [None]:
# Build the scalogram test set and cache it as .npy files.
# NOTE: rebinds test_dir, X_test and y_test, replacing the mel-spectrogram
# values created by the earlier test cell; relies on `t` from a previous cell
# for the timing printout.
test_dir = './dataset/IRMAS-TestingData'

X_test, y_test = read_test_data(test_dir, scalogram=True)
# Instrument codes -> integer class indices (see convert_labels).
y_test = convert_labels(y_test)
# 11 = number of instrument codes listed in convert_labels.
y_test = tf.keras.utils.to_categorical(y_test, 11)
np.save('./dataset/X_test_scalogram_data.npy', X_test)
np.save('./dataset/y_test_scalogram_data.npy', y_test)

# Prints the elapsed time and restarts the timer.
t = time_passed(t)