# Imports

In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from skimage import io

# Handle Data

In [None]:
def read_audio(f_names: list, path: str):
    """Load every audio file named in ``f_names`` from the directory ``path``.

    Each file is decoded with ``librosa.load(..., sr=None, mono=True)``, so
    the file's own sampling rate is kept and the signal is returned as mono.

    Args:
        f_names: File names (not full paths) to load, in order.
        path: Directory that contains the files. A trailing separator is
            accepted but no longer required.

    Returns:
        A ``(data, srs)`` tuple of two lists parallel to ``f_names``: the
        decoded sample arrays and the sampling rate of each file.
    """
    data = []
    srs = []

    for i, f_name in enumerate(f_names):
        x, sr = librosa.load(os.path.join(path, f_name), sr=None, mono=True)
        data.append(x)
        srs.append(sr)

        # Progress report every 100 files.
        if i % 100 == 0:
            print('i=', i, '\t num points:', x.shape, 'samp_rate:', sr)
    print("Finished reading", len(data), "audio files from", path,)
    return data, srs

def read_labels(f_names: list, csv_path: str = '../res/train.csv'):
    """Look up the genre label for each file name in a label CSV.

    The CSV must have a header row with a ``new_id`` column (read as text)
    and a ``genre`` column (read as ``int16``). A file is matched to a row by
    its name with the extension removed.

    Args:
        f_names: File names such as ``'00123.wav'`` or ``'00123.png'``.
        csv_path: Location of the label CSV. Defaults to the path the
            notebook has always used.

    Returns:
        A float64 NumPy array with one genre value per entry of ``f_names``,
        in the same order.

    Raises:
        KeyError: If a file's id is not present in the ``new_id`` column.
    """
    y_df = pd.read_csv(csv_path, header=0,
                       dtype={'new_id': str, 'genre': np.int16})
    genre_by_id = y_df.set_index('new_id')['genre']

    labels = np.zeros(len(f_names))
    for i, f_name in enumerate(f_names):
        # splitext handles extensions of any length, unlike slicing off a
        # fixed four characters.
        labels[i] = genre_by_id.loc[os.path.splitext(f_name)[0]]
    return labels

def read_spectrogram(path: str, f_names: list, expected_shape=None):
    """Read one PNG spectrogram per entry of ``f_names`` into a single array.

    For each name the extension is replaced by ``.png`` and the image is read
    from ``path`` with ``skimage.io.imread``.

    Args:
        path: Directory that contains the PNG files.
        f_names: File names whose stems identify the PNGs to read.
        expected_shape: Shape every image must have. When omitted, the
            notebook-level ``expected_spectro_shape`` is used.

    Returns:
        A float64 array of shape ``(len(f_names), *expected_shape)``.

    Raises:
        ValueError: If any image does not have ``expected_shape``. Every
            offending index is printed before the error is raised.
    """
    if expected_shape is None:
        # Fall back to the notebook-level setting.
        expected_shape = expected_spectro_shape
    expected_shape = tuple(expected_shape)

    img_data = np.zeros(shape=(len(f_names),) + expected_shape)
    mismatched = []
    for i, f_name in enumerate(f_names):
        png_name = os.path.splitext(f_name)[0] + '.png'
        img = io.imread(os.path.join(path, png_name))
        # Compare the freshly read image, not the preallocated row: the row
        # always has the expected shape, so checking it can never fail.
        if img.shape != expected_shape:
            print("index:", i, "has shape", img.shape)
            mismatched.append(i)
            continue
        img_data[i] = img
    if mismatched:
        raise ValueError(
            f"{len(mismatched)} spectrogram(s) in {path} do not have shape "
            f"{expected_shape}; offending indices: {mismatched}"
        )
    print("Spectrogram from", path, "read in! Shape is:", img_data.shape)
    return img_data

def _scale_minmax(values, low=0.0, high=1.0):
    """Linearly rescale ``values`` so that its extremes map to ``low``/``high``."""
    v_min = values.min()
    span = values.max() - v_min
    if span == 0:
        # Constant input: avoid 0/0 -> NaN and map everything to ``low``.
        return np.full_like(values, low, dtype=float)
    return (values - v_min) / span * (high - low) + low


def save_audio_as_spectrogram(data: list, srs: list, f_names: list, path: str,
                              *, hop_length: int = 512, time_steps: int = 2550,
                              n_mels: int = 128):
    """Render each audio clip as an 8-bit log-mel spectrogram PNG.

    For every clip the audio is cut to at most ``time_steps * hop_length``
    samples, converted to a mel spectrogram, log-compressed, min-max scaled
    to 0..255, flipped vertically, inverted, and written to
    ``<path>/<stem>.png``.

    Args:
        data: Audio sample arrays, parallel to ``f_names``. Not modified.
        srs: Sampling rate of each clip, parallel to ``f_names``.
        f_names: Source file names; the stem is reused for the PNG name.
        path: Output directory for the PNG files.
        hop_length: Hop between analysis frames, in samples. The FFT size is
            twice this value.
        time_steps: Maximum clip length, counted in hops.
        n_mels: Number of mel bands (the image height).

    Returns:
        None. The images are written to disk.
    """
    max_samples = time_steps * hop_length

    for i, f_name in enumerate(f_names):
        # Slice into a local instead of overwriting data[i], so the caller's
        # list keeps the full-length audio.
        clip = data[i][:max_samples]
        mels = librosa.feature.melspectrogram(y=clip, sr=srs[i],
                                              n_mels=n_mels,
                                              n_fft=hop_length * 2,
                                              hop_length=hop_length)
        mels = np.log(mels + 1e-9)  # add small number to avoid log(0)

        # min-max scale to fit inside 8-bit range
        img = _scale_minmax(mels, 0, 255).astype(np.uint8)
        img = np.flip(img, axis=0)  # put low frequencies at the bottom in image
        img = 255 - img  # invert. make black==more energy

        # save as PNG
        png_name = os.path.splitext(f_name)[0] + '.png'
        io.imsave(os.path.join(path, png_name), img)
    print("Finished! Images saved to", path)

## Set Path Variables


In [None]:
# Input audio and output spectrogram directories.
train_wav_path = '../res/wav/train/'
test_wav_path = '../res/wav/test/'
train_spectro_path = '../res/spectrogram/train/'
test_spectro_path = '../res/spectrogram/test/'

# os.listdir returns entries in arbitrary order. Sort each listing so the
# order is reproducible between runs and the wav and spectrogram listings
# line up by file stem.
train_wav_names = sorted(os.listdir(train_wav_path))
test_wav_names = sorted(os.listdir(test_wav_path))
train_spectro_names = sorted(os.listdir(train_spectro_path))
test_spectro_names = sorted(os.listdir(test_spectro_path))

# (height, width) every spectrogram image is expected to have.
expected_spectro_shape = (128, 2551)
num_classes = 6

print("num train wavs:", len(train_wav_names))
print("num test wavs:", len(test_wav_names))
print("num train spectros:", len(train_spectro_names))
print("num test spectros:", len(test_spectro_names))
print("expected_spectro_shape:", expected_spectro_shape)

## Read Training .wav Files and Save as Spectrograms

In [None]:
# Decode every training clip, then render each one to a spectrogram PNG.
print("Number of train .wav files in audio folder:", len(train_wav_names))
training_wav, training_srs = read_audio(f_names=train_wav_names,
                                        path=train_wav_path)

save_audio_as_spectrogram(data=training_wav, srs=training_srs,
                          f_names=train_wav_names, path=train_spectro_path)

## Read Testing .wav Files and Save as Spectrograms

In [None]:
# Decode every test clip, then render each one to a spectrogram PNG.
# The message now says "test": this cell counts the test files, not the
# training files.
print("Number of test .wav files in audio folder:", len(test_wav_names))
testing_wav, testing_srs = read_audio(test_wav_names, test_wav_path)

save_audio_as_spectrogram(testing_wav,
                          testing_srs,
                          test_wav_names,
                          test_spectro_path)

## Read Training & Testing Spectrogram PNGs

In [None]:
training_x = read_spectrogram(train_spectro_path, train_spectro_names)
# Look labels up from the same name list the images were read from, so that
# training_x[i] and training_labels[i] always describe the same clip.
# read_labels matches on the file name without its extension, so the .png
# names work as well as the .wav names.
training_labels = read_labels(train_spectro_names)
testing_x = read_spectrogram(test_spectro_path, test_spectro_names)

# TF Model

In [None]:
# Small CNN classifier over single-channel spectrogram images.
# NOTE(review): with a 3x3 'valid' convolution and no pooling, Flatten yields
# 126 * 2549 * 32 values per image, so Dense(128) holds over a billion
# weights. Consider enabling down-sampling before training at full size.
model = keras.models.Sequential([
    keras.Input(shape=expected_spectro_shape),
    # The spectrogram array has no channel axis; Conv2D needs (H, W, C).
    layers.Reshape(tuple(expected_spectro_shape) + (1,)),
    layers.Conv2D(filters=32, kernel_size=3, strides=1, activation='relu'),
    # layers.BatchNormalization(),
    # layers.MaxPool2D(pool_size=(2, 2), strides=1),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(num_classes)  # raw logits, one per class
])
# compile() needs an optimizer and a loss before fit() can train anything.
# Assumes genre labels are integers in [0, num_classes) - TODO confirm.
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

## TF Training

In [None]:
# Split the labelled data in half: the first half trains the model and the
# second half is held out for validation and evaluation.
amount_train_x = len(training_x)//2
train_set_x = training_x[:amount_train_x]
train_set_y = training_labels[:amount_train_x]

eval_set_x = training_x[amount_train_x:]
eval_set_y = training_labels[amount_train_x:]

# Fit on the training half only, so the held-out half stays unseen.
model.fit(train_set_x, train_set_y, epochs=5,
          validation_data=(eval_set_x, eval_set_y))

## TF Training Evaluation

In [None]:
# Score the fitted model on the held-out split. Model.evaluate takes only the
# data to score; `epochs` and `validation_data` are fit() arguments.
model.evaluate(eval_set_x, eval_set_y)

## File Writing