# Imports

In [11]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from skimage import io

# Prepare Data

In [20]:
def read_audio(f_names, is_training, base_path='../res/wav/', labels_csv='../res/train.csv'):
    """Load audio files and, for the training split, their genre labels.

    Every file is decoded with ``librosa.load(..., sr=None, mono=True)``, so
    it is down-mixed to mono and kept at its native sampling rate.

    Parameters
    ----------
    f_names : sequence of str
        File names (not full paths) located in ``<base_path>/train`` or
        ``<base_path>/test``.
    is_training : bool
        Selects the ``train`` (True) or ``test`` (False) sub-directory and
        whether genre labels are looked up.
    base_path : str, optional
        Directory containing the ``train`` and ``test`` sub-directories.
    labels_csv : str, optional
        CSV file with a ``new_id`` column (file name without its extension)
        and a ``genre`` column. Only read when ``is_training`` is true.

    Returns
    -------
    tuple
        ``(data, srs, labels)`` when ``is_training`` is true, otherwise
        ``(data, srs)``. All lists are aligned with ``f_names``: ``data[i]``
        is the waveform, ``srs[i]`` its sampling rate and ``labels[i]`` its
        genre.

    Raises
    ------
    KeyError
        If a training file's stem has no matching ``new_id`` in ``labels_csv``.
    """
    split_dir = os.path.join(base_path, 'train' if is_training else 'test')

    genre_by_id = None
    if is_training:
        y_df = pd.read_csv(labels_csv, header=0, dtype={'new_id': str, 'genre': np.int16})
        genre_by_id = y_df.set_index('new_id')['genre']

    data, srs, labels = [], [], []
    for i, f_name in enumerate(f_names):
        x, sr = librosa.load(os.path.join(split_dir, f_name), sr=None, mono=True)
        data.append(x)
        srs.append(sr)

        if is_training:
            # splitext instead of [:-4] so extensions of any length work.
            stem = os.path.splitext(f_name)[0]
            labels.append(genre_by_id.loc[stem])
        if i % 100 == 0:
            print('i=', i, '\t num points:', x.shape, 'samp_rate:', sr)
    return (data, srs, labels) if is_training else (data, srs)

def save_audio_as_spectrogram(data, srs, f_names, path):
    """Render each audio clip as a log-mel spectrogram and save it as an 8-bit PNG.

    For every clip a mel spectrogram is computed with librosa's default
    settings, converted to a natural-log scale, min-max scaled to 0..255,
    flipped vertically (low frequencies at the bottom) and inverted (darker
    means more energy).

    Parameters
    ----------
    data : sequence of np.ndarray
        Audio waveforms.
    srs : sequence of int
        Sampling rate of each waveform, aligned with ``data``.
    f_names : sequence of str
        Source file names, aligned with ``data``. The extension is replaced
        by ``.png`` to form the output name.
    path : str
        Output prefix that the PNG file name is appended to (normally a
        directory ending in ``/``). Its directory is created if missing.

    Returns
    -------
    None
    """
    def scale_minmax(x_audio, lo=0.0, hi=1.0):
        """Linearly map ``x_audio`` so its minimum becomes ``lo`` and its maximum ``hi``."""
        x_min = x_audio.min()
        span = x_audio.max() - x_min
        if span == 0:
            # Constant input (e.g. a silent clip): avoid 0/0 -> NaN before the
            # uint8 cast and map everything to the lower bound instead.
            return np.full(x_audio.shape, lo, dtype=float)
        return (x_audio - x_min) / span * (hi - lo) + lo

    # Make sure the destination exists; io.imsave does not create directories.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for i, samples in enumerate(data):
        mels = librosa.feature.melspectrogram(y=samples, sr=srs[i])
        mels = np.log(mels + 1e-9)  # add small number to avoid log(0)

        # min-max scale to fit inside 8-bit range
        img = scale_minmax(mels, 0, 255).astype(np.uint8)
        img = np.flip(img, axis=0)  # put low frequencies at the bottom in image
        img = 255 - img  # invert. make black==more energy

        # save as PNG; splitext instead of [:-4] so any extension length works
        stem = os.path.splitext(f_names[i])[0]
        print(path + stem)
        io.imsave(path + stem + '.png', img)
    
def read_spectrograms(path):
    """Placeholder for loading spectrogram images from ``path``.

    Not implemented yet: ``path`` is ignored and ``None`` is returned.
    """
    # TODO: implement (presumably reads the PNGs written by
    # save_audio_as_spectrogram -- confirm the intended return format).
    return None

## Read in Training .wav files

In [6]:
# os.listdir returns entries in arbitrary order and includes non-audio files
# (e.g. hidden system files); keep only .wav files and sort them so the sample
# order is reproducible across runs.
train_file_names = sorted(
    name for name in os.listdir('../res/wav/train/') if name.lower().endswith('.wav')
)
print("Number of train .wav files in audio folder:", len(train_file_names))
training_x, training_srs, training_labels = read_audio(train_file_names, is_training=True)
print('First 5 are:', training_x[:5], training_labels[:5])


Number of train .wav files in audio folder: 2400
i= 0 	 num points: (1321967,) samp_rate: 44100
i= 25 	 num points: (1323119,) samp_rate: 44100
i= 50 	 num points: (1323119,) samp_rate: 44100
i= 75 	 num points: (1321967,) samp_rate: 44100
i= 100 	 num points: (1321967,) samp_rate: 44100
i= 125 	 num points: (1321967,) samp_rate: 44100
i= 150 	 num points: (1323119,) samp_rate: 44100
i= 175 	 num points: (1323119,) samp_rate: 44100
i= 200 	 num points: (1321967,) samp_rate: 44100
i= 225 	 num points: (1321967,) samp_rate: 44100
i= 250 	 num points: (1323119,) samp_rate: 44100
i= 275 	 num points: (1323119,) samp_rate: 44100
i= 300 	 num points: (1323119,) samp_rate: 44100
i= 325 	 num points: (1321967,) samp_rate: 44100
i= 350 	 num points: (1323119,) samp_rate: 44100
i= 375 	 num points: (1323119,) samp_rate: 44100
i= 400 	 num points: (1321967,) samp_rate: 44100
i= 425 	 num points: (1323119,) samp_rate: 44100
i= 450 	 num points: (1323119,) samp_rate: 44100
i= 475 	 num points: (132

## Write Training Spectrogram PNGs

In [21]:
# Render every training clip to a PNG under the training spectrogram folder.
save_audio_as_spectrogram(
    data=training_x,
    srs=training_srs,
    f_names=train_file_names,
    path='../res/spectrogram/train/',
)

../res/spectrogram/train/00907299
../res/spectrogram/train/00907479
../res/spectrogram/train/00907482


## Read in Testing .wav files

In [None]:
# Keep only .wav files and sort them so the sample order is reproducible.
test_file_names = sorted(
    name for name in os.listdir('../res/wav/test/') if name.lower().endswith('.wav')
)
print("Number of test .wav files in audio folder:", len(test_file_names))
# With is_training=False, read_audio returns only (data, srs): the test split
# has no labels, so unpacking three values would raise ValueError.
testing_x, testing_srs = read_audio(test_file_names, is_training=False)
print('First 5 are:', testing_x[:5])

## Write Testing Spectrogram PNGs

In [None]:
# Render every test clip to a PNG under the test spectrogram folder.
save_audio_as_spectrogram(
    data=testing_x,
    srs=testing_srs,
    f_names=test_file_names,
    path='../res/spectrogram/test/',
)

# TF Training

In [None]:
# Preview the first training clip as a log-frequency STFT spectrogram in dB.
# Uses training_x / training_srs produced by the training load cell; the
# previous `training_tracks_sr` name is not defined anywhere in this notebook.
X = librosa.stft(training_x[0])
Xdb = librosa.amplitude_to_db(np.abs(X))
f, axes = plt.subplots(figsize=(10, 5))
librosa.display.specshow(Xdb, sr=training_srs[0], x_axis='time', y_axis='log', ax=axes)
axes.set_title('STFT spectrogram (dB) of the first training clip')
plt.show()

# model = tf.keras.models.Sequential()
# model = keras.Model(inputs=inputs, outputs=outputs)
