In [1]:
%load_ext autoreload
%autoreload 2

from os import listdir
from os.path import isfile, join

import numpy as np
import matplotlib.pyplot as plt
import madmom

import sys
sys.path.append('../src')
from preprocessing import get_dataset, spectro_mini_db_patches
from models import OLSPatchRegressor

# Short alias for np.newaxis, for inserting length-1 axes when indexing arrays.
na = np.newaxis

# Global figure styling: typeset all figure text through LaTeX and use a serif font family.
# NOTE(review): usetex relies on an external LaTeX installation -- confirm one is
# available in every environment where this notebook is expected to run.
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# Create a mini database of spectrogram patches and train a simple OLS regressor on it

TODO: 

- add STFT options to the spectrogram computation (window size, etc.)
- allow several of these options to be used at the same time by stacking the results along an added depth dimension (open question: is the resulting shape a problem?)

## Training on Full Data

In [2]:
# Folders holding the audio clips of the two classes.
music_dir, speech_dir = (
    '../data/music_speech/music_wav/',
    '../data/music_speech/speech_wav/',
)

# NOTE(review): -1 presumably means "use every available clip" (this section
# trains on the full data) -- confirm against get_dataset.
num_samples = -1

# NOTE(review): hpool=0 / wpool=0 presumably disable pooling -- confirm against get_dataset.
X, Y = get_dataset(
    music_dir,
    speech_dir,
    num_samples=num_samples,
    hpool=0,
    wpool=0,
)

# Report the shapes of the feature array and of the label vector on one line.
print('Train Set Shape')
print(*(data.shape for data in (X, Y)))

Train Set Shape
(128, 74, 1500, 1) (128,)


In [3]:
# Y is expected to be 1-D: the unpacking below fails loudly otherwise.
N, = Y.shape

# Fraction of the examples assigned to the training set; the rest is held out for testing.
train_test_ratio = 0.8
n_train_images = int(N * train_test_ratio)

# Shuffle with a dedicated, seeded generator so that the split -- and therefore the
# accuracies printed below -- are identical on every "Restart & Run All".
SPLIT_SEED = 0
I = np.random.RandomState(SPLIT_SEED).permutation(N)
train_indices = I[:n_train_images]
test_indices  = I[n_train_images:]

XTrain = X[train_indices]
YTrain = Y[train_indices]
XTest  = X[test_indices]
YTest  = Y[test_indices]


def conv_accuracy(model, inputs, targets):
    """Return the fraction of examples whose averaged prediction has the target's sign.

    The output of ``model.predict(inputs)`` is averaged over axis 1 to get one
    score per example; the sign of that score is compared with ``targets``.
    A score of exactly 0 has sign 0 and therefore only matches a target of 0.
    NOTE(review): this presumably expects labels encoded as -1 / +1 -- confirm
    against get_dataset.
    """
    example_scores = np.mean(model.predict(inputs), axis=1)
    return np.mean(np.sign(example_scores) == targets)


# Train linear patch regressor (original author's note: the model has no bias term)
regressor = OLSPatchRegressor()
regressor.fit(XTrain, YTrain)

print('Train Accuracy (Conv): {}'.format(conv_accuracy(regressor, XTrain, YTrain)))
print('Test  Accuracy (Conv): {}'.format(conv_accuracy(regressor, XTest, YTest)))

Train Accuracy (Conv): 0.7941176470588235
Test  Accuracy (Conv): 0.8076923076923077


# ----------------------------------

In [4]:
def show_in_grid(input_3d, instant_output=True, figsize=(20, 20), save_path=None, hpad=1, wpad=1):
    """Display a stack of 2-D arrays as a single tiled image.

    The slices of ``input_3d`` are laid out row by row on a near-square grid
    with ``N_h = floor(sqrt(N))`` rows and ``N_w = N // N_h`` columns. Only the
    first ``N_h * N_w`` slices are drawn; any remaining ones are left out.
    Each tile is surrounded by a border filled with the minimum value of the
    whole input, and the grid is drawn with ``origin='lower'``.

    Parameters
    ----------
    input_3d : array-like of shape (N, H, W)
        Stack of N images of height H and width W.
    instant_output : bool
        If True, call ``plt.show()`` at the end. If False the figure is left
        as the current pyplot figure without being shown.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    save_path : str or None
        If not None, the figure is written there with ``plt.savefig`` (300 dpi)
        before it is shown.
    hpad, wpad : int
        Number of border rows added above and below (``hpad``) and border
        columns added left and right (``wpad``) of every tile.

    Raises
    ------
    ValueError
        If ``input_3d`` is not 3-dimensional or contains no elements.
    """
    input_3d = np.asarray(input_3d)
    if input_3d.ndim != 3:
        raise ValueError(
            'show_in_grid expects an array of shape (N, H, W), got shape {}'.format(input_3d.shape))
    if input_3d.size == 0:
        # Without this guard an empty stack fails later with an opaque ZeroDivisionError.
        raise ValueError('show_in_grid needs a non-empty input, got shape {}'.format(input_3d.shape))

    N = input_3d.shape[0]

    # Near-square layout; N >= 1 here, so N_h >= 1 and the division is safe.
    N_h = int(np.floor(N**.5))
    N_w = N // N_h

    # The global minimum is used as border colour, so borders render as the darkest value.
    pad_val = np.min(input_3d)

    # Add a border around every tile, keeping only the slices that fit the grid.
    padded_input = np.pad(input_3d[:N_h * N_w], [[0, 0], [hpad, hpad], [wpad, wpad]],
                          mode='constant', constant_values=pad_val)

    # Read the padded tile size from the array itself so it cannot disagree with the
    # padding applied above (height grows by 2*hpad, width by 2*wpad).
    _, H_padded, W_padded = padded_input.shape

    # (N_h, N_w, H, W) -> (N_h, H, N_w, W) -> one 2-D image with the tiles side by side.
    spectro_grid = (padded_input
                    .reshape(N_h, N_w, H_padded, W_padded)
                    .transpose(0, 2, 1, 3)
                    .reshape(N_h * H_padded, N_w * W_padded))

    # Present the grid.
    plt.figure(figsize=figsize)
    plt.imshow(spectro_grid, origin='lower')
    plt.axis('off')

    if save_path is not None:
        plt.savefig(save_path, dpi=300)

    if instant_output:
        plt.show()