In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io.wavfile as wavfile
import tensorflow as tf
from IPython.display import Audio
from tensorflow.keras.models import Model

%matplotlib inline

In [None]:
# Glob pattern matching the input WAV clips. Set the AUDIOSET_DATA_GLOB
# environment variable to point the notebook at another location without
# editing it; the fallback is the original author's local path.
DATA_DIR = os.environ.get(
    "AUDIOSET_DATA_GLOB",
    "/Users/marcelraas/tutorials/accelerator/lecture11_signalprocessing/data/audioset_music_genres/*.wav",
)
DEBUG = False  # when True, only the first files of the listing are loaded
SAMPLE_RATE = 22050  # playback rate (Hz) passed to the Audio widget
# 220500 samples, i.e. 10 s per clip if the files are sampled at SAMPLE_RATE.
# Clips with any other length are dropped during cleaning.
SAMPLE_LENGTH = 10 * SAMPLE_RATE
EXCERPT_LENGTH = 2 * 4096  # samples per excerpt given to the sampler and the model
BATCH_SIZE = 64

## Load data

In [None]:
# glob returns paths in arbitrary, OS-dependent order; sort them so that runs
# (and the DEBUG subset in particular) are reproducible.
files = sorted(glob.glob(DATA_DIR))

if not files:
    # Fail here with a clear message rather than later on an empty data matrix.
    raise FileNotFoundError(
        "No WAV files matched the pattern {!r}".format(DATA_DIR)
    )

if DEBUG:
    # Keep debug runs small.
    files = files[:1000]

In [None]:
# Read every clip listed in `files` and rescale its samples.
waveforms = []

for path in files:
    # The sample rate stored in the file is not used anywhere in the notebook.
    _sample_rate, samples = wavfile.read(path)
    # NOTE(review): dividing by 2**15 assumes 16-bit PCM input — confirm.
    waveforms.append(samples / 2**15)

# Clips can differ in length, and NumPy >= 1.24 raises ValueError when asked to
# build an array from such a ragged list. Fill a 1-D object array element by
# element so that each entry is one clip, whatever the lengths are.
all_data = np.empty(len(waveforms), dtype=object)
for clip_idx, waveform in enumerate(waveforms):
    all_data[clip_idx] = waveform

## Data cleaning and conversion to a matrix

In [None]:
def clean_data(all_data, sample_length=None):
    """Keep only the clips whose length is exactly ``sample_length``.

    Clips that are shorter *or* longer than ``sample_length`` are dropped;
    nothing is truncated or padded.

    Parameters
    ----------
    all_data : numpy.ndarray
        Array whose elements are the individual clips (for example a 1-D
        object array of waveforms). It must support boolean-mask indexing.
    sample_length : int, optional
        Required number of samples per clip. Defaults to the notebook-level
        ``SAMPLE_LENGTH`` constant.

    Returns
    -------
    numpy.ndarray
        The elements of ``all_data`` whose ``len`` equals ``sample_length``.
    """
    if sample_length is None:
        sample_length = SAMPLE_LENGTH

    # dtype=int keeps the comparison well-defined even when there are no clips.
    lens = np.array([len(d) for d in all_data], dtype=int)
    has_expected_length = lens == sample_length
    return all_data[has_expected_length]

# Drop every clip that does not have the expected length.
all_data = clean_data(all_data)

# Copy the remaining clips into a dense (num_clips, SAMPLE_LENGTH) float matrix.
num_clips = len(all_data)
X = np.zeros((num_clips, SAMPLE_LENGTH))
for clip_idx in range(num_clips):
    X[clip_idx] = all_data[clip_idx]

# The per-clip container is no longer needed once the matrix is filled.
del all_data

## Excerpt generation

In [None]:
from music_generator.analysis.generators import SongMatchingSampler

# Batch source built on the clip matrix X; the rest of the notebook indexes it
# for inspection and passes it to training and evaluation.
generator = SongMatchingSampler(
    wave_data=X,
    excerpt_length=EXCERPT_LENGTH,
    batch_size=BATCH_SIZE,
    num_batches_per_epoch=10,
)

In [None]:
# Peek at the first batch: shape of its inputs after selecting index 0 on axis 1.
first_batch_inputs = generator[0][0]
first_batch_inputs[:, 0].shape

In [None]:
from IPython.display import display

In [None]:
# Element 0 of the first sample in the first batch's inputs
# (presumably one excerpt waveform — confirm against SongMatchingSampler).
first_sample = generator[0][0][0]
first_sample[0]

In [None]:
def play_excerpt(excerpt, idx=0):
    """Play one sample of a batch and display the value stored next to it.

    The two arrays found at ``excerpt[0][idx][0]`` and ``excerpt[0][idx][1]``
    are played one after the other, separated by ``2 * 4096`` zero samples,
    at ``SAMPLE_RATE``. ``excerpt[1][idx]`` is displayed below the audio widget.

    Parameters
    ----------
    excerpt : sequence
        A batch as returned by indexing the generator; element 0 is read as
        the inputs and element 1 as the accompanying values.
    idx : int
        Position of the sample within the batch.
    """
    pair = excerpt[0][idx]

    # Zero-valued gap so the two excerpts can be told apart when listening.
    gap = np.array([0] * 2 * 4096)
    first_then_gap = np.append(pair[0], gap)
    audio = np.append(first_then_gap, pair[1])

    display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))
    display(excerpt[1][idx])

    
# Listen to the sample at position 14 of the first batch.
play_excerpt(generator[0], idx=14)

In [None]:
from tensorflow.keras.layers import Input, Dense, GRU, Dropout, PReLU, BatchNormalization, Flatten, Concatenate

In [None]:
# Number of samples per frame handed to the FFT in build_model (2**13 = 8192).
FFT_SIZE = 2 ** 13

In [None]:
def build_model():
    """Build the binary classifier that scores a pair of audio excerpts.

    The model takes a tensor of shape ``(batch, 2, EXCERPT_LENGTH)``. Each of
    the two excerpts is cut into frames of ``FFT_SIZE`` samples, the real part
    of every frame's FFT is fed to its own GRU, and the two GRU outputs are
    concatenated and passed through a small dense stack ending in a single
    sigmoid unit.

    Returns
    -------
    tensorflow.keras.Model
        Uncompiled model mapping ``(batch, 2, EXCERPT_LENGTH)`` to ``(batch, 1)``.

    Raises
    ------
    ValueError
        If ``EXCERPT_LENGTH`` is not a multiple of ``FFT_SIZE``.
    """
    # With a free batch axis a non-divisible length would not fail in the
    # reshape below but silently mix samples, so check it explicitly.
    if EXCERPT_LENGTH % FFT_SIZE != 0:
        raise ValueError("EXCERPT_LENGTH must be a multiple of FFT_SIZE")

    # NOTE(review): with EXCERPT_LENGTH == FFT_SIZE == 8192 this is 1, so each
    # GRU sees a single time step — confirm that this is intended.
    num_frames = EXCERPT_LENGTH // FFT_SIZE

    def spectrum_frames(pair, position):
        """Return the real FFT part of the excerpt at ``position``, per frame."""
        # A size of -1 keeps the whole batch axis, so the graph no longer
        # requires every batch to contain exactly BATCH_SIZE samples.
        excerpt = tf.slice(pair, [0, position, 0], [-1, 1, EXCERPT_LENGTH])
        frames = tf.reshape(excerpt, [-1, num_frames, FFT_SIZE])
        # Only the real part of the spectrum is kept; the imaginary part is dropped.
        return tf.math.real(tf.signal.fft(tf.complex(frames, 0.)))

    inp = Input(shape=[2, EXCERPT_LENGTH])

    x1 = spectrum_frames(inp, 0)
    x2 = spectrum_frames(inp, 1)

    # One recurrent encoder per excerpt (weights are not shared).
    gru_1 = GRU(units=640)(x1)
    gru_2 = GRU(units=640)(x2)

    x = Concatenate()([gru_1, gru_2])

    x = Dense(units=320, activation='relu')(x)
    x = Dense(units=64, activation='relu')(x)
    x = Dense(units=16, activation='relu')(x)

    out = Dense(units=1, activation='sigmoid')(x)

    return Model(inp, out)

# Instantiate the network returned by build_model().
model = build_model()

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
from tensorflow.keras.optimizers import Adam, Nadam

In [None]:
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported argument name.
model.compile(
    optimizer=Nadam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Model.fit accepts generators / Sequences directly; Model.fit_generator is
# deprecated and missing from recent Keras releases.
# NOTE(review): steps_per_epoch=1000 while the sampler was created with
# num_batches_per_epoch=10 — confirm which value is intended.
model.fit(generator,
          epochs=1,
          steps_per_epoch=1000)

In [None]:
import pandas as pd

In [None]:
# Take the first batch from the generator for inspection.
batch = generator[0]

In [None]:
# Second element of the batch (the part used as the "true" column in the
# evaluation further down).
batch[1]

In [None]:
# Score every batch the generator yields and collect the predictions next to
# the reference values.
all_df = []

for batch in generator:
    inputs, labels = batch[0], batch[1]

    batch_df = pd.DataFrame({
        # Feed only the inputs to the model (not the whole (inputs, labels)
        # batch) and run them as one batch, so exactly one prediction comes
        # back per sample.
        "pred": model.predict(inputs, batch_size=len(inputs)).reshape(-1),
        # Flatten so labels shaped (n,) or (n, 1) both line up with "pred".
        "true": np.asarray(labels).reshape(-1),
    })

    all_df.append(batch_df)

df = pd.concat(all_df)

In [None]:
# Overlay one semi-transparent histogram of predicted scores per "true" value.
bin_edges = np.linspace(0, 1, 50)

for true_value, group in df.groupby("true"):
    scores = group['pred']
    scores.hist(bins=bin_edges, label=true_value, alpha=0.3)

plt.legend()