In [None]:
# Load IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes, so edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import keras
from keras.optimizers import Adam
from keras.layers import Input, GRU, Flatten, MaxPool2D, MaxPool1D
from keras.layers import PReLU, Dropout, Lambda, Dense
from keras.models import Model
import tensorflow as tf

from music_generator.basic.random import generate_dataset
from music_generator.basic.signalproc import SamplingInfo
from music_generator.musical.timing import Tempo
from music_generator.musical.scales import GenericScale
from music_generator.analysis.play import play_mono_as_stereo, play_array
from music_generator.basic.signalproc import mix_at
from music_generator.analysis import preprocessing

from music_generator.musical import scales
import numpy as np
from multiprocessing import Pool
from functools import partial

import matplotlib.pyplot as plt
from IPython.display import Audio
%matplotlib inline

In [None]:
# Sample rate shared by dataset generation, playback and prediction below
# (presumably in Hz -- confirm against SamplingInfo).
sr = 22050
sampling_info = SamplingInfo(sr)

In [None]:
# Collect the symbol of every note yielded by the chromatic scale built on
# 'C'; each symbol is later used as the root of one generated dataset.
all_roots = scales.chromatic_scale('C')
roots = list(map(lambda note: note.get_symbol(), all_roots.generate(0, 1)))
roots

In [None]:
def generate_dataset_for_root(root):
    """Generate one dataset whose scale is built on ``root``.

    Takes a single argument so it can be handed directly to ``Pool.map``.

    Args:
        root: Root of the scale, passed unchanged to ``GenericScale``.

    Returns:
        Whatever ``generate_dataset`` returns for 256 measures at
        ``Tempo(120)``, using the notebook-level ``sampling_info``.
    """
    settings = {
        'n_measures': 256,
        'tempo': Tempo(120),
        # Interval pattern handed to GenericScale; it matches the natural
        # minor scale if the values are semitone offsets -- TODO confirm.
        'scale': GenericScale(root, [0, 2, 3, 5, 7, 8, 10]),
        'sampling_info': sampling_info,
    }
    return generate_dataset(**settings)
    
# Build one dataset per root in parallel on eight worker processes;
# Pool.map returns the results in the same order as `roots`.
# NOTE(review): if generate_dataset draws from a global random generator,
# forked workers may start from identical state -- confirm the datasets differ.
with Pool(processes=8) as pool:
    datasets = pool.map(generate_dataset_for_root, roots)

In [None]:
# Merge the per-root datasets into one big data set: a collection of audio
# tracks plus a mix. The combining step is also expected to make sure all
# data ends up the same size.
audio_tracks, mix = preprocessing.combine_datasets(datasets)

In [None]:
# Sanity check, shown as the cell output: the mix and the track array should
# have the same length along their last axis.
mix.shape[-1] == audio_tracks.shape[-1]

In [None]:
# Rebuild the mix from tracks 2, 1 and 0 with mix_at.
# NOTE: this replaces the `mix` returned by combine_datasets, so every later
# cell works with this version.
mix = mix_at(mix_at(audio_tracks[2], audio_tracks[1]), audio_tracks[0])

In [None]:
# How many 4096-sample fragments fit into the mix (fractional value); the
# next cell floors this to get the number of training samples.
len(mix) / 4096

In [None]:
# Fragment size, and the number of whole fragments available in the mix.
fragment_length = 4096
n_samples = len(mix) // fragment_length

# Training pairs are drawn from the mix (input) and track 2 (target).
target_track = audio_tracks[2]
input_track = mix

In [None]:
# Listen to the same window (50 to 51, going by the `range_secs` argument)
# of the network input and of the target track.
# NOTE(review): if play_array returns a player object rather than displaying
# it, only the second call's result is shown as cell output -- confirm.
play_array(input_track, sampling_info.sample_rate, range_secs=[50, 51])
play_array(target_track, sampling_info.sample_rate, range_secs=[50, 51])

# Create training set

In [None]:
# Build the (input, target) training arrays from the two tracks; presumably
# `n_samples` fragments of `fragment_length` samples each -- see
# preprocessing.create_training_data_set for the exact sampling scheme.
x, y = preprocessing.create_training_data_set(
    n_samples, fragment_length, input_track, target_track)

# Append a trailing axis of size one to the inputs.
x = np.expand_dims(x, axis=-1)

In [None]:
# Play training input 2 repeated ten times back to back (presumably so the
# short fragment is long enough to hear).
play_array(np.tile(x[2, :, 0], 10), sample_rate=sampling_info.sample_rate)

# Build network

In [None]:
# Shape of a single input example (everything after the sample axis).
input_shape = x[0].shape
# Length of the first axis of one example, read from input sample 1.
# NOTE(review): this is taken from `x`, not from the target `y` -- confirm
# that is intended.
output_shape = x[1].shape[0]

In [None]:
def fft_loss(y_true, y_pred):
    """Mean squared error between the magnitude spectra of target and prediction.

    ``tf.spectral.rfft`` transforms the innermost axis. The model in this
    notebook emits ``(batch, time, 1)`` tensors, so transforming them as-is
    runs a length-1 FFT over the channel axis and reduces the loss to an MSE
    on absolute sample values. Rank-3 tensors therefore get their time axis
    moved to the end first; tensors of any other rank are transformed
    unchanged, exactly as before.

    Args:
        y_true: Target signal tensor, either ``(batch, time, channels)`` or
            with time already on the last axis.
        y_pred: Predicted signal tensor with the same layout as ``y_true``.

    Returns:
        The value of ``tf.losses.mean_squared_error`` computed on the two
        magnitude spectra.
    """
    def magnitude_spectrum(signal):
        # Decide from y_pred's static rank for both tensors: Keras may supply
        # y_true as a placeholder whose static shape is less specific.
        if y_pred.shape.ndims == 3:
            # (batch, time, channels) -> (batch, channels, time)
            signal = tf.transpose(signal, perm=[0, 2, 1])
        return tf.abs(tf.spectral.rfft(signal))

    return tf.losses.mean_squared_error(magnitude_spectrum(y_true),
                                        magnitude_spectrum(y_pred))

def mse_scaled(y_true, y_pred):
    """Return the mean squared error of prediction vs. target, multiplied by 100.

    Args:
        y_true: Target tensor.
        y_pred: Predicted tensor.

    Returns:
        ``tf.losses.mean_squared_error(y_true, y_pred)`` scaled by 100.
    """
    scale = 100
    mse = tf.losses.mean_squared_error(y_true, y_pred)
    return scale * mse

In [None]:
# Training geometry: the model input is declared with a fixed batch shape of
# `batch_size` sequences, each `n_steps` steps long with one value per step.
batch_size = 32
n_steps = 2048
batch_inp_shape = (batch_size, n_steps, 1)

# Re-cut inputs and targets into sequences of `n_steps` values.
# NOTE(review): sequences only stay aligned with the original fragments if
# the fragment length is a multiple of n_steps (4096 vs. 2048 here).
x_train = x.reshape(-1, n_steps, 1)
y_train = y.reshape(-1, n_steps, 1)

In [None]:
# Sequence-to-sequence network: a GRU that emits one vector per time step,
# followed by per-step dense layers that reduce it to a single output value.
inp = Input(batch_shape=batch_inp_shape)
recurrent = GRU(512, return_sequences=True)(inp)
hidden = Dense(256, activation='relu')(recurrent)
projected = Dense(256)(hidden)
out = Dense(1)(projected)

model = Model(inputs=inp, outputs=out)
model.summary()

# Train with Adam against the spectral loss defined above.
model.compile(optimizer=Adam(lr=1e-3), loss=fft_loss)

In [None]:
# The model is built with a fixed batch dimension, so the number of training
# sequences must be a whole multiple of `batch_size`. Keep at most 9984
# sequences (the original cap: 312 batches of 32) and round down to a full
# batch when fewer are available, instead of relying on the data happening
# to be large enough.
max_train_sequences = 9984
n_train = min(max_train_sequences, len(x_train)) // batch_size * batch_size
x_train = x_train[:n_train]
y_train = y_train[:n_train]

In [None]:
# Pass the batch size explicitly: the model input has a fixed batch
# dimension, and relying on fit()'s default of 32 only works while
# `batch_size` happens to be 32 as well.
model.fit(x_train, y_train, batch_size=batch_size, epochs=2)

In [None]:
# Run the trained model on roughly the first `n_seconds_to_predict` seconds
# of the mix. Only whole batches are used because the model's batch
# dimension is fixed.
n_seconds_to_predict = 10
samples_per_batch = batch_size * n_steps
n_batches_predict = (n_seconds_to_predict * sampling_info.sample_rate //
                     samples_per_batch)

pred_x = mix[0:n_batches_predict * samples_per_batch].reshape(-1, n_steps, 1)

# Explicit batch size so prediction matches the model's fixed batch
# dimension instead of depending on predict()'s default.
pred_y = model.predict(pred_x, batch_size=batch_size)

# predict() returns an array shaped like the model output, i.e.
# (sequences, n_steps, 1). Flatten it into one continuous 1-D signal, which
# is the form every other play_array call in this notebook is given.
play_array(pred_y.reshape(-1), sample_rate=sampling_info.sample_rate)