In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import keras
from keras.optimizers import Adam
from keras.layers import Input, GRU, Flatten, MaxPool2D, MaxPool1D
from keras.layers import PReLU, Dropout, Lambda, Dense
from keras.models import Model
import tensorflow as tf

from music_generator.basic.random import generate_dataset
from music_generator.basic.signalproc import SamplingInfo
from music_generator.musical.timing import Tempo
from music_generator.musical.scales import GenericScale
from music_generator.analysis.play import play_mono_as_stereo, play_array
from music_generator.basic.signalproc import mix_at
from music_generator.analysis import preprocessing

from music_generator.musical import scales
import numpy as np
from multiprocessing import Pool
from functools import partial

import matplotlib.pyplot as plt
from IPython.display import Audio
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Shared sampling configuration; its `sample_rate` attribute is read by the
# playback and prediction cells further down.
# NOTE(review): 22050 is presumably the sample rate in Hz — confirm against SamplingInfo.
sampling_info = SamplingInfo(22050)

In [4]:
# Chromatic scale starting on C; used only to enumerate the root symbols.
all_roots = scales.chromatic_scale('C')
# Notes produced by generate(0, 1), converted to their symbols below.
chromatic_notes = all_roots.generate(0, 1)
roots = [note.get_symbol() for note in chromatic_notes]
roots

['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

In [5]:
def generate_dataset_for_root(root):
    """Generate one dataset in the scale built on `root`.

    The scale is a GenericScale over the semitone offsets
    [0, 2, 3, 5, 7, 8, 10]; the dataset spans 64 measures at Tempo(120) and
    uses the notebook-level `sampling_info`.

    Kept as a module-level function of a single argument so it can be handed
    to `Pool.map`.
    """
    tempo = Tempo(120)
    scale_offsets = [0, 2, 3, 5, 7, 8, 10]
    scale = GenericScale(root, scale_offsets)
    return generate_dataset(n_measures=64,
                            tempo=tempo,
                            scale=scale,
                            sampling_info=sampling_info)
    
# Build one dataset per root in parallel on eight worker processes;
# `map` returns the results in the same order as `roots`.
with Pool(processes=8) as workers:
    datasets = workers.map(generate_dataset_for_root, roots)

In [6]:
# Merge the per-root datasets into a single data set whose pieces all have
# the same size, then split the result into the individual tracks and the mix.
combined = preprocessing.combine_datasets(datasets)
audio_tracks, mix = combined

In [7]:
# Sanity check: the mix and the track array must agree on the size of their last axis.
mix.shape[-1] == audio_tracks.shape[-1]

True

In [8]:
# Rebuild the mix from the individual tracks: track 2 with track 1 first,
# then that result with track 0. This replaces the `mix` returned by
# combine_datasets.
partial_mix = mix_at(audio_tracks[2], audio_tracks[1])
mix = mix_at(partial_mix, audio_tracks[0])

In [13]:
# How many 4096-sample fragments fit into the mix (4096 is the fragment
# length configured in the next cell).
n_mix_samples = len(mix)
n_mix_samples / 4096

8260.6728515625

In [14]:
# Settings handed to preprocessing.create_training_data_set below.
fragment_length = 4096
n_samples = 10000

# The network sees the full mix and is trained towards track 2.
target_track = audio_tracks[2]
input_track = mix

In [15]:
# Audition the same window (range_secs=[50, 51], presumably seconds 50-51)
# of the network input and of the target track.
# NOTE(review): a cell only renders its last expression; if play_array
# returns a player object instead of displaying it, the first call shows
# nothing — confirm.
play_array(input_track, sampling_info.sample_rate, range_secs=[50, 51])
play_array(target_track, sampling_info.sample_rate, range_secs=[50, 51])

# Create training set

In [16]:
# Build the (x, y) training arrays from the input and target tracks.
x, y = preprocessing.create_training_data_set(
    n_samples, fragment_length, input_track, target_track)

# Append a trailing axis of length 1 to x.
x = np.expand_dims(x, axis=-1)

In [17]:
# Repeat training fragment 2 ten times back to back so it plays long enough to hear.
looped_fragment = np.tile(x[2, :, 0], 10)
play_array(looped_fragment, sample_rate=sampling_info.sample_rate)

# Build network

In [18]:
# Shape of a single input example (x without its leading sample axis).
input_shape = x[0].shape
# Length of a single target example. This is a property of the targets, so it
# is read from y; the previous code read it from a second *input* sample
# (x[1]), which only matched because inputs and targets are equally long.
output_shape = y[0].shape[0]

In [19]:
def fft_loss(y_true, y_pred):
    """Mean squared error between the magnitude spectra of target and prediction.

    Both tensors are passed through `tf.spectral.rfft` and only the absolute
    values of the results are compared, so phase does not contribute.
    """
    true_magnitude = tf.abs(tf.spectral.rfft(y_true))
    pred_magnitude = tf.abs(tf.spectral.rfft(y_pred))
    return tf.losses.mean_squared_error(true_magnitude, pred_magnitude)

def mse_scaled(y_true, y_pred):
    """Mean squared error between `y_true` and `y_pred`, multiplied by 100."""
    mse = tf.losses.mean_squared_error(y_true, y_pred)
    return 100 * mse

In [20]:
# Cut inputs and targets into windows of 128 values each. The window length
# has to equal `n_steps` (set in the next cell) and the width of the model's
# final Dense layer.
x_train = np.reshape(x, (-1, 128, 1))
y_train = np.reshape(y, (-1, 128))

In [21]:
# Fixed batch geometry for the network input.
n_steps = 128     # time steps per window
batch_size = 32   # windows per batch
# (batch, time steps, features) with a single feature per step.
batch_inp_shape = (batch_size, n_steps) + (1,)

In [22]:
# Network: GRU -> Dense(256) -> PReLU -> Dense(128).
# The GRU is stateful, which is why the input is declared with a full
# batch_shape (fixed batch size) rather than a per-sample shape.
inp = Input(batch_shape=batch_inp_shape)
recurrent = GRU(128, stateful=True)(inp)
hidden = Dense(256)(recurrent)
activated = PReLU()(hidden)
# 128 outputs: one value per entry of a y_train window.
out = Dense(128)(activated)

model = Model(inp, out)
model.summary()
# Train with Adam (learning rate 1e-3) on the scaled mean squared error.
model.compile(optimizer=Adam(lr=1e-3), loss=mse_scaled)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (32, 128, 1)              0         
_________________________________________________________________
gru_1 (GRU)                  (32, 128)                 49920     
_________________________________________________________________
dense_1 (Dense)              (32, 256)                 33024     
_________________________________________________________________
p_re_lu_1 (PReLU)            (32, 256)                 256       
_________________________________________________________________
dense_2 (Dense)              (32, 128)                 32896     
Total params: 116,096
Trainable params: 116,096
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The model is built with a fixed batch size (stateful GRU), so pass that
# size explicitly instead of relying on fit()'s default happening to match.
# Keras carries the state of sample i in one batch over to sample i in the
# next batch, so batch order must not be shuffled for a stateful layer.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=2,
          validation_split=0.2,
          shuffle=False)

Train on 256000 samples, validate on 64000 samples
Epoch 1/2
Epoch 2/2
 21248/256000 [=>............................] - ETA: 5:16 - loss: 0.2911

In [None]:
# Predict roughly `n_seconds_to_predict` seconds of audio, rounded down to a
# whole number of batches because the stateful model only accepts full batches.
n_seconds_to_predict = 10
n_batches_predict = (n_seconds_to_predict * sampling_info.sample_rate //
                     batch_size // n_steps)

# Leading part of the mix, cut into (n_steps, 1) windows.
pred_x = mix[0:n_batches_predict*batch_size*n_steps].reshape(-1, n_steps, 1)

# Start from a clean recurrent state instead of whatever training left behind.
model.reset_states()
# One row per input window; pass the model's fixed batch size explicitly.
pred_y = model.predict(pred_x, batch_size=batch_size)

# Concatenate the predicted windows into one 1-D signal before playback,
# matching the 1-D arrays passed to play_array elsewhere in this notebook.
pred_audio = pred_y.reshape(-1)
play_array(pred_audio, sample_rate=sampling_info.sample_rate)