In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell executes, so edits to the imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import keras
from music_generator.basic.random import generate_dataset
from music_generator.basic.signalproc import SamplingInfo
from music_generator.musical.timing import Tempo
from music_generator.musical.scales import GenericScale
from music_generator.analysis.play import play_mono_as_stereo, play_array
from music_generator.basic.signalproc import mix_at

import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Shared sampling configuration: passed to every generate_dataset call, and its
# sample_rate attribute is handed to the playback helper.
# NOTE(review): 44100 is presumably the sample rate in Hz -- confirm against
# SamplingInfo's constructor.
sampling_info = SamplingInfo(44100)

In [None]:
# Two datasets that differ only in the root note handed to GenericScale
# ('E' first, then 'F#'); every other argument is the same for both.
# NOTE(review): the interval list looks like a natural-minor pattern in
# semitones -- confirm against GenericScale.
ds1, ds2 = (
    generate_dataset(
        n_measures=64,
        tempo=Tempo(120),
        scale=GenericScale(root_note, [0, 2, 3, 5, 7, 8, 10]),
        sampling_info=sampling_info,
    )
    for root_note in ('E', 'F#')
)

In [None]:
from music_generator.analysis.preprocessing import combine_datasets

In [None]:
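# NOTE: earlier inline draft of combine_datasets, kept for reference only; the
# notebook calls the version imported from music_generator.analysis.preprocessing.
# def combine_datasets(ds1, ds2):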
    
#     print("WARNING: not combining the score tracks")
#     audio_tracks = [np.concatenate((x1, x2)) for x1, x2 in zip(ds1[1], ds2[1])]
#     mix = np.concatenate((ds1[2], ds2[2]))
    
#     return audio_tracks, mix

In [None]:
# Merge the two generated datasets. The helper's result is unpacked into a
# collection of audio tracks (indexed later as audio_tracks[2]) and the mix
# signal that serves as the network input.
audio_tracks, mix = combine_datasets(ds1, ds2)

In [None]:
# Audition the first five seconds of the combined mix. The slice length is
# derived from sampling_info instead of repeating the literal 44100, so it
# stays in step with the rate that is handed to the player.
play_mono_as_stereo(mix[:5 * int(sampling_info.sample_rate)], sampling_info.sample_rate)

In [None]:
# play_mono_as_stereo(mix_at(mix, -audio_tracks[2], at=0))

In [None]:
# Training-set configuration.
# n_samples and fragment_length are the first two arguments later given to
# create_training_data_set.
# NOTE(review): presumably the number of fragments to draw and the length of
# each fragment in samples -- confirm against the helper.
n_samples = 4096 * 10
fragment_length = 1024 * 1
# Signal intended as the network input: the full mix.
input_track = mix
# Signal intended as the network target: the track at index 2.
# NOTE(review): which instrument index 2 refers to is not visible here.
target_track = audio_tracks[2]

# Create training set

In [None]:
from music_generator.analysis.preprocessing import create_training_data_set
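# NOTE: earlier inline draft of create_training_data_set, kept for reference
# only; the notebook calls the imported version.
# def create_training_data_set(n_samples, fragment_length, input_track, target_track):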
#     max_index = min(len(mix), len(target_track))
#     max_start_index = max_index - fragment_length
    
#     # Selection range
#     selection_ranges = np.random.randint(0, max_start_index, n_samples)
#     selection_ranges = [{'begin': x, 'end': x + fragment_length} for x in selection_ranges]
    
#     x = np.array([mix[sr['begin']:sr['end']] for sr in selection_ranges])
#     y = np.array([target_track[sr['begin']:sr['end']] for sr in selection_ranges])
    
#     return x, y

In [None]:
# Build the training pairs from the tracks chosen in the configuration cell
# (input_track / target_track), so that changing the selection there actually
# changes the data the network is trained on.
x, y = create_training_data_set(n_samples, fragment_length, input_track, target_track)

In [None]:
def x_fade_profile(batch_dim):
    """Return a triangular cross-fade window with ``batch_dim`` entries.

    The weight is 0 at index 0, rises linearly to 1 at index ``batch_dim / 2``
    and then falls linearly again over the remaining indices.
    """
    half_width = batch_dim / 2
    distance_from_centre = np.abs(np.arange(batch_dim) - half_width)
    return 1 - distance_from_centre / half_width

In [None]:
def model_predict(model, input_track):
    """Run ``model`` over ``input_track`` in overlapping windows and cross-fade the results.

    The track is cut into consecutive windows of the model's input width twice:
    once starting at sample 0 and once starting half a window later. Every
    predicted window is weighted with ``x_fade_profile`` and the two flattened
    streams are combined by ``mix_at`` with an offset of half a window.

    Args:
        model: object exposing ``input_shape`` and ``predict`` (the Keras model
            built in this notebook). Its output width must equal its input
            width, because the fade window is applied to the predictions.
        input_track: 1-D numpy array of samples.

    Returns:
        The result of ``mix_at`` applied to the two weighted prediction streams.

    Raises:
        ValueError: if ``input_track`` holds fewer than two full windows, in
            which case no batch could be formed.
    """
    # Read the window width from the model itself rather than from the
    # notebook-level ``input_shape`` variable, which is only defined in a later
    # cell and could silently disagree with the model being used.
    dim = model.input_shape[-1]

    # One window is held back so the half-shifted copy never runs past the end.
    n_batches = len(input_track) // dim - 1
    if n_batches < 1:
        raise ValueError(
            "input_track needs at least %d samples (two windows of %d), got %d"
            % (2 * dim, dim, len(input_track))
        )

    span = n_batches * dim
    half = dim // 2
    pred_batches = input_track[:span].reshape((-1, dim))
    pred_batches_shifted = input_track[half:span + half].reshape((-1, dim))

    xfp = x_fade_profile(dim)

    # Broadcasting applies the fade window to every predicted row at once.
    x0 = (np.asarray(model.predict(pred_batches)) * xfp).reshape(-1)
    x1 = (np.asarray(model.predict(pred_batches_shifted)) * xfp).reshape(-1)

    return mix_at(x0, x1, half)
#     np.tanh()

# play_array(model_predict(model, mix), do_wait_done=False)
# play_array(model_predict(model, mix[0:5*44100]))

# Build network

In [None]:
from keras.layers import Dense, Dropout, PReLU
from keras.optimizers import Adam

In [None]:
# Per-sample shapes for the network: the input layer matches one input
# fragment, the output layer matches one target fragment.
input_shape = x[0].shape
# The output width comes from the targets (y). Indexing x[1] would read the
# second *input* fragment, which only gives the right number while inputs and
# targets happen to have the same length.
output_shape = y[0].shape[0]

In [None]:
# Fully connected network: input fragment -> 1024 units -> 256 units ->
# output_shape units, with a PReLU activation after each hidden dense layer.
model = keras.models.Sequential([
    Dense(1024, input_shape=input_shape),
    PReLU(),
    Dense(256),
    PReLU(),
    Dense(output_shape),
])
# Adam with its default settings, mean squared error as the loss.
model.compile(optimizer=Adam(), loss='mse')

# Baseline check: play the start of what the still-untrained network produces.
# NOTE(review): do_wait_done=False presumably returns without waiting for
# playback to finish -- confirm against play_array.
play_array(model_predict(model, mix)[:5 * 44100], do_wait_done=False)

In [None]:
# Alternate short training rounds with listening checks. The loop is bounded so
# the cell terminates under "Restart & Run All" and later cells can execute.
# To train longer, raise N_TRAINING_ROUNDS or simply re-run the cell: fit()
# continues from the model's current weights.
N_TRAINING_ROUNDS = 10
for training_round in range(N_TRAINING_ROUNDS):
    model.fit(x, y, epochs=2)
    # Play the model's estimate first, then the unprocessed mix for comparison.
    play_array(model_predict(model, mix)[0:5*44100], do_wait_done=True)
    play_array(mix[0:5*44100])

In [None]:
# Audition the model's output for the opening 15 * 44100 samples of the
# training mix.
play_array(model_predict(model, mix[:15 * 44100]))

In [None]:
# Reference: the same opening stretch of the unprocessed mix.
play_array(mix[:15 * 44100])

In [None]:
import os
from scipy.io.wavfile import read

# Optional external recording for trying the model on real audio. The location
# can be overridden with the MUSIC_GENERATOR_TEST_WAV environment variable; the
# default is the original machine-specific absolute path, kept so existing
# setups continue to work unchanged.
wav_path = os.environ.get("MUSIC_GENERATOR_TEST_WAV",
                          "/Users/marcelraas/dev/test_data/anotherDay.wav")
# scipy's read() returns a (sample_rate, data) pair.
wf = read(wav_path, mmap=False)
# play_array(model_predict(model, wf[1][120*44100:240*44100, 0] / 32000))

In [None]:
# Separate dataset for listening tests, generated with a different interval
# pattern than the two training datasets. generate_dataset's result is unpacked
# into score tracks, audio tracks and the mix.
score_tracks_t, audio_tracks_t, mix_t = generate_dataset(
    n_measures=64,
    tempo=Tempo(120),
    scale=GenericScale('E', [0, 1, 4, 5, 7, 8, 10]),
    sampling_info=sampling_info,
)

In [None]:
# Audition the model on the opening 15 * 44100 samples of the separately
# generated test mix.
play_array(model_predict(model, mix_t[:15 * 44100]))
# play_array(model_predict(model, audio_tracks_t[2][0:15*44100]))