In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to the music_generator package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import datetime as dt

import music_generator.analysis.preprocessing as pp
from music_generator.analysis.play import play_array

import music_generator.analysis.stft as stft

In [3]:
# https://blog.keras.io/building-autoencoders-in-keras.html

import keras

from keras.layers import Dense, Dropout, PReLU, Input
from keras.models import Model
from keras.optimizers import Adam

from keras.regularizers import l2
from keras.models import load_model

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist, fashion_mnist
from sklearn.preprocessing import MinMaxScaler

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Data preprocessing

In [79]:
# sampling_rate, data = pp.read_wave_file("../data/Verplichte Kots_v9.wav", 0)
# Load the recording that the model is trained on.
# NOTE(review): the second argument (0) is presumably a channel index —
# confirm against pp.read_wave_file.
sampling_rate, data = pp.read_wave_file("../data/Dream Theater - Another day-fm7ntyycGbU.wav", 0)
# play_array(data, sampling_rate, range_secs=[17, 31])
# data = data[15*44100:29*44100]

In [86]:
# Number of consecutive samples per training frame (row width of x_train).
stft_sample_size = 1000
# Only used by the commented-out stft.forward_stft / stft.backward_stft calls.
stft_stride = 2048

In [87]:
# Sanity check: total number of samples loaded.
np.asarray(data).shape

(12784149,)

In [88]:
# Raw-waveform framing is used here in place of the STFT front end
# (stft.forward_stft(data, stft_sample_size, stft_stride) is the alternative).

# Drop the trailing samples that do not fill a complete frame.
data = data[:(len(data) // stft_sample_size) * stft_sample_size]

# Each row of the frame matrix holds stft_sample_size consecutive samples.
# The default MinMaxScaler rescales every column independently to [0, 1];
# the fitted scaler is kept so decoded frames can be mapped back later.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(data.reshape(-1, stft_sample_size))

## Constructing the model

In [89]:
# original_dim = 28 * 28
# Width of one input frame; used as the network's input and output size.
original_dim = stft_sample_size
# Units in the hidden layer of both the encoder and the decoder.
intermediate_dim = 10
# Size of the latent code.
latent_dim = 2
# Standard deviation of the noise drawn inside `sampling`.
epsilon_std = 1.0
# NOTE(review): not read by the training cell, which passes epochs=64
# literally — reconcile the two values.
epochs = 1
batch_size = 128

In [90]:
# Encoder: one hidden ReLU layer feeding two linear heads. Downstream code
# treats them as the mean and the log-variance of the latent Gaussian.
x = Input(shape=(original_dim,))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

def sampling(args):
    """Draw a latent sample with the reparameterisation trick.

    Args:
        args: pair ``(mean, log_variance)`` of latent-distribution tensors.

    Returns:
        ``mean + exp(log_variance / 2) * noise``, where ``noise`` is Gaussian
        with mean 0 and standard deviation ``epsilon_std`` and has shape
        ``(batch, latent_dim)``.
    """
    mu, log_var = args
    batch = K.shape(mu)[0]
    noise = K.random_normal(shape=(batch, latent_dim), mean=0.,
                            stddev=epsilon_std)
    # exp(log_var / 2) turns the log-variance into a standard deviation.
    sigma = K.exp(log_var / 2)
    return mu + sigma * noise

# note that "output_shape" isn't necessary with the TensorFlow backend
# The Lambda layer wraps `sampling` so the random draw is part of the graph.
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# we instantiate these layers separately so as to reuse them later
# (the generator cell applies the same two layers to a fresh latent input)
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# instantiate VAE model: input frame -> reconstruction via the sampled latent z
vae = Model(x, x_decoded_mean)

# Compute VAE loss

def custom_loss(x, x_decoded_mean):
    """VAE objective: reconstruction error plus KL divergence.

    Args:
        x: batch of target frames.
        x_decoded_mean: batch of reconstructed frames.

    Returns:
        Scalar tensor, the batch mean of the per-sample loss.

    The KL term reads the encoder tensors ``z_mean`` and ``z_log_var`` from
    the enclosing scope, so this loss is tied to the model built alongside it.
    """
    # metrics.mse averages over the last axis; multiplying by original_dim
    # turns it back into a per-sample sum of squared errors.
    reconstruction = original_dim * metrics.mse(x, x_decoded_mean)
    kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    divergence = - 0.5 * K.sum(kl_terms, axis=-1)
    return K.mean(reconstruction + divergence)

# vae.add_loss(vae_loss)
# The loss is handed to compile() as a function rather than attached with
# add_loss(); summary() prints the layer table for a quick shape check.
vae.compile(optimizer='rmsprop', loss=custom_loss)
vae.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 1000)         0                                            
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 10)           10010       input_7[0][0]                    
__________________________________________________________________________________________________
dense_17 (Dense)                (None, 2)            22          dense_16[0][0]                   
__________________________________________________________________________________________________
dense_18 (Dense)                (None, 2)            22          dense_16[0][0]                   
__________________________________________________________________________________________________
lambda_4 (

In [91]:
# Autoencoder training: the target of every frame is the frame itself.
vae.fit(
    x=x_train,
    y=x_train,
    batch_size=batch_size,
    epochs=64,  # literal value; the `epochs` variable is not read here
    # shuffle=True,
)

Epoch 1/64
Epoch 2/64
Epoch 3/64
Epoch 4/64
Epoch 5/64
Epoch 6/64
Epoch 7/64
Epoch 8/64
Epoch 9/64
Epoch 10/64
Epoch 11/64
Epoch 12/64
Epoch 13/64
Epoch 14/64
Epoch 15/64
Epoch 16/64
Epoch 17/64
Epoch 18/64
Epoch 19/64
Epoch 20/64
Epoch 21/64
Epoch 22/64
Epoch 23/64
Epoch 24/64
Epoch 25/64
Epoch 26/64
Epoch 27/64
Epoch 28/64
Epoch 29/64
Epoch 30/64
Epoch 31/64
Epoch 32/64
Epoch 33/64
Epoch 34/64
Epoch 35/64
Epoch 36/64
Epoch 37/64
Epoch 38/64
Epoch 39/64
Epoch 40/64
Epoch 41/64
Epoch 42/64
Epoch 43/64
Epoch 44/64
Epoch 45/64
Epoch 46/64
Epoch 47/64
Epoch 48/64
Epoch 49/64
Epoch 50/64
Epoch 51/64
Epoch 52/64
Epoch 53/64
Epoch 54/64
Epoch 55/64
Epoch 56/64
Epoch 57/64
Epoch 58/64
Epoch 59/64
Epoch 60/64
Epoch 61/64
Epoch 62/64
Epoch 63/64
Epoch 64/64


<keras.callbacks.History at 0x7f5434870da0>

In [69]:
# Encoder-only model: maps a frame to the mean of its latent distribution.
encoder = Model(x, z_mean)

# Latent coordinates of every training frame, in the same order as x_train.
x_train_encoded = encoder.predict(x_train, batch_size=batch_size)

# Stand-alone decoder ("generator"): applies the already-trained decoder
# layers to a fresh latent input so arbitrary latent points can be decoded.
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

In [70]:
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable plotly's offline rendering inside the notebook.
init_notebook_mode(connected=True)

In [71]:
# 3-D scatter of the encoder output: the two latent coordinates on x / y and
# the frame index on z. The frame index also drives the marker colour, so the
# position of each frame within the recording is visible.
frame_index = np.arange(len(x_train_encoded))
marker_style = dict(
    size=3,
    color=frame_index,
    colorscale='Viridis',
    opacity=0.8,
)
trace = [
    go.Scatter3d(
        x=x_train_encoded[:, 0],
        y=x_train_encoded[:, 1],
        z=frame_index,
        mode='markers',
        marker=marker_style,
    )
]
layout = go.Layout(height=800, width=1000)
fig = go.Figure(data=trace, layout=layout)

# Rendering is left switched off:
# iplot(fig)

In [72]:
# Number of grid points along each latent axis.
n = 32

# np.percentile expects q on a 0-100 scale, so the 5 %-95 % range of each
# latent coordinate is linspace(5, 95), not linspace(0.05, 0.95) (which would
# cover only the lowest one percent of the encoded values).
grid_percentiles = np.linspace(5, 95, n)
grid_x = np.percentile(x_train_encoded[:, 0], grid_percentiles)
grid_y = np.percentile(x_train_encoded[:, 1], grid_percentiles)
# Alternative grid taken from the standard-normal prior instead of the data:
# grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
# grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

In [73]:
from multiprocessing import Pool

In [74]:
# Decode one frame for every point of the latent grid. The first latent
# coordinate comes from grid_x and the second from grid_y, matching the
# encoder dimension each grid was computed from. grid_y varies fastest, so
# rows are ordered as (grid_x[0], grid_y[0]), (grid_x[0], grid_y[1]), ...
z_grid = np.array([[z0, z1] for z0 in grid_x for z1 in grid_y])

# A single batched predict / inverse_transform call replaces one call per
# grid point. The result has one decoded, un-scaled frame per row.
x_to_reverse = scaler.inverse_transform(generator.predict(z_grid))

In [75]:
# np.array(x_to_reverse).shape

In [76]:
# _ = stft.backward_stft(scaler.inverse_transform(x_train), stft_stride)
# play_array(_, range_secs=[0,10])

In [78]:
# x_sampled = stft.backward_stft(np.array(x_to_reverse), stft_stride)
# Each row of x_to_reverse is one decoded frame of consecutive samples (the
# training frames were cut with a row-major reshape), so the frames are
# concatenated with a plain row-major flatten. Transposing first would
# interleave samples from different frames.
play_array(x_to_reverse.reshape(-1))