In [1]:
import os

# Theano picks up THEANO_FLAGS from the environment when it is imported,
# so this assignment has to run before the `import theano` in the next cell.
THEANO_FLAGS = "device=cpu,floatX=float32,exception_verbosity=high"
os.environ["THEANO_FLAGS"] = THEANO_FLAGS

In [2]:
import os
import sys
import traceback
import functools
import numpy as np
import matplotlib
import matplotlib.pylab as plt
import seaborn as sbn
import deepdish as dd
import theano
import theano.tensor as T
import lasagne
from IPython import display
from tqdm import tqdm
import neural_networks as nn
from music_utils import generateSequence
from nnet_utils import get_next_batch_rnn
%matplotlib inline

import pdb

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [3]:
# Interval patterns used to generate the "major" and "minor" sequences.
int_maj = [0, 2, 2, 1, 2, 2, 2]
int_min = [0, 2, 1, 2, 2, 1, 2]

# Length bounds handed to generateSequence (max is the full pattern length).
min_len, max_len = 2, len(int_maj)

# generateSequence returns an (inputs, targets, masks) triple per pattern.
maj_sequences = generateSequence(int_maj, min_len, max_len)
min_sequences = generateSequence(int_min, min_len, max_len)
input_maj, target_maj, masks_maj = maj_sequences
input_min, target_min, masks_min = min_sequences

In [4]:
# Mini-batch sizes for the discriminator and generator updates.
d_batch_size = g_batch_size = 64

# An "epoch" is as long as the set of major-pattern inputs.
epoch_size = len(input_maj)

# Tensor dimensions shared by the data, the noise and the conditions.
n_timesteps, n_features, n_conditions = 7, 12, 3

In [5]:
# Stack the major examples first and the minor examples second; every array
# below follows that same row order.
n_maj, n_min = len(masks_maj), len(masks_min)

input_data = np.concatenate((input_maj, input_min)).astype(np.float32)
target_data = np.concatenate((target_maj, target_min)).astype(np.float32)
masks_data = np.concatenate((masks_maj, masks_min)).astype(np.int32)

# Condition tensors: all zeros for major rows, all ones for minor rows.
# The second axis is n_timesteps (it was n_features) so the array agrees with
# g_specs['cond_shape'] and with the conditions built by sample_noise_batch.
# NOTE(review): unlike sample_noise_batch these are not one-hot along the
# last axis -- confirm the intended encoding before feeding them to the generator.
cond_data = np.concatenate((np.zeros((n_maj, n_timesteps, n_conditions), dtype=np.float32),
                            np.ones((n_min, n_timesteps, n_conditions), dtype=np.float32)))

# One-hot class labels for the real data: column 0 = major, column 1 = minor.
# The last column stays zero here; sample_noise_batch sets it for generated samples.
lbls_data = np.zeros((len(cond_data), n_conditions), dtype=np.float32)
lbls_data[:n_maj, 0] = 1
lbls_data[n_maj:, 1] = 1

In [6]:
def output_nonlinearity(data, temperature=1):
    """Softmax with temperature.

    Divides `data` by `temperature`, passes it through Lasagne's linear
    (identity) nonlinearity and then through Lasagne's softmax.
    """
    scaled = lasagne.nonlinearities.linear(data / temperature)
    return lasagne.nonlinearities.softmax(scaled)


# Output nonlinearity referenced by the discriminator spec (temperature fixed at 1).
nn_output_nonlinearity = functools.partial(output_nonlinearity, temperature=1)

In [7]:
# Nonlinearities as (forward recurrent, backward recurrent, output layer).
d_nonlinearities = (
    lasagne.nonlinearities.tanh,
    lasagne.nonlinearities.tanh,
    nn_output_nonlinearity,  # softmax with temperature
)
g_nonlinearities = (
    lasagne.nonlinearities.tanh,
    lasagne.nonlinearities.tanh,
    lasagne.nonlinearities.linear,  # linear here; build_generator adds the softmax itself
)

# Discriminator hyper-parameters.
d_specs = {
    'batch_size': d_batch_size,
    'epoch_size': epoch_size,
    'input_shape': (None, n_timesteps, n_features),
    'mask_shape': (None, n_timesteps),
    'n_output_units': n_conditions,
    'n_units': 16,
    'n_hidden': 16,
    'grad_clip': 100.,
    'init': lasagne.init.HeUniform(),
    'non_linearities': d_nonlinearities,
    'learning_rate': 1e-2,
}

# Generator hyper-parameters. The two reshape targets bake in g_batch_size,
# so the generator output is tied to that batch size.
g_specs = {
    'batch_size': g_batch_size,
    'epoch_size': epoch_size,
    'noise_shape': (None, n_timesteps, n_features),
    'cond_shape': (None, n_timesteps, n_conditions),
    'mask_shape': (None, n_timesteps),
    'softmax_shape': (g_batch_size * n_timesteps, n_features),
    'output_shape': (g_batch_size, n_timesteps, n_features),
    'n_output_units': n_timesteps * n_features,
    'n_units': 16,
    'n_hidden': 16,
    'grad_clip': 100.,
    'init': lasagne.init.HeUniform(),
    'non_linearities': g_nonlinearities,
    'learning_rate': 1e-2,
}

In [8]:
# declare theano variables
d_in_X = T.ftensor3('data')
g_in_Z = T.ftensor3('noise')
g_in_C = T.ftensor3('condition')
d_in_M = T.imatrix('dismask')
g_in_M = T.imatrix('genmask')

In [9]:
def build_discriminator(params):
    """Build the bidirectional recurrent discriminator.

    The network is read from `params` (previously the argument was ignored
    and the module-level `d_specs` was used instead).

    Parameters
    ----------
    params : dict
        Discriminator spec. Keys read: 'input_shape', 'mask_shape',
        'n_units', 'grad_clip', 'init', 'n_output_units' and
        'non_linearities' (indexed as forward, backward, output).

    Returns
    -------
    tuple
        (l_in, l_mask, l_out): the data input layer, the mask input layer
        and the dense output layer.
    """
    # input layers
    l_in = lasagne.layers.InputLayer(shape=params['input_shape'], name='d_in')
    l_mask = lasagne.layers.InputLayer(shape=params['mask_shape'], name='d_mask')

    # recurrent layers for the bidirectional network; only the final hidden
    # state of each direction is kept
    l_forward = lasagne.layers.RecurrentLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        W_in_to_hid=params['init'], W_hid_to_hid=params['init'],
        nonlinearity=params['non_linearities'][0], only_return_final=True, mask_input=l_mask)
    l_backward = lasagne.layers.RecurrentLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        W_in_to_hid=params['init'], W_hid_to_hid=params['init'],
        nonlinearity=params['non_linearities'][1], only_return_final=True, mask_input=l_mask,
        backwards=True)

    # concatenate output of forward and backward layers
    l_concat = lasagne.layers.ConcatLayer([l_forward, l_backward])

    # output layer
    l_out = lasagne.layers.DenseLayer(
        l_concat, num_units=params['n_output_units'], nonlinearity=params['non_linearities'][2])

    return l_in, l_mask, l_out

In [10]:
def build_generator(params):
    """Build the conditional bidirectional recurrent generator.

    The network is read from `params` (previously the argument was ignored
    and the module-level `g_specs` was used instead). The input layers are
    bound to the module-level symbolic variables g_in_Z, g_in_C and g_in_M.

    Parameters
    ----------
    params : dict
        Generator spec. Keys read: 'noise_shape', 'cond_shape', 'mask_shape',
        'n_units', 'grad_clip', 'init', 'n_output_units', 'softmax_shape',
        'output_shape' and 'non_linearities' (indexed as forward, backward,
        dense output).

    Returns
    -------
    tuple
        (l_input, l_mask, l_out, l_cond): noise input layer, mask input layer,
        final reshaped softmax layer and condition input layer.
    """
    linear = lasagne.nonlinearities.linear

    # input layers
    l_input = lasagne.layers.InputLayer(shape=params['noise_shape'], input_var=g_in_Z, name='g_noise')
    l_cond = lasagne.layers.InputLayer(shape=params['cond_shape'], input_var=g_in_C, name='g_cond')
    l_mask = lasagne.layers.InputLayer(shape=params['mask_shape'], input_var=g_in_M, name='g_mask')

    def _recurrent(incoming, **extra):
        # One linear recurrent layer returning only its final state; `extra`
        # carries the per-layer differences (b=None, backwards=True).
        return lasagne.layers.RecurrentLayer(
            incoming, params['n_units'], grad_clipping=params['grad_clip'],
            W_in_to_hid=params['init'], W_hid_to_hid=params['init'],
            nonlinearity=linear, only_return_final=True, mask_input=l_mask, **extra)

    # recurrent layers for the bidirectional network; the condition branches
    # carry no bias (b=None)
    l_forward_data = _recurrent(l_input)
    l_forward_cond = _recurrent(l_cond, b=None)
    l_backward_data = _recurrent(l_input, backwards=True)
    l_backward_cond = _recurrent(l_cond, b=None, backwards=True)

    # sum the linear outputs of the data and condition branches per direction
    l_forward_sum = lasagne.layers.ElemwiseSumLayer([l_forward_data, l_forward_cond])
    l_backward_sum = lasagne.layers.ElemwiseSumLayer([l_backward_data, l_backward_cond])

    # apply the configured nonlinearity to each direction after the sum
    l_forward_nonlinearity = lasagne.layers.NonlinearityLayer(
        l_forward_sum, nonlinearity=params['non_linearities'][0])
    l_backward_nonlinearity = lasagne.layers.NonlinearityLayer(
        l_backward_sum, nonlinearity=params['non_linearities'][1])

    # concatenate output of forward and backward layers
    l_concat = lasagne.layers.ConcatLayer(
        [l_forward_nonlinearity, l_backward_nonlinearity])

    # output layer where time is collapsed into one dimension
    l_out = lasagne.layers.DenseLayer(
        l_concat, num_units=params['n_output_units'], nonlinearity=params['non_linearities'][2])

    # reshape to params['softmax_shape'] so the softmax below is applied row-wise
    l_out_reshape = lasagne.layers.ReshapeLayer(l_out, params['softmax_shape'])

    # softmax over the last axis of the reshaped output
    l_out_softmax = lasagne.layers.NonlinearityLayer(l_out_reshape, lasagne.nonlinearities.softmax)

    # reshape to params['output_shape'] to match the discriminator's input
    l_out_softmax_reshape = lasagne.layers.ReshapeLayer(l_out_softmax, params['output_shape'])

    return l_input, l_mask, l_out_softmax_reshape, l_cond

In [11]:
def build_training(discriminator, generator, d_specs, g_specs):
    """Compile the Theano training, prediction and sampling functions.

    Parameters
    ----------
    discriminator : tuple
        (input layer, mask layer, output layer) as returned by build_discriminator.
    generator : tuple
        (noise layer, mask layer, output layer, condition layer) as returned
        by build_generator.
    d_specs, g_specs : dict
        Only 'learning_rate' is read from each.

    Returns
    -------
    tuple
        (d_train_fn, d_predict_fn, g_train_fn, g_sample_fn).
        d_train_fn(X, M, Z, g_M, C, d_target, g_target) applies one Adagrad
        step to the discriminator and returns
        [d_loss, D(x), G(z), D(G(z)), d_target, g_target].
        d_predict_fn(X, M) returns D(x); it used to be returned as None.
        g_train_fn(Z, g_M, C, g_target) applies one Adagrad step to the
        generator and returns its loss.
        g_sample_fn(Z, g_M, C) returns G(z).
    """
    d_in_layer, d_mask_layer, d_out_layer = discriminator[0], discriminator[1], discriminator[2]
    g_noise_layer, g_mask_layer, g_out_layer, g_cond_layer = (
        generator[0], generator[1], generator[2], generator[3])

    # target label matrices for the real and the generated batch
    d_target = T.fmatrix('d_target')
    g_target = T.fmatrix('g_target')

    d_params = lasagne.layers.get_all_params(d_out_layer, trainable=True)
    g_params = lasagne.layers.get_all_params(g_out_layer, trainable=True)

    # G(z)
    g_z = lasagne.layers.get_output(g_out_layer,
                                    inputs={g_noise_layer: g_in_Z,
                                            g_mask_layer: g_in_M,
                                            g_cond_layer: g_in_C})
    # D(G(z)): the generated batch is scored with the generator's mask
    d_g_z = lasagne.layers.get_output(d_out_layer,
                                      inputs={d_in_layer: g_z,
                                              d_mask_layer: g_in_M})

    # D(x)
    d_x = lasagne.layers.get_output(d_out_layer,
                                    inputs={d_in_layer: d_in_X,
                                            d_mask_layer: d_in_M})

    # Generator loss: cross-entropy of (1 - D(G(z))) against g_target.
    g_loss = lasagne.objectives.categorical_crossentropy(1 - d_g_z, g_target)
    g_loss = g_loss.mean()
    g_updates = lasagne.updates.adagrad(g_loss, g_params, g_specs['learning_rate'])
    g_train_fn = theano.function(
        inputs=[g_in_Z, g_in_M, g_in_C, g_target],
        outputs=g_loss, updates=g_updates)
    # Reuse the explicitly bound g_z instead of rebuilding the output through
    # the layers' implicit input variables.
    g_sample_fn = theano.function(
        inputs=[g_in_Z, g_in_M, g_in_C],
        outputs=g_z)

    # Discriminator loss: cross-entropy on the real batch plus cross-entropy
    # on the generated batch.
    d_loss = (lasagne.objectives.categorical_crossentropy(d_x, d_target) +
              lasagne.objectives.categorical_crossentropy(d_g_z, g_target))
    d_loss = d_loss.mean()
    d_updates = lasagne.updates.adagrad(d_loss, d_params, d_specs['learning_rate'])
    d_train_fn = theano.function(
        inputs=[d_in_X, d_in_M, g_in_Z, g_in_M, g_in_C, d_target, g_target],
        outputs=[d_loss, d_x, g_z, d_g_z, d_target, g_target], updates=d_updates)

    # Prediction only: D(x) for a batch and its mask, no parameter updates.
    d_predict_fn = theano.function(
        inputs=[d_in_X, d_in_M],
        outputs=d_x)

    return d_train_fn, d_predict_fn, g_train_fn, g_sample_fn

In [12]:
def sample_noise_batch(batch_size, min_len, max_len, n_timesteps, n_features, n_conditions):
    """Sample one batch of generator inputs together with its labels.

    Parameters
    ----------
    batch_size : int
        Number of rows in every returned array.
    min_len, max_len : int
        Each mask keeps a random number of leading timesteps drawn with
        np.random.randint(min_len, max_len), so `max_len` itself is never
        drawn. `max_len` is also the mask width.
    n_timesteps, n_features, n_conditions : int
        Dimensions of the noise and condition tensors.

    Returns
    -------
    tuple
        noises     : float32 (batch_size, n_timesteps, n_features), standard normal.
        masks      : int32 (batch_size, max_len), ones on the kept prefix.
        conditions : float32 (batch_size, n_timesteps, n_conditions), one-hot
                     on the last condition at every timestep.
        lbls       : float32 (batch_size, n_conditions), one-hot on the last class.
    """
    noises = np.random.normal(size=(batch_size, n_timesteps, n_features)).astype('float32')

    # One-hot condition on the last class, the same index the labels use
    # (the index used to be hard-coded to 2).
    generated_class = n_conditions - 1
    conditions = np.zeros((batch_size, n_timesteps, n_conditions), dtype=np.float32)
    conditions[:, :, generated_class] = 1

    # Random prefix masks, built by broadcasting instead of a Python-2-only
    # xrange loop.
    # NOTE(review): the mask width is max_len while the noise uses n_timesteps;
    # callers in this notebook pass equal values -- confirm they must match.
    lengths = np.random.randint(min_len, max_len, batch_size)
    masks = (np.arange(max_len)[None, :] < lengths[:, None]).astype(np.int32)

    # create labels
    lbls = np.zeros((batch_size, n_conditions), dtype=np.float32)
    lbls[:, generated_class] = 1

    return noises, masks, conditions, lbls


def sample_data_batch(batch_size, inputs, targets, masks, lbls, conds=None):
    """Draw a random batch of rows, without replacement, from aligned arrays.

    The same random row indices (at most `batch_size` of them) are applied to
    every array. Returns (inputs, targets, masks, lbls), or
    (inputs, targets, masks, conds, lbls) when `conds` is given.
    """
    picked = np.random.permutation(len(inputs))[:batch_size]
    batch = [inputs[picked], targets[picked], masks[picked]]
    if conds is not None:
        batch.append(conds[picked])
    batch.append(lbls[picked])
    return tuple(batch)
    
    
def sample_melodies(nrow, ncol, batch_size, min_len, max_len, n_timesteps, n_features, n_conditions):
    """Generate a batch with g_sample_fn and plot nrow * ncol random samples.

    Each chosen sample is drawn, transposed, as an annotated seaborn heatmap
    with the y axis inverted. Returns the matplotlib figure holding the grid.
    """
    noise, noise_masks, noise_conds, _ = sample_noise_batch(
        batch_size, min_len, max_len, n_timesteps, n_features, n_conditions)
    melodies = g_sample_fn(noise, noise_masks, noise_conds)

    # random picks (with replacement) among the generated samples
    ids = np.random.randint(0, len(melodies), batch_size)

    fig = plt.figure(figsize=(16, 8))
    n_panels = nrow * ncol
    for panel in range(n_panels):
        ax = plt.subplot(nrow, ncol, panel + 1)
        heatmap_ax = sbn.heatmap(melodies[ids[panel]].T, annot=True, ax=ax)
        heatmap_ax.invert_yaxis()

    return fig


def sample_probas(batch_size, inputs, targets, masks):
    """Plot histograms of the discriminator outputs D(x) and D(G(z)).

    The previous body could not run: it called undefined names (d_predict,
    g_output_fn), called sample_data_batch without its data arguments,
    unpacked the four values returned by sample_noise_batch into three names
    and fed the data masks to the generator.

    Parameters
    ----------
    batch_size : int
        Number of real rows to score.
    inputs, masks : array-like
        Real sequences and their masks, indexed with the same random rows.
    targets : array-like
        Unused; kept so the signature stays unchanged.

    Raises
    ------
    RuntimeError
        If the module-level d_predict_fn is None (no prediction function
        was compiled).
    """
    if d_predict_fn is None:
        raise RuntimeError('d_predict_fn is None: build_training did not compile '
                           'a discriminator prediction function')

    # D(x) on a random batch of real data
    excerpt = np.random.permutation(len(inputs))[:batch_size]
    d_x = d_predict_fn(inputs[excerpt], masks[excerpt])
    plt.hist(np.ravel(d_x), label='D(x)', alpha=0.5)

    # D(G(z)) on a generated batch. The generator's reshape layers are built
    # from g_specs shapes that contain the generator batch size, so the noise
    # batch uses g_specs['batch_size'] rather than an arbitrary size.
    noises, g_masks, conditions, _ = sample_noise_batch(
        g_specs['batch_size'], min_len, max_len, n_timesteps, n_features, n_conditions)
    d_g_z = d_predict_fn(g_sample_fn(noises, g_masks, conditions), g_masks)
    plt.hist(np.ravel(d_g_z), label='D(G(z))', alpha=0.5)

    plt.legend(loc='best')
    plt.xlim(0, 1)
    plt.show()

In [13]:
# Instantiate both networks from their spec dictionaries
# (discriminator first, then generator).
discriminator = build_discriminator(params=d_specs)
generator = build_generator(params=g_specs)

In [14]:
# Compile the Theano functions used by the training loop and the samplers.
training_fns = build_training(discriminator, generator, d_specs, g_specs)
d_train_fn, d_predict_fn, g_train_fn, g_sample_fn = training_fns

In [15]:
# Running means of the losses. Index 0 is a placeholder, so len(history) is
# the 1-based number of the update about to be averaged in.
d_losses = [0]
g_losses = [0]

n_epochs = 2000
n_d_epochs = 1
n_g_epochs = 1

# savefig does not create missing folders, so make sure the target exists.
if not os.path.isdir('images'):
    os.makedirs('images')

for epoch in tqdm(range(n_epochs)):
    # discriminator updates: one real batch and one generated batch per step
    for i in range(n_d_epochs):
        d_X, _, d_M, d_L = sample_data_batch(d_specs['batch_size'], input_data, target_data, masks_data, lbls_data)
        g_Z, g_M, g_C, g_L = sample_noise_batch(g_specs['batch_size'], min_len, max_len, n_timesteps, n_features, n_conditions)
        d_loss, d_x, g_z, d_g_z, d_label, g_label = d_train_fn(d_X, d_M, g_Z, g_M, g_C, d_L, g_L)
        # Incremental mean: mean_k = mean_{k-1} + (x_k - mean_{k-1}) / k.
        # The old (prev + loss) / k was not a mean and shrank toward zero.
        d_losses.append(d_losses[-1] + (float(d_loss) - d_losses[-1]) / len(d_losses))

    # generator updates
    for i in range(n_g_epochs):
        g_Z, g_M, g_C, g_L = sample_noise_batch(g_specs['batch_size'], min_len, max_len, n_timesteps, n_features, n_conditions)
        g_loss = g_train_fn(g_Z, g_M, g_C, g_L)
        g_losses.append(g_losses[-1] + (float(g_loss) - g_losses[-1]) / len(g_losses))

    # every 5th epoch: save a diagnostics figure and a grid of generated samples
    if epoch % 5 == 0:
        fig, axes = plt.subplots(3, 2, figsize=(16, 12))
        axes[0, 0].set_title('D(x)')
        sbn.heatmap(d_x.T, ax=axes[0, 0])
        axes[1, 0].set_title('D label')
        sbn.heatmap(d_label.T, ax=axes[1, 0])
        axes[0, 1].set_title('D(G(z))')
        sbn.heatmap(d_g_z.T, ax=axes[0, 1])
        axes[1, 1].set_title('G label')
        sbn.heatmap(g_label.T, ax=axes[1, 1])
        # skip the placeholder at index 0 when plotting the loss curves
        axes[2, 0].set_title('Loss(d)')
        axes[2, 0].plot(d_losses[1:])
        axes[2, 1].set_title('Loss(g)')
        axes[2, 1].plot(g_losses[1:])
        fig.savefig('images/epoch_{}'.format(epoch))
        plt.close('all')

        display.clear_output(wait=True)
        fig = sample_melodies(2, 2, g_specs['batch_size'], min_len, max_len, n_timesteps, n_features, n_conditions)
        fig.savefig('images/samples_{}'.format(epoch))
        plt.close('all')

100%|██████████| 2000/2000 [49:27<00:00,  1.48s/it]
