## Prepare data files

In [1]:
import os
from os import listdir
from os.path import isfile, join

data_dir = 'dataset/'

data_files = []

data_files = [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f)) if '.npz' in f]

data_files.sort()

print('total data files : ', len(data_files))


total data files :  1282


## Hyperparameters declaration

In [2]:
IntervalDim = 100

VelocityDim = 32
VelocityOffset = IntervalDim

NoteOnDim = NoteOffDim = 128
NoteOnOffset = IntervalDim + VelocityDim
NoteOffOffset = IntervalDim + VelocityDim + NoteOnDim

EventDim = NoteOnDim + NoteOffDim + IntervalDim + VelocityDim # 388

Time = 650

EmbeddingDim = 512

HeadDim = 32
Heads = 16
ContextDim = HeadDim * Heads # 512


## Data load

In [3]:
import numpy as np

def get_data(length=Time):
    index = np.random.randint(0, len(data_files))
    data = np.load(data_files[index])['eventlist']
    
    # time augmentation
    data[:, 0] *= np.random.uniform(0.95, 1.05)
    
    # absolute time to relative interval
    data[1:, 0] = data[1:, 0] - data[:-1, 0]
    data[0, 0] = 0
    
    # discretize interval into IntervalDim
    data[:, 0] = np.clip(np.round(data[:, 0] * IntervalDim), 0, IntervalDim - 1)
    
    # Note augmentation
    data[:, 2] += np.random.randint(-6, 6)
    data[:, 2] = np.clip(data[:, 2], 0, NoteOnDim)
    
    eventlist = []
    for d in data:
        # append interval
        interval = d[0]
        eventlist.append(interval)
    
        # note on case
        if d[1] == 1:
            velocity = d[3] + VelocityOffset
            note = d[1] + NoteOnOffset
            eventlist.append(velocity)
            eventlist.append(note)
            
        # note off case
        elif d[1] == 0:
            note = d[1] + NoteOffOffset
            eventlist.append(note)
            
    eventlist = np.array(eventlist).astype(np.int)
    
    if len(eventlist) > (length+1):
        start_index = np.random.randint(0, len(eventlist) - (length+1))
        eventlist = eventlist[start_index:start_index+(length+1)]
        
    # pad zeros
    if len(eventlist) < (length+1):
        pad = (length+1) - len(eventlist)
        eventlist = np.pad(eventlist, (pad, 0), 'constant')
        
    x = eventlist[:length]
    y = eventlist[1:length+1]
    
    return x, y
    
x, y = get_data()
print('x shape : ', x.shape)
print('y shape : ', y.shape)
print(x)
print(y)
    

x shape :  (650,)
y shape :  (650,)
[  3 260   5 198 133   1 171 133   2 260   1 260   2 198 133   4 260   4
 193 133   1 185 133   5 260   1 196 133   0 260   3 260   6 193 133   0
 185 133   4 260   2 260   1 200 133   5 260   3 195 133   1 185 133   3
 260   0 260   4 195 133   2 260   3 189 133   1 186 133   2 260   2 141
 133   1 260   0 199 133   2 260   0 260  36 208 133   1 202 133   0 201
 133   0 200 133   2 260   2 260   8 260   9 260  30 202 133   1 203 133
   1 187 133   0 175 133   2 163 133   1 260   0 260   1 260   1 260   1
 260   4 161 133   3 260   1 177 133   2 146 133   5 260   1 167 133   2
 161 133   2 260   1 260   3 172 133   2 260   2 260   1 171 133   1 179
 133   2 260   3 173 133   2 260   1 260   1 159 133   2 177 133   3 176
 133   1 260   2 158 133   1 260   0 260   2 175 133   2 260   2 260   3
 169 133   0 178 133   4 260   0 166 133   1 260   2 167 133   1 178 133
   1 260   0 171 133   0 260   2 178 133   1 260   3 260   1 177 133   2
 260   1 260   

## Model

In [None]:
# thanks to @openai: https://github.com/openai/gpt-2/blob/master/src/model.py
def shape_list(x):
    """Deal with dynamic shape in tensorflow cleanly."""
    static = x.shape.as_list()
    dynamic = tf.shape(x)
    return [dynamic[i] if s is None else s for i, s in enumerate(static)]

def model(inputs):
    with tf.variable_scope('model'):
        # inputs : [Batch, Time, EmbeddingDim]
        
        Batch, _, _ = shape_list(inputs)
        
        # [Heads, Batch, Time, HeadDim]
        Q = tf.stack([tf.layers.dense(inputs, HeadDim, use_bias=False) for _ in range(Heads)])
        K = tf.stack([tf.layers.dense(inputs, HeadDim, use_bias=False) for _ in range(Heads)])
        V = tf.stack([tf.layers.dense(inputs, HeadDim, use_bias=False) for _ in range(Heads)])
        
        '''
        [E_(-T+1), ..., E_0]
        '''
        E = tf.get_variable('E', [Heads, Time, HeadDim])
        
        # [Heads, Batch * Time, HeadDim]
        Q_ = tf.reshape(Q, [Heads, Batch * Time, HeadDim])
        
        # [Heads, Batch * Time, Time]
        S = tf.matmul(Q_, E, transpose_b=True)
        # [Heads, Batch, Time, Time]
        S = tf.reshape(S, [Heads, Batch, Time, Time])
        # [Heads, Batch, Time, Time+1]
        S = tf.pad(S, ((0, 0), (0, 0), (0, 0), (1, 0)))
        # [Heads, Batch, Time+1, Time]
        S = tf.reshape(S, [Heads, Batch, Time+1, Time])
        # [Heads, Batch, Time, Time]
        S = S[:, :, 1:]
        
        # [Heads, Batch, Time, Time]
        attention = (tf.matmul(Q, K, transpose_b=True) + S) / np.sqrt(HeadDim)
        
        '''
        # [Time, Time]  [[1 0 0 0]
                         [1 1 0 0]
                         [1 1 1 0]
                         [1 1 1 1]]
        '''
        mask = tf.matrix_band_part(tf.ones([Time, Time]), -1, 0)
        
        # [Heads, Batch, Time, Time]
        attention = attention * mask - tf.cast(1e10, attention.dtype) * (1-mask)
        # [Heads, Batch, Time, Time]
        score = tf.nn.softmax(attention, axis=3)
        
        # [Heads, Batch, Time, HeadDim]
        context = tf.matmul(score, V)
        # [Batch, Time, Heads, HeadDim]
        context = tf.transpose(context, [1, 2, 0, 3])
        # [Batch, Time, ContextDim]
        context = tf.reshape(context, [Batch, Time, ContextDim])
        # [Batch, Time, ContextDim]
        context = tf.layers.dense(context, ContextDim, tf.nn.relu)
        # [Batch, Time, EventDim]
        logits = tf.layers.dense(context, EventDim)
        
        return logits
    

## Draw Graph

In [None]:
import tensorflow as tf
tf.reset_default_graph()

'''INPUTS'''
# [Batch, Time]
inputs = tf.placeholder(dtype=tf.int32, shape=[None, Time])
# [Batch, Time]
targets = tf.placeholder(dtype=tf.int32, shape=[None, None])

# [Batch, Time, EventDim], for the use of visualization
inputs_onehot = tf.one_hot(inputs, axis=2, depth=EventDim)

'''EMBEDDING'''
embedding = tf.get_variable('embedding', [EventDim, EmbeddingDim])
# [Batch, Time, EventDim]
inputs_embedding = tf.gather(embedding, inputs)

'''GET LOGITS'''
# [Batch, Time, EventDim]
logits = model(inputs_embedding)

# for the use of visualization
probs = tf.nn.softmax(logits, axis=2)

'''LOSS'''
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
loss = tf.reduce_mean(cross_entropy)

'''SAMPLING'''
dist = tf.distributions.Categorical(logits=logits[:, -1])
# [Batch, 1]
sample = dist.sample()

'''TRAIN'''
global_step = tf.Variable(0, name='global_step')
learning_rate = tf.Variable(1e-3, name='learning_rate')
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step)

'''SESSION OPEN'''

sess_config = tf.ConfigProto()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9, visible_device_list= '0')
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(tf.global_variables_initializer())

print('graph created')

In [None]:
load_dir = 'save/music-transformer'
save_dir = 'save/music-transformer'

saver = tf.train.Saver()

if True:
    restore_file = tf.train.latest_checkpoint(load_dir)
    if restore_file is not None:
        saver.restore(sess, restore_file)
        print("Model restored.")
    else:
        print('model not exist.')
        

In [None]:
from tensorboardX import SummaryWriter

class Logger(SummaryWriter):
    def __init__(self, logdir):
        super(Logger, self).__init__(logdir)

    def log(self, log_string, value, iteration):
            self.add_scalar(log_string, value, iteration)
            
logger = Logger(save_dir)            

In [None]:
from IPython.display import clear_output
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import librosa.display
from time import sleep
import time

batch_size = 64
def get_batch_data(batch_size, time):
    _inputs = []
    _targets = []
    for _ in range(batch_size):
        x, y = get_data(time)
        _inputs.append(x)
        _targets.append(y)
        
    _inputs = np.stack(_inputs)
    _targets = np.stack(_targets)
    
    return _inputs, _targets
    
while(True):
    for _ in range(100):
        _inputs, _targets = get_batch_data(batch_size, Time)
        print(_inputs.shape, _targets.shape)
        
        _, _global_step, _loss = sess.run([train_step, global_step, loss], 
                                          feed_dict={inputs: _inputs, 
                                                     targets: _targets,
                                                     learning_rate: 1e-3})
        print('step : ', _global_step, 'loss : ', _loss)
        
        if _global_step % 10 == 0:
            logger.log('loss', _loss, _global_step)
        
        if _global_step % 1000 == 0:
            save_path = saver.save(sess, save_dir + '/checkpoint', global_step=_global_step)
            print("Model saved in path: %s" % save_path)
        
    clear_output()
    
    _inputs_onehot, _probs, _h = sess.run([inputs_onehot, probs, h], feed_dict={inputs: _inputs})
    
    plt.figure(figsize=[18, 10])
    librosa.display.specshow(_inputs_onehot[0].T)
    plt.show()
    
    plt.figure(figsize=[18, 10])
    librosa.display.specshow(_probs[0].T)
    plt.show()
    
    