In [2]:
import os
from util import data
from pdb import set_trace
from multiprocessing import pool

%load_ext autoreload
%autoreload 2

default_pool_size = max(1, os.cpu_count()-2)

music21: Certain music21 functions might need these optional packages: matplotlib, scipy;
                   if you run into errors, install them by following the instructions at
                   http://mit.edu/music21/doc/installing/installAdditional.html


In [3]:
scores = data.load_data()

Serialized scores found, loading...
Scores loaded in 27.68 seconds.


In [4]:
dataset = data.HaydnDataset(data=scores)

Building dataset...
Finished building dataset in 6.98 seconds.


In [5]:
states = list(filter(lambda ds: ds.shape[1] != 0, dataset))
total_ticks = sum(map(lambda state: state.shape[1], states))
print("There are {} final corpi with a total of {} ticks.".format(len(states), total_ticks))

There are 81 final corpi with a total of 144076 ticks.


In [76]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F  # useful stateless functions
import numpy as np
from torch.utils import data

In [77]:
class Center_Model(nn.Module):
    def __init__(self, center_input_size, center_output_size):
        super(Center_Model, self).__init__()
        
        self.center_affine = nn.Linear(center_input_size, center_output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        
    def forward(self, center_input):
        
        center_out = self.center_affine(center_input)
        output = self.relu(center_out)
        output = self.softmax(output)
        
        return output
  

In [78]:
class Result_Model(nn.Module):
    def __init__(self, merged_input_size, output_size):
        super(Result_Model, self).__init__()
        
        self.affine = nn.Linear(merged_input_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        
    def forward(self, merged_input):
        
        out = self.affine(merged_input)
        output = self.relu(out)
        output = self.softmax(output)
        
        return output

In [79]:
class Left_Model(nn.Module):
    def __init__(self, lstm_input_size, lstm_hidden_size, output_size):
        super(Left_Model, self).__init__()
        
        self.left_lstm = nn.LSTM(lstm_input_size, lstm_hidden_size)
        self.affine = nn.Linear(lstm_hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        
    def forward(self, left_seq):
        '''
        input_size = 66*3
        LSTM input shape: [sqd_len, batch=1, input_size]
        1 = num_layers * num_directions
        LSTM hn output shape: [1, batch, hidden_size]
        '''
        
        left_out, (left_hn, left_cn) = self.left_lstm(left_seq)
        out = self.affine(left_hn)
        output = self.relu(out)
        output = self.softmax(output)
        
        return output

In [80]:
class Right_Model(nn.Module):
    def __init__(self, lstm_input_size, lstm_hidden_size, output_size):
        super(Right_Model, self).__init__()
        
        self.right_lstm = nn.LSTM(lstm_input_size, lstm_hidden_size)
        self.affine = nn.Linear(lstm_hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        
    def forward(self, right_seq):
        '''
        input_size = 66*3
        LSTM input shape: [sqd_len, batch=1, input_size]
        1 = num_layers * num_directions
        LSTM hn output shape: [1, batch, hidden_size]
        '''
        right_out, (right_hn, right_cn) = self.right_lstm(right_seq)
        out = self.affine(right_hn)
        output = self.relu(out)
        output = self.softmax(output)
        
        return output

In [81]:
def train(left_model, right_model, center_model, result_model, 
          left_tensor, right_tensor, center_tensor, 
          left_target_tensor, right_targte_tensor, center_target_tensor, 
          opt_left, opt_right, opt_center, opt_affine):        
    
    left_output = left_model(left_tensor,)
    right_output = right_model(right_tensor)
    center_output = center_model(center_tensor)
    
    
    left_output = left_output.view(1, -1)
    right_output = right_output.view(1, -1)
    print('left_output shape: {x}'.format(x=left_output.shape))
    print('center_output shape: {x}'.format(x=center_output.shape))
    print('right_output shape: {x}'.format(x=right_output.shape))
    comobined_result = torch.cat((left_output, center_output, right_output), 1)
    
    
    result_output = result_model(comobined_result)
    
    '''
    TODO: Remove the linear layer in the Left and Right model. Then we can pass in the correct left right targets
    to calculate loss!
    
    After Left and Right LSTM model is modified, change left and right model to use the correct target.
    
    '''
    print('left_output shape: {x}'.format(x=left_output.shape))
    print('center_target_tensor shape: {x}'.format(x=center_target_tensor.shape))
    
    left_loss = F.nll_loss(left_output, center_target_tensor)
    opt_left.zero_grad()
    left_loss.backward()
    opt_left.step()
    
    right_loss = F.nll_loss(right_output, center_target_tensor)
    opt_right.zero_grad()
    right_loss.backward()
    opt_right.step()
    
    center_loss = F.nll_loss(center_output, center_target_tensor)
    opt_center.zero_grad()
    center_loss.backward()
    opt_center.step()
    
    result_loss = F.nll_loss(result_output, center_target_tensor)
    opt_affine.zero_grad()
    result_loss.backward()
    opt_affine.step()
        
    return result_output, left_loss.item(), right_loss.item(), center_loss.item(), result_loss.item()

In [82]:
def findNoneZeroIndex(m):
    '''
    m is 2 dimentional numpy array
    '''
    x, y = m.shape
#     print('m shape: {x}'.format(x=m.shape))
    n = np.zeros((1, x))
    for i in range (x):
        m_row = m[i, :]
        m_row_clipped = np.delete(m_row, -1)
        
        print('m_row_clipped shape: {x}'.format(x=m_row_clipped.shape))
        print(m_row_clipped)
        index = np.nonzero(m_row_clipped)[0]
        print('idx: {x}'.format(x=index))
        n[:, i] = index
    return n

In [83]:
def getTargetIndexTensor(left, right, c_target_tensor):
    target_tensor = torch.from_numpy(c_target_tensor.reshape(1, -1)).float()
    '''
    TODO: change to use delete -1. 
    Another note: this is ok actually. this way the result keeps the correct shape.
    '''
    center_target_tensor = target_tensor[:, 0:data_dim]
    center_target_index_tensor = (center_target_tensor.flatten()==1).nonzero()
    
    left_target = findNoneZeroIndex(left)
    right_target = findNoneZeroIndex(right)
    right_index_target = np.flip(right_target, axis=1)
    
    left_index_target_tensor = torch.from_numpy(left_target).float()
    right_index_target_tensor = torch.from_numpy(right_index_target.copy()).float()
    
    return (left_index_target_tensor, right_index_target_tensor, center_target_index_tensor)

In [84]:
def generateMiniBatchFromOneMusicForPart(music_data, seq_length, part):
    '''    
    input: music_data. A numpy array with shape [4, tick, 66]
    return: a list contains miniBatch data for one part. Each item in the miniBatch list contains:
            - left_tensor: shape [seq_length, 1, D]
            - right_tensor: shape [seq_length, 1, D]
            - center_tensor: shape [1, D]
            - left_target_tensor: shape [1, seq_length ]
            - right_targte_tensor: shape [1, seq_length ]
            - center_target_tensor: shape [1, 1]

        D = size of array contains the data from three other parts
                
    '''
    miniBatches = []
    
    music_length = music_data.shape[1]
    i = seq_length # TODO: maybe add functinality to pad the beginning and end.
    
    while (i + seq_length) < music_length:
        left_range_start = i-seq_length
        left_range_end = i
        center_index = i
        right_range_start = i + 1
        right_range_end = right_range_start + seq_length
        
        left_range = music_data[:, left_range_start:left_range_end, :]
        left_result = np.delete(left_range, part, axis=0)
        left_result = np.swapaxes(left_result, 0, 1)
        seq_l, part_l, notes_l = left_result.shape
        left_tensor = torch.from_numpy(left_result.reshape(seq_l, 1, part_l*notes_l)).float()
        
        right_range = music_data[:, right_range_start:right_range_end, :]
        right_result = np.delete(right_range, part, axis=0)     
        right_result = np.swapaxes(right_result, 0, 1)
        seq_r, part_r, notes_r = right_result.shape 
        right = right_result.reshape(seq_r, 1, part_r*notes_r)
#         print(right)
        right = np.flip(right, axis=2).copy()
        right_tensor = torch.from_numpy(right).float()
        
        center_result = music_data[:, i, :]
        center_result = np.delete(center_result, part, axis=0)
        part_c, notes_c = center_result.shape
        center_tensor = torch.from_numpy(center_result.reshape(1, part_c * notes_c)).float()
        
        center_target_range = music_data[part, i+1, :]
        '''
        left range and right range are in dimension [seg_length, 66]
        '''
        left_target_tensor_range = music_data[part, left_range_start+1:left_range_end+1, :]
        right_target_tensor_range = music_data[part, right_range_start-1:right_range_end-1, :]
        
        left_index_target_tensor, right_index_target_tensor, center_index_target_tensor = getTargetIndexTensor(
            left_target_tensor_range, right_target_tensor_range, center_target_range)
        
        miniBatches.append((left_tensor, right_tensor, center_tensor, left_index_target_tensor, right_index_target_tensor, center_index_target_tensor))
        
        i += 1
        
    return miniBatches
    

In [85]:
'''
mini generateMiniBatchFromOneMusicForPart test
'''
# fake_data = np.array([[
#         [1, 0, 0],
#         [0, 1, 0],
#         [0, 1, 0],
#         [1, 0, 0],
#         [0, 0, 1],
#         [0, 1, 0],
#         [0, 0, 1]],

#        [[1, 0, 0],
#         [1, 0, 0],
#         [0, 1, 0],
#         [0, 1, 0],
#         [1, 0, 0],
#         [0, 0, 1],
#         [0, 1, 0]],

#        [[1, 0, 0],
#         [0, 1, 0],
#         [0, 0, 1],
#         [0, 1, 0],
#         [0, 1, 0],
#         [1, 0, 0],
#         [1, 0, 0]],

#        [[0, 1, 0],
#         [0, 0, 1],
#         [0, 1, 0],
#         [0, 0, 1],
#         [1, 0, 0],
#         [0, 1, 0],
#         [0, 1, 0]]])
# # print('fake_data')
# # print(fake_data)

# r = generateMiniBatchFromOneMusicForPart(fake_data, 3, 0)
# lt, rt, ct, lt_t, rt_t, ct_t = r[0]

'\nmini generateMiniBatchFromOneMusicForPart test\n'

In [86]:
import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [87]:
demo_piece = dataset.__getitem__(3) #numpy array
print('demo_piece shape: {x}'.format(x=demo_piece.shape))
parts, ticks, data_dim = demo_piece.shape

lstm_input_size = data_dim * 3
lstm_hidden_size = 200
center_input_size = data_dim * 3
center_output_size = 100
output_size = data_dim

n_iters = 2000
print_every = 100
plot_every = 500
all_losses = []
total_loss = 0 # Reset every plot_every iters
learning_rate = 0.005


left_model = Left_Model(lstm_input_size, lstm_hidden_size, output_size)
right_model = Right_Model(lstm_input_size, lstm_hidden_size, output_size)
center_model = Center_Model(center_input_size, center_output_size)
result_model = Result_Model(center_output_size+output_size*2, output_size)


left_opt = optim.SGD(left_model.parameters(), lr=learning_rate)
right_opt = optim.SGD(right_model.parameters(), lr=learning_rate)
center_opt = optim.SGD(center_model.parameters(), lr=learning_rate)
result_opt = optim.SGD(result_model.parameters(), lr=learning_rate)


seq_length = 8

demo_piece_minibatch_pt1 = generateMiniBatchFromOneMusicForPart(demo_piece, seq_length, 0)
demo_piece_minibatch_pt2 = generateMiniBatchFromOneMusicForPart(demo_piece, seq_length, 1)
demo_piece_minibatch_pt3 = generateMiniBatchFromOneMusicForPart(demo_piece, seq_length, 2)
demo_piece_minibatch_pt4 = generateMiniBatchFromOneMusicForPart(demo_piece, seq_length, 3)
miniBatch_length = len(demo_piece_minibatch_pt1)
print("minibatch list length: {x}".format(x=miniBatch_length))

#Training for Part 1
start = time.time()
ite = 0
'''
IMPORTANT: Model, Optimizer must match!
'''

'''
NOTE: This is only trainning for part 1, with data from one music piece.

TODO:
(1) generate minibatch from other pieces
(2) train for other parts
(3) write sampling function

'''
for n in range(n_iters):
    if ite/miniBatch_length >= 1:
        ite = 0
            
    left_tensor, right_tensor, center_tensor, left_target_tensor, right_targte_tensor, center_target_tensor = demo_piece_minibatch_pt1[ite]

#     print("train left_tensor size: {x}".format(x=left_tensor.size()))
    output, left_loss, right_loss, center_loss, result_loss = train(left_model, right_model, center_model, result_model, 
          left_tensor, right_tensor, center_tensor, 
          left_target_tensor, right_targte_tensor, center_target_tensor, 
          left_opt, right_opt, center_opt, result_opt)
    total_loss += result_loss
    
    if (n+1) % print_every == 0:
            print('%s (%d %d%%) loss: %.4f' % (timeSince(start), n, n / n_iters * 100, loss))
            print("output: {x}".format(x=output))
            
    if (n+1) % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0
    
    ite += 1
    n += 1


demo_piece shape: (4, 1541, 66)
m_row_clipped shape: (65,)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
idx: [49 64]


ValueError: could not broadcast input array from shape (2) into shape (1)