In [1]:
import sys
sys.path.insert(0, '../src')

from loaders import *
from episode import *
from dataset import *
from collections import Counter
from torch import nn
from torch.autograd import Variable

import numpy as np
import torch
import torch.nn.functional as F
import json
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
""" Some global variables """
# Loader and MIDILoader are provided by the star imports from ../src; `loader`
# is used further down to turn token ids back into a writable object.
_loader = Loader(502) # 500 + SOS + EOS
loader = MIDILoader(_loader)

# Run on the GPU whenever CUDA is available; every cell below branches on this flag.
use_cuda = torch.cuda.is_available()
# Open question from the author: is the tokenizer 1-indexed?
# NOTE(review): the trailing "+ 1" presumably reserves an extra id in case it is — confirm.
vocabulary_size = 16*128*2 + 32*16 + 100 + 1 # 4708 + 1 = 4709 ids before the sentinels
vocabulary_size = vocabulary_size + 2 # 4711 in total: SOS (index 4709) and EOS (index 4710)
# Sentinel token ids; they must stay equal to the last two indices of the vocabulary.
SOS_TOKEN = 4709
EOS_TOKEN = 4710

# Passed to Learner as hidden_size (width of the LSTM state).
encoding_size = 500
# Identity matrix: row i is the one-hot vector of token i
# (vocabulary_size x vocabulary_size, float64 by NumPy default).
one_hot_embeddings = np.eye(vocabulary_size)

In [3]:
import time
import math


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Args:
        s: Duration in seconds (int or float).

    Returns:
        A string such as ``'2m 5s'``; both fields are truncated to integers
        by the ``%d`` conversion.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time and a linear estimate of the time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far (must be non-zero).

    Returns:
        A string of the form ``'<elapsed> (- <remaining>)'``, with both parts
        formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the completed fraction.
    projected_total = elapsed / (percent)
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(projected_total - elapsed))

In [4]:
# The next two functions exist in some other deep learning frameworks, but PyTorch
# has not yet implemented them. A commonly used open-source workaround can be found
# here: https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1.
def _sequence_mask(sequence_length, max_len=None):
    if max_len is None:
        max_len = sequence_length.data.max()
    batch_size = sequence_length.size(0)
    seq_range = torch.arange(0, max_len).long()
    seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
    seq_range_expand = Variable(seq_range_expand)
    if sequence_length.is_cuda:
        seq_range_expand = seq_range_expand.cuda()
    seq_length_expand = (sequence_length.unsqueeze(1)
                         .expand_as(seq_range_expand))
    return seq_range_expand < seq_length_expand


def compute_loss(logits, target, length):
    """Length-masked negative log-likelihood, averaged over the valid time steps.

    Args:
        logits: A Variable containing a FloatTensor of size
            (batch, max_len, num_classes) which contains the
            unnormalized probability for each class.
        target: A Variable containing a LongTensor of size
            (batch, max_len) which contains the index of the true
            class for each corresponding step.
        length: A Variable containing a LongTensor of size (batch,)
            which contains the length of each data in a batch.

    Returns:
        loss: An average loss value masked by the length, i.e. the summed
            per-step loss of all unpadded positions divided by the total
            number of unpadded positions (``length.sum()``).
    """
    # logits_flat: (batch * max_len, num_classes)
    logits_flat = logits.view(-1, logits.size(-1))
    # log_probs_flat: (batch * max_len, num_classes)
    # The class axis is named explicitly: calling log_softmax without `dim`
    # is deprecated and emits a warning.
    log_probs_flat = F.log_softmax(logits_flat, dim=1)
    # target_flat: (batch * max_len, 1)
    target_flat = target.view(-1, 1)
    # losses_flat: (batch * max_len, 1) -- log-probability of the true class, negated
    losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
    # losses: (batch, max_len)
    losses = losses_flat.view(*target.size())
    # mask: (batch, max_len) -- zeroes the contribution of padded positions
    mask = _sequence_mask(sequence_length=length, max_len=target.size(1))
    losses = losses * mask.double()
    loss = losses.sum() / length.double().sum()
    return loss

In [5]:
class EncoderLSTM(nn.Module):
    """Single-layer, double-precision LSTM that exposes only its final state."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size).double()

    def forward(self, input, hidden):
        """Feed ``input`` through the LSTM and return the resulting state tuple.

        The per-step outputs of the LSTM are dropped; callers only get the
        state tuple that ``nn.LSTM`` returns as its second result.
        """
        return self.lstm(input, hidden)[1]

    def initHidden(self):
        """Return a zero tensor of shape (1, 1, hidden_size) in torch's default dtype."""
        return torch.zeros(1, 1, self.hidden_size)

In [6]:
class DecoderLSTM(nn.Module):
    """Double-precision LSTM followed by a linear layer producing per-class scores."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size).double()
        self.out = nn.Linear(hidden_size, output_size).double()

    def forward(self, input, hidden):
        """Run one decoding call.

        Args:
            input: Tensor fed to the LSTM after a ReLU.
            hidden: LSTM state tuple to start from.

        Returns:
            ``(scores, hidden)`` where ``scores`` is the first time step of the
            projected LSTM output and ``hidden`` is the updated state tuple.
        """
        activated = F.relu(input)
        lstm_out, next_hidden = self.lstm(activated, hidden)
        scores = self.out(lstm_out)[0]
        return scores, next_hidden

    def initHidden(self):
        """Return a zero tensor of shape (1, 1, hidden_size) in torch's default dtype."""
        return torch.zeros(1, 1, self.hidden_size)

In [7]:
class Learner(nn.Module):
    """Sequence autoencoder built from an EncoderLSTM and a DecoderLSTM.

    The encoder reads a token sequence in reverse order; the decoder is then
    trained (with teacher forcing) to emit the same sequence in its original
    order followed by ``EOS_TOKEN``. Tokens are integer ids used to index rows
    of ``embeddings``.

    The class relies on the notebook globals ``use_cuda``, ``SOS_TOKEN`` and
    ``EOS_TOKEN``.

    NOTE(review): only the second element of the encoder's final state tuple is
    handed to the decoder; the first element is reset to zeros. ``nn.LSTM``
    orders that tuple as (h, c), so this carries over the cell state only.
    The behaviour is kept unchanged because the saved checkpoints were trained
    this way -- confirm it is intentional.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 learning_rate,
                 embeddings=one_hot_embeddings):
        """
        Args:
            input_size: Width of one embedding row (LSTM input size).
            hidden_size: Width of the LSTM state of encoder and decoder.
            output_size: Number of classes scored by the decoder.
            learning_rate: Learning rate of both Adam optimizers.
            embeddings: Indexable table mapping a token id to its vector.
        """
        super(Learner, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.encoder = EncoderLSTM(input_size, hidden_size)
        self.decoder = DecoderLSTM(input_size, hidden_size, output_size)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(), lr=learning_rate)
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(), lr=learning_rate)

        self.embeddings = embeddings
        self.criterion = nn.CrossEntropyLoss()

    def _embed(self, token):
        """Return the embedding row of ``token`` as a (1, 1, -1) double Variable on the active device."""
        vector = Variable(torch.from_numpy(np.array(self.embeddings[token]))).view(1, 1, -1).double()
        return vector.cuda() if use_cuda else vector

    def _zero_state(self, module):
        """Return ``module.initHidden()`` as a double Variable on the active device."""
        state = Variable(module.initHidden()).double()
        return state.cuda() if use_cuda else state

    def _encode(self, token_seq):
        """Feed ``token_seq`` to the encoder in reverse order and return its final state tuple.

        The caller's sequence is iterated with ``reversed`` and never mutated.
        """
        hidden = (self._zero_state(self.encoder), self._zero_state(self.encoder))
        for token in reversed(token_seq):
            hidden = self.encoder(self._embed(token), hidden)
        return hidden

    def _decoder_start_state(self, encoder_state):
        """Pair a zero tensor with the second element of the encoder state (see class note)."""
        return (self._zero_state(self.decoder), encoder_state[1])

    def forward(self, token_seq):
        """Run ONE full optimisation step on a single sequence.

        Besides computing the loss this zeroes the gradients, backpropagates
        and steps both optimizers, so every call changes the weights.

        Args:
            token_seq: Sequence of integer token ids (left unmodified).

        Returns:
            float: cross-entropy summed over all decoding steps (including the
            final EOS step) divided by ``len(token_seq)``; an empty sequence
            divides by 1 instead of raising ZeroDivisionError.
        """
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        hidden = self._decoder_start_state(self._encode(token_seq))

        targets = list(token_seq) + [EOS_TOKEN]
        # int64 is requested explicitly because CrossEntropyLoss needs Long targets.
        seq_var = Variable(torch.from_numpy(np.array(targets, dtype=np.int64)))
        seq_var = seq_var.cuda() if use_cuda else seq_var

        loss = 0
        decoder_input = self._embed(SOS_TOKEN)
        for i, token in enumerate(targets):
            decoder_output, hidden = self.decoder(decoder_input, hidden)
            # Slice instead of index so the target keeps shape (1,) on every
            # PyTorch version (plain indexing yields a 0-dim tensor on newer ones).
            loss += self.criterion(decoder_output, seq_var[i:i + 1])
            if token == EOS_TOKEN:
                break

            # Teacher forcing: the next input is the ground-truth token.
            decoder_input = self._embed(token)

        loss.backward()
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()

        # float() works for both a 1-element and a 0-dim loss tensor, whereas
        # `loss.data[0]` fails on 0-dim tensors.
        return float(loss.data) / max(len(token_seq), 1)

    def map_inference(self, token_seq, max_length=500, verbose=True):
        """Greedily decode a token sequence conditioned on ``token_seq``.

        Args:
            token_seq: Sequence of integer token ids to encode (left unmodified).
            max_length: Maximum number of decoding steps.
            verbose: When true, print every predicted token.

        Returns:
            list of int: predicted token ids, without the terminating EOS token.
        """
        hidden = self._decoder_start_state(self._encode(token_seq))

        token = SOS_TOKEN
        generated = []
        for i in range(max_length):
            decoder_output, hidden = self.decoder(self._embed(token), hidden)
            # Greedy choice: index of the highest score.
            _, topi = decoder_output.data.topk(1)
            token = int(topi)
            if verbose:
                print("Iteration: %d, Prediction: %d" % (i, token))
            if token == EOS_TOKEN:
                break

            generated.append(token)

        return generated

In [8]:
# Build the learner: one-hot inputs and outputs over the whole vocabulary,
# with an LSTM state of width encoding_size.
learner = Learner(input_size=vocabulary_size,
                  hidden_size=encoding_size,
                  output_size=vocabulary_size,
                  learning_rate=0.01)
if use_cuda:
    learner = learner.cuda()

# Episode sampler configured from the project's YAML file; the first support
# entry of one sampled episode serves as the single training song.
eps = load_sampler_from_config("../src/config.yaml")
song = list(eps.get_episode().support[0][0])

Lenny Kravitz whatgoesaroundcomesaround.mid
Lenny Kravitz dontgoandputabulletinyourhead.mid
Lenny Kravitz callingallangels.mid
Lenny Kravitz alwaysontherun.mid
Lenny Kravitz letloverule.mid
Lenny Kravitz fieldsofjoy.mid
Lenny Kravitz digin.mid
Lenny Kravitz again.mid
Lenny Kravitz destiny.mid
Lenny Kravitz itaintovertillitsover.mid
Lenny Kravitz stillnessofheart.mid


In [11]:
# Fit the learner to the single `song` sequence. Every call to `learner(song)`
# performs a complete optimisation step (Learner.forward zeroes the gradients,
# backpropagates and steps both optimizers). The loss value it returns is
# discarded here, so only the iteration counter is printed.
for i in range(300):
    print("Iteration: %d" % i)
    learner(song)
    

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
It

In [12]:
# Persist the learner's parameters so the generation cell can reload them from disk.
torch.save(learner.state_dict(), "../models/song.mod")

In [9]:
# Reload the weights saved to ../models/song.mod and switch the module to eval mode.
learner.load_state_dict(torch.load("../models/song.mod"))
learner.eval()
# Encode the first 100 tokens of `song` and greedily decode a continuation;
# map_inference prints every predicted token, which produces the output below.
gen_seq = learner.map_inference(song[:100])
# Prompt + generated tokens are converted back by the loader and written to test.mid.
# NOTE(review): detokenize is defined outside this notebook; presumably it returns a MIDI object.
midi = loader.detokenize(np.array(song[:100] + gen_seq))
midi.write('test.mid')

Iteration: 0, Prediction: 4248
Iteration: 1, Prediction: 564
Iteration: 2, Prediction: 4344
Iteration: 3, Prediction: 958
Iteration: 4, Prediction: 668
Iteration: 5, Prediction: 4628
Iteration: 6, Prediction: 2612
Iteration: 7, Prediction: 567
Iteration: 8, Prediction: 4628
Iteration: 9, Prediction: 2622
Iteration: 10, Prediction: 567
Iteration: 11, Prediction: 4628
Iteration: 12, Prediction: 2615
Iteration: 13, Prediction: 564
Iteration: 14, Prediction: 4628
Iteration: 15, Prediction: 2612
Iteration: 16, Prediction: 567
Iteration: 17, Prediction: 4628
Iteration: 18, Prediction: 2622
Iteration: 19, Prediction: 567
Iteration: 20, Prediction: 4628
Iteration: 21, Prediction: 2615
Iteration: 22, Prediction: 564
Iteration: 23, Prediction: 4628
Iteration: 24, Prediction: 2612
Iteration: 25, Prediction: 567
Iteration: 26, Prediction: 4628
Iteration: 27, Prediction: 2622
Iteration: 28, Prediction: 567
Iteration: 29, Prediction: 4628
Iteration: 30, Prediction: 2615
Iteration: 31, Prediction: 56

Iteration: 324, Prediction: 4628
Iteration: 325, Prediction: 2370
Iteration: 326, Prediction: 2622
Iteration: 327, Prediction: 4172
Iteration: 328, Prediction: 322
Iteration: 329, Prediction: 574
Iteration: 330, Prediction: 4628
Iteration: 331, Prediction: 2368
Iteration: 332, Prediction: 2622
Iteration: 333, Prediction: 2716
Iteration: 334, Prediction: 4172
Iteration: 335, Prediction: 327
Iteration: 336, Prediction: 567
Iteration: 337, Prediction: 4628
Iteration: 338, Prediction: 2370
Iteration: 339, Prediction: 2622
Iteration: 340, Prediction: 4172
Iteration: 341, Prediction: 322
Iteration: 342, Prediction: 574
Iteration: 343, Prediction: 4628
Iteration: 344, Prediction: 2368
Iteration: 345, Prediction: 2622
Iteration: 346, Prediction: 2716
Iteration: 347, Prediction: 4172
Iteration: 348, Prediction: 327
Iteration: 349, Prediction: 567
Iteration: 350, Prediction: 4628
Iteration: 351, Prediction: 2370
Iteration: 352, Prediction: 2622
Iteration: 353, Prediction: 4172
Iteration: 354, Pr

In [None]:
""" Testing Learner """
# Episode-based training loop: every epoch samples one episode, one-hot encodes
# each support sequence (wrapped in SOS/EOS) and feeds them in mini-batches.
# A checkpoint is written every `print_every` epochs.
#
# NOTE(review): this cell has no execution count and does not match the Learner
# class defined in this notebook, which only offers forward(token_seq) and
# map_inference(token_seq). `learner.train` resolves to the method inherited
# from nn.Module, so the call with (sequences, numbered_seqs, hidden=None)
# looks like a leftover from an earlier batched interface — confirm before running.
print_every = 10
total_epochs = 2900
print_loss_total = 0
batch_size = 2
startTime = time.time()
for epoch in range(1, total_epochs+1):
    supports = eps.get_episode().support[0]
    input_variables = []    # one (seq_length, vocabulary_size) one-hot Variable per support
    original_sequences = [] # the same sequences as plain token ids
    
    for i, support in enumerate(supports):
        trunc_seq = [SOS_TOKEN] + list(support) + [EOS_TOKEN]
        original_sequences.append(trunc_seq)
        seq_length = len(trunc_seq)

        # Fancy-indexing the identity matrix yields one one-hot row per token.
        trunc_seq = torch.from_numpy(np.array(one_hot_embeddings[trunc_seq])) # This is really time consuming

        trunc_seq = trunc_seq.view(seq_length, vocabulary_size)
        trunc_seq = Variable(trunc_seq)
        input_variables.append(trunc_seq)
    
    # NOTE(review): building an int64 array presumably requires all supports of
    # an episode to share one length — confirm against the sampler.
    original_sequences = np.array(original_sequences, dtype=np.int64)

    # Integer division drops a trailing incomplete batch.
    for batch in range(len(input_variables)//batch_size):
        # lstm input is (seq_len, batch_size, vocab_size)
        start, end = batch*batch_size, (batch+1)*batch_size
        sequences = torch.stack(input_variables[start:end]).transpose(0,1)
        # NOTE(review): torch.stack is documented to take a sequence of tensors;
        # here it receives a single Variable — verify this line.
        numbered_seqs = torch.stack(Variable(torch.from_numpy(original_sequences[start:end]))).transpose(0,1)
        if use_cuda:
            sequences = sequences.cuda()
            numbered_seqs = numbered_seqs.cuda()
        loss = learner.train(sequences, numbered_seqs, hidden=None)
        print_loss_total += loss
        # Drop the references to the batch tensors before the next iteration.
        del sequences, numbered_seqs
    
    if epoch % print_every == 0:
        # The accumulated loss is divided by the number of epochs since the
        # last report, not by the number of batches processed.
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print('%s (%d %d%%) %.4f' % (timeSince(startTime, epoch / total_epochs),
                                     epoch, epoch / total_epochs * 100, print_loss_avg))
        torch.save(learner.state_dict(), '../models/baseline_'+str(epoch))
        

In [None]:
import psutil
import gc

def memReport():
    """Print the type and size of every tensor the garbage collector currently tracks.

    Objects that are tensors themselves, or that expose a tensor through a
    ``.data`` attribute, are reported one per line.
    """
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or torch.is_tensor(obj.data):
                print(type(obj), obj.size())
        except Exception:
            # Best effort: most tracked objects have no `.data` attribute, or
            # fail while being inspected. `Exception` (rather than a bare
            # `except`) keeps KeyboardInterrupt and SystemExit working.
            pass
    
def cpuStats():
    """Print interpreter version, CPU load, system memory and this process's memory use."""
    # Local import: `os` is not imported explicitly anywhere in this notebook.
    import os

    print(sys.version)
    print(psutil.cpu_percent())
    print(psutil.virtual_memory())  # physical memory usage
    process = psutil.Process(os.getpid())
    # Resident set size of this process, converted from bytes to GiB (2**30 bytes).
    memoryUse = process.memory_info().rss / 2. ** 30
    print('memory GB:', memoryUse)

# Print interpreter / CPU / memory statistics, then list the tensors that are still alive.
cpuStats()
memReport()

In [13]:
# Force a full garbage-collection pass; the count returned by the call is what
# the notebook shows as this cell's output.
gc.collect()

667

In [None]:
# Materialise the one-hot encoded support sequences of 290 sampled episodes.
# input_variables[k] is a (seq_length, vocabulary_size) Variable and
# original_sequences[k] holds the same sequence as token ids, both wrapped in SOS/EOS.
# NOTE(review): this repeats the encoding loop of the training cell, and every
# sequence is kept as a dense one-hot matrix (moved to the GPU when available),
# which is memory-hungry — consider sharing a helper.
#input_files = ['bach_846.mid', 'mz_311_1.mid', 'rac_op3_2.mid']
input_variables = []
original_sequences = []

for index in range(0, 290):
    supports = eps.get_episode().support[0]
    
    for i, support in enumerate(supports):
        trunc_seq = [SOS_TOKEN] + list(support) + [EOS_TOKEN]
        original_sequences.append(trunc_seq)
        seq_length = len(trunc_seq)

        # Fancy-indexing the identity matrix yields one one-hot row per token.
        trunc_seq = torch.from_numpy(np.array(one_hot_embeddings[trunc_seq])) # This is really time consuming

        trunc_seq = trunc_seq.view(seq_length, vocabulary_size)
        trunc_seq = Variable(trunc_seq)
        if use_cuda:
            trunc_seq = trunc_seq.cuda()
        input_variables.append(trunc_seq)
    
# NOTE(review): an int64 array presumably requires every collected sequence to
# have the same length — confirm against the sampler.
original_sequences = np.array(original_sequences, dtype=np.int64)

In [None]:
# Restore a checkpoint written by the batched training cell, which saves to
# '../models/baseline_<epoch>'. `epoch` is an int, so it must go through str()
# before concatenation; the previous '<epoch>_<loss>' name raised TypeError and
# did not match any file that cell writes.
learner.load_state_dict(torch.load('../models/baseline_' + str(epoch)))

In [44]:
# Generate a continuation for one of the pre-encoded songs and write it to test.mid.
# NOTE(review): this cell was last run against a different Learner interface (see
# the RuntimeError recorded below). Learner.map_inference as defined in this
# notebook takes a sequence of token ids and has no `hidden` parameter.
# NOTE(review): the dataset cell builds each input_variables entry with shape
# (seq_length, vocabulary_size), so `[:,1:101]` slices vocabulary columns rather
# than time steps — confirm the intended axis.
song_index = 0
test_seq = input_variables[song_index][:,1:101] # First dimension is batch
out_seq = learner.map_inference(test_seq, hidden=None)
# First 100 stored token ids (index 0 is SOS_TOKEN) followed by the generated ones.
whole_seq = original_sequences[song_index][0:100].tolist() + out_seq
midi = loader.detokenize(np.array(whole_seq))
midi.write('test.mid')

RuntimeError: invalid argument 2: size '[1 x 1 x 4711]' is invalid for input with 1 elements at /opt/conda/conda-bld/pytorch_1518244507981/work/torch/lib/TH/THStorage.c:41

In [42]:
# Scratch check: squeeze the encoder's initial hidden state, stack two copies and
# add a leading axis, giving the 1 x 2 x hidden_size tensor shown in the output.
# NOTE(review): EncoderLSTM.initHidden as defined in this notebook takes no
# arguments, so `initHidden(None)` belongs to an earlier version of the class.
test_hidden = learner.encoder.initHidden(None).squeeze()
print(torch.stack([test_hidden, test_hidden]).unsqueeze(0))

Variable containing:
( 0 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
[torch.cuda.DoubleTensor of size 1x2x500 (GPU 0)]

