In [1]:
import sys
sys.path.insert(0, '../src')

import pickle
from loaders import *
from episode import *
from dataset import *
from collections import Counter
from torch import nn
from torch.autograd import Variable

import numpy as np
import torch
import torch.nn.functional as F
import json
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
""" Some global variables """
_loader = Loader(502) # 500 + SOS + EOS
loader = MIDILoader(_loader)

use_cuda = torch.cuda.is_available()
# Is the tokenizer 1 indexed?
vocabulary_size = 16*128*2 + 32*16 + 100 + 1 # 4708 + 1
vocabulary_size = vocabulary_size + 2 # SOS (index 4709) and EOS (index 4710)
SOS_TOKEN = 4709
EOS_TOKEN = 4710

encoding_size = 500
one_hot_embeddings = np.eye(vocabulary_size)

In [3]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [4]:
# The next two functions are part of some other deep learning frameworks, but PyTorch
# has not yet implemented them. We can find some commonly-used open source worked arounds
# after searching around a bit: https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1.
def _sequence_mask(sequence_length, max_len=None):
    if max_len is None:
        max_len = sequence_length.data.max()
    batch_size = sequence_length.size(0)
    seq_range = torch.arange(0, max_len).long()
    seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
    seq_range_expand = Variable(seq_range_expand)
    if sequence_length.is_cuda:
        seq_range_expand = seq_range_expand.cuda()
    seq_length_expand = (sequence_length.unsqueeze(1)
                         .expand_as(seq_range_expand))
    return seq_range_expand < seq_length_expand


def compute_loss(logits, target, length):
    """
    Args:
        logits: A Variable containing a FloatTensor of size
            (batch, max_len, num_classes) which contains the
            unnormalized probability for each class.
        target: A Variable containing a LongTensor of size
            (batch, max_len) which contains the index of the true
            class for each corresponding step.
        length: A Variable containing a LongTensor of size (batch,)
            which contains the length of each data in a batch.

    Returns:
        loss: An average loss value masked by the length.
    """
    # logits_flat: (batch * max_len, num_classes)
    logits_flat = logits.view(-1, logits.size(-1))
    # log_probs_flat: (batch * max_len, num_classes)
    log_probs_flat = F.log_softmax(logits_flat)
    # target_flat: (batch * max_len, 1)
    target_flat = target.view(-1, 1)
    # losses_flat: (batch * max_len, 1)
    losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
    # losses: (batch, max_len)
    losses = losses_flat.view(*target.size())
    # mask: (batch, max_len)
    mask = _sequence_mask(sequence_length=length, max_len=target.size(1))
    losses = losses * mask.double()
    loss = losses.sum() / length.double().sum()
    return loss

In [5]:
class EncoderLSTM(nn.Module):
    # Your code goes here
    def __init__(self, input_size, hidden_size):
        super(EncoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size).double()
        
    def forward(self, input, hidden):
        _, hidden_out = self.lstm(input, hidden) # encoder only outputs hidden
        return hidden_out
    
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

In [6]:
class DecoderLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size).double()
        self.out = nn.Linear(hidden_size, output_size).double()

    def forward(self, input, hidden):
        output = F.relu(input)
        output, hidden = self.lstm(output, hidden)
        output = self.out(output)
        return output[0], hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

In [7]:
class Learner(nn.Module):
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 learning_rate,
                 embeddings=one_hot_embeddings):
        super(Learner,self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.encoder = EncoderLSTM(input_size, hidden_size)
        self.decoder = DecoderLSTM(input_size, hidden_size, output_size)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(), lr=learning_rate)
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(), lr=learning_rate)
        
        self.embeddings = embeddings
        self.criterion = nn.CrossEntropyLoss()
        
    def forward(self, token_seq):
        loss = 0
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()
        
        encoder_hidden = Variable(self.encoder.initHidden()).double()
        encoder_output = Variable(self.encoder.initHidden()).double()
        if use_cuda:
            encoder_hidden = encoder_hidden.cuda()
            encoder_output = encoder_output.cuda()
        
        hidden = (encoder_output, encoder_hidden)
        token_seq.reverse()
        for token in token_seq:
            encoder_input = Variable(torch.from_numpy(np.array(self.embeddings[token]))).view(1, 1, -1).double()
            encoder_input = encoder_input.cuda() if use_cuda else encoder_input
            #print("encoder_input: %d" % (np.where(encoder_input.data==1)[2][0]))
            hidden = self.encoder(encoder_input, hidden)
        
        encoder_output, encoder_hidden = hidden
            
        decoder_input = Variable(torch.from_numpy(np.array(self.embeddings[SOS_TOKEN]))).double()
        decoder_output = Variable(self.decoder.initHidden()).double()
        if use_cuda:
            decoder_input = decoder_input.cuda()
            decoder_output = decoder_output.cuda()
        
        hidden = (decoder_output, encoder_hidden)
        token_seq.reverse()
        targets = token_seq + [EOS_TOKEN]
        seq_var = Variable(torch.from_numpy(np.array(targets)))
        seq_var = seq_var.cuda() if use_cuda else seq_var
        for i, token in enumerate(targets):
            decoder_input = decoder_input.squeeze().view(1, 1, -1)
            #print("decoder_input: %d" % (np.where(decoder_input.data==1)[2][0]))
            decoder_output, hidden = self.decoder(decoder_input, hidden)
            #print("prediction: %d" % (int(decoder_output.topk(1)[1])))
            loss += self.criterion(decoder_output, seq_var[i])
            if token == EOS_TOKEN:
                break
                
            # Teacher forcing
            target = Variable(torch.from_numpy(np.array(self.embeddings[token]))).view(1, -1).double()
            if use_cuda:
                target = target.cuda()
            decoder_input = target
        
        loss.backward()
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        
        return loss.data[0] / len(token_seq)
    
    def map_inference(self, token_seq):
        encoder_hidden = Variable(self.encoder.initHidden()).double()
        encoder_output = Variable(self.encoder.initHidden()).double()
        if use_cuda:
            encoder_hidden = encoder_hidden.cuda()
            encoder_output = encoder_output.cuda()
        
        hidden = (encoder_output, encoder_hidden)
        token_seq.reverse()
        for token in token_seq:
            encoder_input = Variable(torch.from_numpy(np.array(self.embeddings[token]))).view(1, 1, -1).double()
            encoder_input = encoder_input.cuda() if use_cuda else encoder_input
            hidden = self.encoder(encoder_input, hidden)
        
        encoder_output, encoder_hidden = hidden
            
        token = SOS_TOKEN
        
        decoder_output = Variable(self.decoder.initHidden()).double()
        if use_cuda:
            decoder_output = decoder_output.cuda()
        
        hidden = (decoder_output, encoder_hidden)

        token_seq = []
        for i in range(500):
            decoder_input = Variable(torch.from_numpy(np.array(self.embeddings[token]))).double()
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            decoder_input = decoder_input.squeeze().view(1, 1, -1)
            decoder_output, hidden = self.decoder(decoder_input, hidden)
            topv, topi = decoder_output.data.topk(1)
            token = int(topi)
            #print("Iteration: %d, Prediction: %d" % (i, token))
            if token == EOS_TOKEN:
                break
                
            token_seq.append(token)
            
        return token_seq

In [8]:
learner = Learner(vocabulary_size, 
              encoding_size, 
              vocabulary_size,
              learning_rate=0.01)

learner = learner.cuda() if use_cuda else learner
eps = load_sampler_from_config("../src/config.yaml")

Metallica dirtywindow.mid
Metallica myfriendofmisery.mid
Metallica killingtime.mid
Metallica aintmybitch.mid
Metallica kingnothing.mid
Metallica youregoingtohell.mid
Metallica forwhomthebelltolls.mid
Metallica harvesterofsorrow.mid
Metallica anesthesia(pullingteeth).mid
Metallica itselectric.mid
Metallica whiskeyinthejar.mid


In [None]:
print_every = 10
total_epochs = 2900
print_loss_total = 0
startTime = time.time()
for epoch in range(1, total_epochs+1):
    songs = eps.get_episode().support[0]
    for song in songs:
        song = list(song)
        loss = learner(song)
        print_loss_total += loss
    
    if epoch % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print('%s (%d %d%%) %.4f' % (timeSince(startTime, epoch / total_epochs),
                                     epoch, epoch / total_epochs * 100, print_loss_avg))
        torch.save(learner.state_dict(), '../models/baseline_'+str(epoch))

Nevermore believeinnothing.mid
Nevermore iamthedog.mid
Nevermore theheartcollector.mid
Nevermore thesoundofsilence.mid
Nevermore narcosynthesis.mid
Nevermore beyondwithin.mid
Nevermore tomorrowturnedintoyesterday.mid
Nevermore wedisintegrate.mid
Nevermore thefinalproduct.mid
Nevermore insidefourwalls.mid
Nevermore thesorrowedman.mid
Travis Tritt driftofftodream.mid
Travis Tritt anymore.mid
Travis Tritt countryclub.mid
Travis Tritt tenfeettallandbulletproof.mid
Travis Tritt wherecorndontgrow.mid
Travis Tritt canitrustyouwithmyheart.mid
Travis Tritt sometimessheforgets.mid
Travis Tritt thewiskeyaintworkinganymore.mid
Travis Tritt bestofintentions.mid
Travis Tritt foolishpride.mid
Travis Tritt biblebelt.mid
Creed one.mid
Creed higher.mid
Creed inamerica.mid
Creed torn.mid
Creed sayi.mid
Creed areyouready.mid
Creed insideusall.mid
Creed witharmswideopen.mid
Creed whatif.mid
Creed freedomfighter.mid
Creed mysacrafice.mid
Voivod wecarryon.mid
Voivod inleaguewithsatan.mid
Voivod tornado.mid
V

Matthieu Chedid lebluesdesouston.mid
Matthieu Chedid ausuivant.mid
Matthieu Chedid souvenirdufutur(live).mid
Matthieu Chedid jesuisunecigarette.mid
Matthieu Chedid lemechamac.mid
Matthieu Chedid mondevirtuel(live).mid
Matthieu Chedid lafleur(live).mid
Matthieu Chedid ondesensuelle.mid
Matthieu Chedid machistador.mid
Matthieu Chedid lerosepourpreducoeur.mid
Matthieu Chedid labonneetoile.mid
Devo thatsgood.mid
Devo coldwar.mid
Devo happyguy.mid
Devo itsabeautifulworld.mid
Devo whipit.mid
Devo enoughsaid.mid
Devo bigmess.mid
Devo peekaboo.mid
Devo jockohomo.mid
Devo freedomofchoice.mid
Devo superthing.mid
Bob Marley getup.mid
Bob Marley slavedriver.mid
Bob Marley rootsrockreggae.mid
Bob Marley jammin.mid
Bob Marley buffalosoldier.mid
Bob Marley concretejungle.mid
Bob Marley ratrace.mid
Bob Marley burninandlootin.mid
Bob Marley redemptionsong.mid
Bob Marley tomorrowpeople.mid
Bob Marley stiritup.mid
King Diamond abigail.mid
King Diamond fatherpicard.mid
King Diamond spirits.mid
King Diamon

Nirvana oldage.mid
Nirvana asshole.mid
Nirvana lithium.mid
Nirvana rapeme.mid
Nirvana onaplain(unplugged).mid
Nirvana school.mid
Nirvana mv.mid
Nirvana floydthebarber.mid
Nirvana versechorusverse.mid
Nirvana bigcheese.mid
Nirvana curmudgeon.mid
Toto amillionmilesaway.mid
Toto tilltheend.mid
Toto lion.mid
Toto mrfriendly.mid
Toto canyouhearwhatimsaying.mid
Toto giftwithagoldengun.mid
Toto taleofaman.mid
Toto bodhisattva.mid
Toto lea.mid
Toto loversinthenight.mid
Toto anna.mid
Blink 182 stockholmsyndrome.mid
Blink 182 countrysong.mid
Blink 182 causeyouhavesexwiththeguys.mid
Blink 182 dicklips(live).mid
Blink 182 firstdate(live).mid
Blink 182 happyholidaysyoubastard.mid
Blink 182 lemmings.mid
Blink 182 holdon.mid
Blink 182 themarktomandtravisshowfullalbum(part1).mid
Blink 182 anthempart2.mid
Blink 182 go.mid
Madonna littlestar.mid
Madonna santababy.mid
Madonna secret.mid
Madonna takeabow.mid
Madonna staticprocess.mid
Madonna youllsee.mid
Madonna rainbowhigh.mid
Madonna dearjessie.mid
Mad

Godsmack whatever.mid
Godsmack immune.mid
Godsmack trippin.mid
Godsmack sickoflife.mid
Godsmack badreligion.mid
Godsmack istandalone.mid
Godsmack keepaway.mid
Godsmack mistakes.mid
Godsmack vampires.mid
Godsmack makemebelieve.mid
Godsmack someoneinlondon.mid
Mariah Carey ifitsover.mid
Mariah Carey throughtherain.mid
Mariah Carey webelongtogether.mid
Mariah Carey whenisawyou.mid
Mariah Carey hero.mid
Mariah Carey someday.mid
Mariah Carey cantletgo.mid
Mariah Carey wheneveryoucall.mid
Mariah Carey withoutyou.mid
Mariah Carey breakdown.mid
Mariah Carey honey.mid


In [35]:
torch.save(learner.state_dict(), "../models/song.mod")

pickle.dump(song, open("../models/song.mid", "wb"))

In [37]:
learner.load_state_dict(torch.load("../models/song.mod"))
learner.eval()
song = pickle.load(open("../models/song.mid", "rb"))

midi = loader.detokenize(np.array(song[:100]))
midi.write('song.mid')

gen_seq = learner.map_inference(song[:100])
midi = loader.detokenize(np.array(song[:100] + gen_seq))
midi.write('pred.mid')

In [None]:
""" Testing Learner """
print_every = 10
total_epochs = 2900
print_loss_total = 0
batch_size = 2
startTime = time.time()
for epoch in range(1, total_epochs+1):
    supports = eps.get_episode().support[0]
    input_variables = []
    original_sequences = []
    
    for i, support in enumerate(supports):
        trunc_seq = [SOS_TOKEN] + list(support) + [EOS_TOKEN]
        original_sequences.append(trunc_seq)
        seq_length = len(trunc_seq)

        trunc_seq = torch.from_numpy(np.array(one_hot_embeddings[trunc_seq])) # This is really time consuming

        trunc_seq = trunc_seq.view(seq_length, vocabulary_size)
        trunc_seq = Variable(trunc_seq)
        input_variables.append(trunc_seq)
    
    original_sequences = np.array(original_sequences, dtype=np.int64)

    for batch in range(len(input_variables)//batch_size):
        # lstm input is (seq_len, batch_size, vocab_size)
        start, end = batch*batch_size, (batch+1)*batch_size
        sequences = torch.stack(input_variables[start:end]).transpose(0,1)
        numbered_seqs = torch.stack(Variable(torch.from_numpy(original_sequences[start:end]))).transpose(0,1)
        if use_cuda:
            sequences = sequences.cuda()
            numbered_seqs = numbered_seqs.cuda()
        loss = learner.train(sequences, numbered_seqs, hidden=None)
        print_loss_total += loss
        del sequences, numbered_seqs
    
    if epoch % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print('%s (%d %d%%) %.4f' % (timeSince(startTime, epoch / total_epochs),
                                     epoch, epoch / total_epochs * 100, print_loss_avg))
        torch.save(learner.state_dict(), '../models/baseline_'+str(epoch))
        

In [None]:
import psutil
import gc

def memReport():
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or torch.is_tensor(obj.data):
                print(type(obj), obj.size())
        except:
            pass
    
def cpuStats():
        print(sys.version)
        print(psutil.cpu_percent())
        print(psutil.virtual_memory())  # physical memory usage
        pid = os.getpid()
        py = psutil.Process(pid)
        memoryUse = py.memory_info()[0] / 2. ** 30  # memory use in GB...I think
        print('memory GB:', memoryUse)

cpuStats()
memReport()

In [13]:
gc.collect()

667

In [None]:
#input_files = ['bach_846.mid', 'mz_311_1.mid', 'rac_op3_2.mid']
input_variables = []
original_sequences = []

for index in range(0, 290):
    supports = eps.get_episode().support[0]
    
    for i, support in enumerate(supports):
        trunc_seq = [SOS_TOKEN] + list(support) + [EOS_TOKEN]
        original_sequences.append(trunc_seq)
        seq_length = len(trunc_seq)

        trunc_seq = torch.from_numpy(np.array(one_hot_embeddings[trunc_seq])) # This is really time consuming

        trunc_seq = trunc_seq.view(seq_length, vocabulary_size)
        trunc_seq = Variable(trunc_seq)
        if use_cuda:
            trunc_seq = trunc_seq.cuda()
        input_variables.append(trunc_seq)
    
original_sequences = np.array(original_sequences, dtype=np.int64)

In [None]:
learner.load_state_dict(torch.load('../models/baseline_'+epoch+'_'+print_loss_avg))

In [44]:
song_index = 0
test_seq = input_variables[song_index][:,1:101] # First dimension is batch
out_seq = learner.map_inference(test_seq, hidden=None)
whole_seq = original_sequences[song_index][0:100].tolist() + out_seq
midi = loader.detokenize(np.array(whole_seq))
midi.write('test.mid')

RuntimeError: invalid argument 2: size '[1 x 1 x 4711]' is invalid for input with 1 elements at /opt/conda/conda-bld/pytorch_1518244507981/work/torch/lib/TH/THStorage.c:41

In [42]:
test_hidden = learner.encoder.initHidden(None).squeeze()
print(torch.stack([test_hidden, test_hidden]).unsqueeze(0))

Variable containing:
( 0 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
[torch.cuda.DoubleTensor of size 1x2x500 (GPU 0)]

