# Download MIDI Data

In [2]:
import os
import mido
import string
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from midi_ndarrays import *
import mido

# All data lives under the current working directory. Every path keeps a
# trailing slash so file names can be appended by plain concatenation.
cwd = f"{os.getcwd()}/"
midi_data_dir = f"{cwd}midi_data/"
csv_data_dir = f"{cwd}csv_data/"

In [3]:
# Make sure both data directories exist. exist_ok=True makes the call a no-op
# when the directory is already there, so no separate (race-prone) existence
# check is needed and the cell can be re-run safely.
os.makedirs(midi_data_dir, exist_ok=True)
os.makedirs(csv_data_dir, exist_ok=True)

In [4]:
# Download the music only if the MIDI directory is still empty.
if not os.listdir(midi_data_dir):
    from midi_utils import download_midis
    download_midis(midi_data_dir)

# Convert every MIDI file to a CSV in csv_data_dir. The whole conversion is
# skipped when csv_data_dir already contains files.
if not os.listdir(csv_data_dir):
    for file in os.listdir(midi_data_dir):
        midi_file = midi_data_dir + file

        try:
            midi_tracks = mido.MidiFile(midi_file, clip=True)

        # Best effort: skip any file that cannot be parsed (e.g. the key
        # signature can't be read) and report it instead of hiding it.
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        except Exception as err:
            print(f"Skipping {file}: {err}")
            continue

        midi_array = mid2array(midi_tracks)

        # Encode start and end tokens: np.pad adds a one-element border of
        # zeros on every side, then the first and last corner cells are set
        # to 127.
        # NOTE(review): assumes mid2array returns a 2-D array - confirm.
        midi_array = np.pad(midi_array, 1)
        midi_array[0, 0] = 127
        midi_array[-1, -1] = 127

        # Save each array as CSV. splitext drops the extension whatever its
        # length, so both "song.mid" and "song.midi" become "song.csv".
        np.savetxt(csv_data_dir + os.path.splitext(file)[0] + '.csv',
                   midi_array, fmt="%d", delimiter=",")

In [None]:
def load_data(filename):
    """Read a comma-delimited file and binarize its values.

    Args:
        filename: source handed straight to ``np.genfromtxt``.

    Returns:
        Integer array holding 0 wherever the parsed value equals 0 and 1
        everywhere else.
    """
    notes = np.genfromtxt(filename, delimiter=',')

    # Mark the silent cells first, then map silent -> 0 and anything else -> 1.
    is_silent = np.equal(notes, 0)
    return np.where(is_silent, 0, 1)

# The Model

In [95]:
class EncoderRNN(nn.Module):
    """Embed the input, run it through a stacked LSTM, and return the final state.

    Args:
        input_size: number of entries in the embedding table.
        hidden_size: width of both the embeddings and the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        dropout_p: dropout probability, applied to the embeddings and also
            passed to the LSTM's own ``dropout`` argument.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout_p):
        super().__init__()

        self.hidden_size, self.num_layers = hidden_size, num_layers

        self.dropout = nn.Dropout(p=dropout_p)
        self.embedding = nn.Embedding(
            num_embeddings=input_size,
            embedding_dim=hidden_size,
        )
        self.rnn = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout_p,
        )

    def forward(self, x):
        """Return the LSTM's final ``(hidden, cell)`` state for ``x``.

        The per-step LSTM outputs are discarded.
        NOTE(review): the LSTM is built without ``batch_first``, so ``x`` is
        presumably an integer tensor shaped (seq_len, batch) - confirm.
        """
        embedded = self.embedding(x)
        embedded = self.dropout(embedded)
        _, state = self.rnn(embedded)
        hidden, cell = state
        return hidden, cell

In [96]:
class DecoderRNN(nn.Module):
    """Single-step LSTM decoder: embed a token, advance the LSTM, project to logits.

    Args:
        hidden_size: width of both the embeddings and the LSTM hidden state.
        output_size: number of entries in the embedding table and number of
            features produced by the final linear layer.
        num_layers: number of stacked LSTM layers.
        dropout_p: dropout probability, applied to the embeddings and also
            passed to the LSTM's own ``dropout`` argument.
    """

    def __init__(self, hidden_size, output_size,
                 num_layers, dropout_p):

        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers  = num_layers
        # Seq2Seq.forward reads `decoder.output_dim` to size its output
        # buffer, so the output width is exposed under that name.
        self.output_dim  = output_size

        self.dropout   = nn.Dropout(dropout_p)
        self.embedding = nn.Embedding(output_size, hidden_size)

        self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers, dropout=dropout_p)

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Run one decoding step.

        Args:
            x: token indices for the current step.
                NOTE(review): presumably shaped (batch,) - confirm.
            hidden: LSTM hidden state carried over from the previous step.
            cell: LSTM cell state carried over from the previous step.

        Returns:
            ``(predictions, hidden, cell)``: the linear-layer output with the
            leading dimension squeezed away, plus the updated LSTM state.
        """
        # Add a leading dimension so the LSTM sees a length-1 sequence.
        x = x.unsqueeze(0)

        embedding = self.dropout(self.embedding(x))

        outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))

        predictions = self.fc(outputs)

        # Drop the length-1 leading dimension again.
        predictions = predictions.squeeze(0)

        return predictions, hidden, cell

In [None]:
import random


class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with optional teacher forcing.

    Args:
        encoder: callable mapping ``src`` to an initial ``(hidden, cell)`` state.
        decoder: callable mapping ``(input, hidden, cell)`` to
            ``(predictions, hidden, cell)``. Its output width is read from
            ``decoder.output_dim`` when present, otherwise from
            ``decoder.fc.out_features``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode ``trg`` one step at a time, conditioned on ``src``.

        Args:
            src: source sequence passed unchanged to the encoder.
            trg: target tensor; dimension 0 is the target length and
                dimension 1 the batch size.
            teacher_forcing_ratio: probability, per step, of feeding the
                ground-truth token rather than the model's own prediction
                as the next decoder input.

        Returns:
            Tensor shaped (trg_len, batch_size, output width), on the same
            device as ``trg``. Row 0 stays all zeros because decoding starts
            at step 1.
        """
        trg_len, batch_size = trg.shape[0], trg.shape[1]

        # Prefer an explicit `output_dim` on the decoder; fall back to the
        # width of its final linear layer when that attribute is absent.
        trg_out = getattr(self.decoder, "output_dim", None)
        if trg_out is None:
            trg_out = self.decoder.fc.out_features

        # Buffer for the decoder outputs, allocated on the targets' device
        # (nn.Module itself has no `device` attribute).
        outputs = torch.zeros(trg_len, batch_size, trg_out, device=trg.device)

        # The encoder's final state becomes the decoder's initial state.
        hidden, cell = self.encoder(src)

        # The first decoder input is the first row of the targets (the
        # start-of-sequence tokens).
        decoder_input = trg[0, :]

        for t in range(1, trg_len):
            # Feed the current token and previous state; get predictions and
            # the updated state back.
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)

            outputs[t] = output

            # Decide per step whether to use teacher forcing.
            teacher_force = random.random() < teacher_forcing_ratio

            # Highest-scoring token for each batch element.
            top1 = output.argmax(1)

            # Teacher forcing feeds the true token; otherwise the prediction.
            decoder_input = trg[t] if teacher_force else top1

        return outputs

In [5]:
# One dropout probability shared by the encoder and the decoder.
drop_p = 0.5

encoder = EncoderRNN(
    input_size=90,
    hidden_size=256,
    num_layers=2,
    dropout_p=drop_p,
)
decoder = DecoderRNN(
    hidden_size=256,
    output_size=90,
    num_layers=2,
    dropout_p=drop_p,
)
seq2seq = Seq2Seq(encoder=encoder, decoder=decoder)

# TODO: training loop
#   for each epoch:
#       for each song in the epoch:
#           pass it through seq2seq, compute the loss, optimize