In [1]:
import numpy as np
import math
import torch
import torch.nn as nn
import torch.optim as optim

import random

class LSTMnn(nn.Module):
    """Stacked LSTM followed by a linear layer that scores every category.

    The LSTM is built with PyTorch's default sequence-first layout, so inputs
    are expected as (seq_len, batch, num_categories) and the hidden/cell
    states as (num_layers, batch, hidden_size).
    """

    def __init__(self, num_categories, hidden_size, num_layers, dropout = 0):
        """Builds the recurrent stack and the output projection.

        num_categories -- size of each one-hot input vector and of each output score vector
        hidden_size    -- number of hidden units per LSTM layer
        num_layers     -- number of stacked LSTM layers
        dropout        -- dropout value forwarded to nn.LSTM
        """
        super().__init__()
        self.num_layers = num_layers
        self.num_categories = num_categories
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(num_categories, hidden_size, num_layers, dropout = dropout)
        self.fc = nn.Linear(hidden_size, num_categories)

    def forward(self, batch, hidden_state):
        """Runs `batch` through the LSTM and projects every time step.

        Returns (scores, new_hidden_state); the scores are the raw outputs of
        the linear layer, no softmax is applied here.
        """
        lstm_out, next_state = self.lstm(batch, hidden_state)
        return self.fc(lstm_out), next_state

    def init_hidden(self, batch_size = 1):
        """Returns an all-zero (hidden, cell) tuple for `batch_size` sequences.

        The tensors take their dtype and device from the model's first parameter.
        """
        reference = next(self.parameters()).detach()
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = (reference.new_zeros(state_shape), reference.new_zeros(state_shape))
        return hidden


# Maps each music note in 'filename' to an integer
def createMap(filename):
    """Builds the character <-> integer vocabulary of the text file `filename`.

    Every distinct character gets the next free integer id in order of first
    appearance, starting at 1 (0 is never assigned here).

    Returns (char2int, int2char), two dictionaries that are inverse to each other.
    """
    with open(filename) as source:
        text = source.read()

    char2int = {}
    for symbol in text:
        if symbol not in char2int:
            # Ids are dense, so the next free id is always the current size + 1
            char2int[symbol] = len(char2int) + 1

    int2char = {code: symbol for symbol, code in char2int.items()}
    return char2int, int2char

# One-hot encodes song using previously created mapping from notes to integers
def encodeSong(songString, char2int):
    """One-hot encodes `songString` character by character.

    Returns a float tensor of shape (len(songString), len(char2int) + 1).
    Row r has a single 1 in the column given by char2int for character r;
    characters missing from char2int are all sent to column 0.
    """
    width = len(char2int) + 1
    encoding = torch.zeros((len(songString), width))
    for row, symbol in enumerate(songString):
        encoding[row][char2int.get(symbol, 0)] = 1
    return encoding

# One-hot encodes all songs in 'filename' and returns list of encoded songs
def encodeFile(filename, char2int):
    """One-hot encodes every song stored in the text file `filename`.

    A song is every line up to and including a line that consists solely of
    "<end>". The terminator is compared without its line break, so the last
    song is kept even when the file does not end with a newline. Text after
    the final "<end>" line is ignored.

    Returns a list with one encodeSong() tensor per song, in file order.
    """
    songs = []
    curLines = []  # lines of the song currently being collected
    with open(filename) as f:
        for line in f:
            curLines.append(line)
            # The final line of a file may lack the trailing "\n"
            if line.rstrip("\n") == "<end>":
                songs.append(encodeSong("".join(curLines), char2int))
                curLines = []
    return songs

# Converts one-hot encoded song back into music notes using mapping from integers to notes
def decodeSong(rawEncoding, int2char):
    """Turns a one-hot (or score) encoding back into its string of characters.

    rawEncoding -- torch tensor, numpy array or nested list holding
                   len(int2char) + 1 values per character; any shape with the
                   right number of elements is accepted because it is reshaped here
    int2char    -- mapping from integer id to character

    For each row the position of the largest value is looked up in int2char;
    positions without an entry (such as the unknown-character id 0) contribute
    nothing. Returns the decoded string.
    """
    width = len(int2char) + 1
    encoding = torch.as_tensor(rawEncoding)
    if encoding.numel() == 0:
        return ""

    # One argmax for all rows; tolist() yields plain Python ints, so the
    # dictionary lookups below do not depend on numpy/torch scalar types.
    indices = encoding.reshape(-1, width).argmax(dim=1).tolist()
    return "".join(int2char[ind] for ind in indices if ind in int2char)

def decodeInts(ints, int2char, song = ""):
    """Appends the characters for the ids in `ints` to `song` and returns the result.

    Ids that have no entry in int2char are skipped.
    """
    known = [int2char[code] for code in ints if code in int2char]
    song += "".join(known)
    return song

# Use the GPU when CUDA is usable, otherwise fall back to the CPU, and report the choice
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPU is available!" if device.type == "cuda" else "No GPU available...")






No GPU available...


In [2]:
# Build the character vocabulary from the training file, then one-hot encode
# the training and validation songs with that same vocabulary
char2int, int2char = createMap("train.txt")
songs = encodeFile("train.txt", char2int)
val_songs = encodeFile("val.txt", char2int)

# Network dimensions. The input has one slot per known character plus slot 0,
# which encodeSong uses for any character missing from the vocabulary
num_layers = 2
hidden_size = 100
input_size = 1 + len(char2int)

# Training variables
learning_rate = 0.003
notes_per_batch = 100  # characters fed to the network per optimisation step
epochs = 100           # upper bound; the training cell may stop earlier

# Cross entropy loss criterion, the network itself, and an Adam optimizer that uses `learning_rate`
criterion = nn.CrossEntropyLoss()
lstm = LSTMnn(input_size, hidden_size, num_layers)
lstm = lstm.to(device)
optimizer = optim.Adam(lstm.parameters(), lr = learning_rate)



In [None]:


# Training epochs: one pass over the shuffled training songs, then a validation
# pass, with early stopping once the validation loss stops improving
vlosses = []    # mean validation loss (per validation song) of every finished epoch
tlosses = []    # mean training loss (per chunk) of every finished epoch
early_stop = 1  # how many consecutive epoch-to-epoch comparisons must show no improvement before stopping
for e in range(epochs):
    print("epoch ", e)

    # Shuffles training data
    indices = list(range(len(songs)))
    random.shuffle(indices)
    tloss = []

    # Training mode, so dropout (when the model was built with one) is active for the updates
    lstm.train()

    # Loop through training data
    for songNum, i in enumerate(indices):
        if songNum % 100 == 99:
            print("Training song #%d" % (songNum + 1))

        # Initializes the hidden state = (short term, long term) and the current song
        hidden_state = (torch.zeros((num_layers, 1, hidden_size)).to(device), torch.zeros((num_layers, 1, hidden_size)).to(device))
        song = songs[i]

        # Loops through song notes_per_batch notes at a time
        for j in range(math.ceil(len(song) / notes_per_batch)):
            lstm.zero_grad()

            # Creates a batch of up to notes_per_batch notes, reshaping to give an extra (batch) dimension.
            # The last note of the song is never an input because nothing follows it.
            batch = song[j * notes_per_batch : min((j + 1) * notes_per_batch, len(song) - 1)]
            batch = batch.reshape( (-1, 1, input_size)).to(device)

            # Creates the targets the model should return: for every input note, the note right after it
            teacher = song[j * notes_per_batch + 1 : min((j + 1) * notes_per_batch + 1, len(song))]
            if len(teacher) == 0:
                continue
            teacher = torch.argmax(teacher, 1).to(device)

            # Calls the lstm model with the current batch; the carried-over state is detached
            # so gradients do not flow back into earlier chunks
            outputs, hidden_state = lstm(batch, (hidden_state[0].detach(), hidden_state[1].detach()))

            # Performs backprop
            loss = criterion(outputs.reshape(-1, input_size), teacher)
            tloss.append(loss.item())
            loss.backward()
            optimizer.step()
    epoch_tloss = sum(tloss) / len(tloss)
    tlosses.append(epoch_tloss)
    print("training loss: ", epoch_tloss)

    # Evaluation mode, so dropout cannot perturb the validation loss
    lstm.eval()
    with torch.no_grad():
        loss = 0
        for song in val_songs:
            hidden_state = (torch.zeros((num_layers, 1, hidden_size)).to(device), torch.zeros((num_layers, 1, hidden_size)).to(device))

            # Targets are the song shifted by one note
            teacher = song[1:]

            teacher = torch.argmax(teacher, 1).to(device)

            song = song.reshape(-1, 1, input_size).to(device)

            # The whole song is fed in a single call
            output, hidden_state = lstm(song, hidden_state)

            output = output.reshape(-1, input_size)

            # The prediction made after the final note has no target, so it is dropped
            loss += criterion(output[:len(output) - 1], teacher)

        print(loss.item() / len(val_songs))
        vlosses.append(loss.item() / len(val_songs))
        if e >= early_stop:
            # Stop only if none of the last `early_stop` epochs improved on its predecessor.
            # A dedicated loop variable keeps the song index `i` from being overwritten.
            stop = True
            for back in range(early_stop):
                if vlosses[e - back] < vlosses[e - back - 1]:
                    stop = False

            if stop:
                break
torch.save(lstm.state_dict(), "./model.pt")

epoch  0
Training song #100
Training song #200
Training song #300
Training song #400
Training song #500
Training song #600
Training song #700
Training song #800
training loss:  2.198507622169787
1.9763635334215666
epoch  1
Training song #100


In [10]:
##SONG GENERATION
import re
regex = re.compile('"."')
start_string = '<start>\n'
Temperature = .7         # scores are divided by this before the softmax; lower values sample more greedily
num_songs = 5            # songs to sample; bounded so the cell finishes without a manual interrupt
max_song_chars = 5000    # sampling steps allowed per song in case "<end>" is never produced
# The score vector of a single step is 1-D, so the softmax runs over dimension 0
softmax = nn.Softmax(dim = 0)

# Evaluation mode, so dropout (when the model was built with one) is off while sampling
lstm.eval()
with torch.no_grad():
    for _ in range(num_songs):
        hidden_state = (torch.zeros((num_layers, 1, hidden_size)).to(device), torch.zeros((num_layers, 1, hidden_size)).to(device))

        # The first call feeds the whole start tag; every later call feeds the one sampled character
        curchar = encodeSong(start_string, char2int).to(device)
        song = start_string
        for _step in range(max_song_chars):
            if "<end>" in song:
                break
            curchar = curchar.reshape(-1, 1, input_size)
            outputs, hidden_state = lstm(curchar, hidden_state)

            # Only the scores of the last time step predict the next character
            scores = outputs[len(outputs) - 1][0] / Temperature
            probabilities = softmax(scores).cpu().numpy()
            guess = int(np.random.choice(np.arange(input_size), p = probabilities))

            # The sampled character, one-hot encoded, becomes the next input
            curchar = torch.zeros((input_size)).to(device)
            curchar[guess] = 1
            if guess in int2char:
                song += int2char[guess]
        print(song)
        print("---")
        

            

  # This is added back by InteractiveShellApp.init_path()


<start>
X:11
T:La toushersous, see #104
D:Noel Mazurka a she perg?e
R:Bourre? de la partsoy
Z:id:hn-hornpipe-8
M:C|
K:G
(3ABc|AFDF G2EF|GFGB AGFA|BGGF G2:|
<end>
---
<start>
X:53
T:Branle
R:Branslanome larousion
C:Trad.
S:Thoinot Arbeau
N:0175.gif
B:Orchesographie (1589)
O:France
Z:Transcrit et/ou corrig? par Michel BELLON - 2006-11-23
Z:Pour toute observation mailto:galouvielle@free.fr
M:2/4
L:1/8
Q:1/4=120
R:Transcrit et/ou corrig? par Michel BELLON - 2007-04-03
Z:Pour toute observation mailto:galouvielle@free.fr
M:2/4
L:1/8
Q:1/4=120
K:Bb
V:Farone
AA Ge de | dc BA GB | c2 e2 de | f2 gg b2 | g2 g2 f2 | e2 fe d2 | f2 ed cA | B2 B2 A2 | G4 || 
<end>
---
<start>
X:83
T:Ro?requetedufetso de la bauge
O:France
A:Provence
C:ariestias Hornpipe
C:James Hornpipe", veel~
Z:id:hn-hornpipe-89
M:C|
K:Ador
cA | cAFA GABG | AGAB cdef | gddf gedc |
B2Bd BAGF | A2G2 G2BA | AGAB cBAF | E2EE D2GF | FGAF E2G2 | G2G2 G2 :|
<end>
---
<start>
X:49
T:Pad?ron ? Callave
R:Schottische
Q:1/4=105
Z:Transcrit et/o

KeyboardInterrupt: 

In [None]:
%matplotlib inline

# Print the validation loss of the last epoch, then plot both loss curves against the epoch number
print(vlosses[-1])
import matplotlib.pyplot as plt
for curve_label, curve in (("Validation Loss", vlosses), ("Training Loss", tlosses)):
    plt.plot(np.arange(len(curve)), np.array(curve), label = curve_label)
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Loss")
# The title records the hyperparameters of the run being plotted
plt.title(f"Loss with {num_layers} {hidden_size} node layer, {learning_rate} learning rate, {notes_per_batch} character chunk")
plt.show()

In [None]:
###HEATMAP GENERATION
import seaborn as sb

# Hidden unit of the first LSTM layer whose activation is visualised.
# Unit 150 is used when the layer is wide enough; otherwise the last unit is taken,
# because indexing past hidden_size raises an IndexError.
heatmap_neuron = min(150, hidden_size - 1)
heatmap_rows = 34  # number of rows of the heatmap grid

# Initializes the hidden state = (short term, long term) and the current song
hidden_state = (torch.zeros((num_layers, 1, hidden_size)).to(device), torch.zeros((num_layers, 1, hidden_size)).to(device))
song = songs[0]
outs = []  # activation of the chosen unit after each input character
labs = []  # the character that follows each input character, used as the cell annotation
with torch.no_grad():
    # Feeds the song one character at a time so the state after every character can be recorded
    for j in range(len(song) - 1):
        inp = song[j]
        inp = inp.reshape(1, 1, input_size).to(device)
        output, hidden_state = lstm(inp, hidden_state)
        lab = torch.argmax(song[j + 1], 0).item()

        # Index 0 marks an unknown character and has no int2char entry; annotate it with an empty string
        labs.append(int2char.get(lab, ""))
        # hidden_state[0] is the hidden (not cell) state; [0][0] selects layer 0 of the only sequence in the batch
        outs.append(hidden_state[0][0][0][heatmap_neuron].item())

# Pad both lists up to a full heatmap_rows x columns grid so the reshape works for any song length.
# Padded cells hold NaN and an empty annotation.
heatmap_cols = max(1, math.ceil(len(outs) / heatmap_rows))
padding = heatmap_rows * heatmap_cols - len(outs)
outs = np.array(outs + [np.nan] * padding)
labs = np.array(labs + [""] * padding)
outs = outs.reshape(heatmap_rows, -1)
labs = labs.reshape(heatmap_rows, -1)

        


In [None]:
# Draw the recorded activations as a heatmap, writing the matching character into every cell
heatmap_figure = plt.figure(figsize=(10,10))
heatmap_axes = heatmap_figure.gca()
heat_map = sb.heatmap(outs, annot=labs, fmt='', cmap="YlOrRd", square=False, ax=heatmap_axes)

# Widen the y-limits by half a row at both ends, then flip the y-axis.
# NOTE(review): this looks like the usual workaround for matplotlib versions that
# clipped the first and last heatmap rows -- confirm it is still needed.
y_start, y_stop = heat_map.get_ylim()
heat_map.set_ylim(y_start + 0.5, y_stop - 0.5)
heat_map.invert_yaxis()
plt.show()

In [None]:
# Evaluates the trained model on the test songs and prints the mean loss per song
tsongs = encodeFile("test.txt", char2int)
if len(tsongs) == 0:
    raise ValueError("test.txt contains no complete song, so there is nothing to evaluate")

m_loss = 0
# Evaluation mode, so dropout (when the model was built with one) cannot perturb the loss
lstm.eval()
with torch.no_grad():
    # Every song is fed in a single call, starting from a zero state
    for song in tsongs:
        hidden_state = (torch.zeros((num_layers, 1, hidden_size)).to(device), torch.zeros((num_layers, 1, hidden_size)).to(device))

        # Targets are the song shifted by one character
        teacher = song[1:]

        teacher = torch.argmax(teacher, 1).to(device)

        song = song.reshape(-1, 1, input_size).to(device)

        output, hidden_state = lstm(song, hidden_state)

        output = output.reshape(-1, input_size)

        # The prediction made after the final character has no target, so it is dropped
        m_loss += criterion(output[:len(output) - 1], teacher).item()

# Average over the number of songs (not over the last loop index)
print(m_loss / len(tsongs))