In [1]:
import glob
import math
import random
import time

import music21
from music21 import *
import torch
from torch import optim, nn

from dataset import entchen
from pytorchmodels.AttnDecoderRNN import AttnDecoderRNN
from pytorchmodels.DecoderRNN import DecoderRNN
from pytorchmodels.EncoderRNN import EncoderRNN
from pytorchmodels.encoding import getStartIndex, getStopIndex, getTotalTokens, encodeNoteList, decodeSequence, \
    transposePart, getNoteList
from pytorchmodels.inference import evaluate
from pytorchmodels.training import train, trainIters

In [2]:
def getPartByName(piece, filter="soprano"):
    """Return the first part of ``piece`` whose name equals ``filter``.

    The comparison is case-insensitive. Parts whose ``partName`` is ``None``
    are skipped rather than raising ``AttributeError`` on ``.lower()``.

    :param piece: object exposing an iterable ``parts`` attribute whose items
        carry a ``partName`` attribute.
    :param filter: part name to look for.
    :return: the first matching part, or ``None`` when no part matches.
    """
    # Lowercase the requested name once instead of on every iteration.
    wanted = filter.lower()
    for p in piece.parts:  # type: music21.stream.Part
        name = p.partName
        if name is not None and name.lower() == wanted:
            return p
    return None

def parseMXLfiles(files, filter="soprano"):
    """Parse score files and collect the note list of one named part per file.

    Each file is parsed with ``converter.parse``; the part selected by
    ``getPartByName(piece, filter)`` is transposed in place via
    ``transposePart`` and then converted with ``getNoteList``. Files without
    a matching part are reported on stdout and skipped.

    :param files: iterable of score file paths.
    :param filter: name of the part to extract from every piece.
    :return: list with one ``getNoteList`` result per file that had a
        matching part, in the order the files were given.
    """
    notes = []
    for f in files:
        piece = converter.parse(f)
        p = getPartByName(piece, filter)

        if p is None:
            print("no part found", f, filter)
            continue

        transposePart(p, inPlace=True)
        # transpose=False: the part was already transposed in place above
        # (presumably this avoids transposing twice - confirm in getNoteList).
        notes.append(getNoteList(p, transpose=False))

    return notes

In [3]:
def generateTrainingData(notes, delta, split):
    """Split every note list into an input/target pair and encode both halves.

    Each note list ``n`` is cut at ``int(len(n) * split)``. The first part is
    the model input, the remainder is the target. Both are encoded with
    ``encodeNoteList(..., delta)``; the encoded target additionally gets
    ``getStopIndex()`` appended.

    :param notes: iterable of note lists (sliceable sequences).
    :param delta: passed through unchanged to ``encodeNoteList``.
    :param split: fraction of each note list that becomes the input part.
    :return: ``(data, longest)`` where ``data`` is a list of tuples
        ``(encoded_input, encoded_target, raw_input, raw_target)`` and
        ``longest`` is the length of the longest encoded input (0 when
        ``notes`` is empty).
    """
    # Local names deliberately avoid shadowing the builtins max() and input().
    longestInput = 0
    data = []
    for n in notes:
        splitIndex = int(len(n) * split)
        x = n[:splitIndex]
        y = n[splitIndex:]
        encodedInput = encodeNoteList(x, delta)
        encodedTarget = encodeNoteList(y, delta) + [getStopIndex()]
        data.append((encodedInput, encodedTarget, x, y))
        if len(encodedInput) > longestInput:
            longestInput = len(encodedInput)
    return data, longestInput

In [4]:
### Data preparation ###
# delta is forwarded to encodeNoteList / decodeSequence; splitFactor is the
# fraction of every piece that is used as model input (the rest is the target).
delta = 0.5
splitFactor = 0.5

# glob returns paths in arbitrary order. The train/test split further down
# slices by position, so the file list is sorted to keep the split stable
# across runs and machines.
# NOTE(review): absolute, user-specific path - consider a configurable data dir.
files = sorted(glob.glob('C:/Users/sorgm/datasets/music21corpus/bach/bwv*.mxl'))
notes = parseMXLfiles(files)
encodedNotes, MAX_LENGTH = generateTrainingData(notes, delta, splitFactor)
print("loaded", len(encodedNotes), "training points")

no part found C:/Users/sorgm/datasets/music21corpus/bach\bwv171.6.mxl soprano
no part found C:/Users/sorgm/datasets/music21corpus/bach\bwv227.3.mxl soprano
no part found C:/Users/sorgm/datasets/music21corpus/bach\bwv27.6.mxl soprano
no part found C:/Users/sorgm/datasets/music21corpus/bach\bwv41.6.mxl soprano
no part found C:/Users/sorgm/datasets/music21corpus/bach\bwv846.mxl soprano


In [8]:
print("loaded", len(encodedNotes), "training points")

# This cell rebinds encodedNotes to the 50-item training slice. Running it a
# second time without re-running the data preparation cell would silently
# leave `test` empty, so fail loudly instead.
assert len(encodedNotes) > 50, \
    "encodedNotes already reduced - re-run the data preparation cell first"

# The training slice is kept under its own name so that the `train` function
# imported from pytorchmodels.training is not shadowed.
trainSet = encodedNotes[0:50]
test = encodedNotes[50:100]
encodedNotes = trainSet

loaded 403 training points


In [10]:
### Training ###
# Seed the RNGs before the models are created so that weight initialisation
# (and any sampling that relies on these generators during training) is
# repeatable between runs.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hidden_size = 128
encoder = EncoderRNN(getTotalTokens(), hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, getTotalTokens(), max_length=MAX_LENGTH, dropout_p=0.1).to(device)
# Alternative decoder without attention:
#decoder = DecoderRNN(hidden_size, getTotalTokens()).to(device)

# encodedNotes holds the training slice at this point; MAX_LENGTH is the
# longest encoded input produced by generateTrainingData.
trainIters(encodedNotes, encoder, decoder, epochs=10, print_every=10, plot_every=2, max_length=MAX_LENGTH)


0m 0s (- -1m 59s) (0 0%) 0.5547
0m 3s (- 0m 12s) (10 20%) 3.3525
0m 5s (- 0m 8s) (20 40%) 2.3522
0m 8s (- 0m 5s) (30 60%) 2.0545
0m 11s (- 0m 2s) (40 80%) 1.5498
Epoch 1  finished. Loss: 1.1540486761864195
0m 14s (- -1m 45s) (0 0%) 1.5415
0m 16s (- 1m 6s) (10 20%) 1.6204
0m 19s (- 0m 28s) (20 40%) 1.8313
0m 22s (- 0m 14s) (30 60%) 1.6201
0m 24s (- 0m 6s) (40 80%) 1.5673
Epoch 2  finished. Loss: 1.1707161436689661
0m 27s (- -1m 32s) (0 0%) 1.4377
0m 30s (- 2m 0s) (10 20%) 1.4387
0m 33s (- 0m 49s) (20 40%) 1.6253
0m 36s (- 0m 24s) (30 60%) 1.5533
0m 38s (- 0m 9s) (40 80%) 1.4349
Epoch 3  finished. Loss: 1.151336669921875
0m 41s (- -1m 18s) (0 0%) 1.3623
0m 43s (- 2m 55s) (10 20%) 1.4163
0m 46s (- 1m 10s) (20 40%) 1.4244
0m 50s (- 0m 33s) (30 60%) 1.3862
0m 52s (- 0m 13s) (40 80%) 1.3821
Epoch 4  finished. Loss: 1.0692932453561337
0m 55s (- -1m 4s) (0 0%) 1.2519
0m 58s (- 3m 53s) (10 20%) 1.2344
1m 1s (- 1m 31s) (20 40%) 1.3380
1m 4s (- 0m 42s) (30 60%) 1.3504
1m 6s (- 0m 16s) (40 80%) 1.

In [12]:
# Persist both trained networks as whole pickled modules, encoder first.
# NOTE(review): files written this way need the pytorchmodels classes to be
# importable when loaded; saving state_dict() may be more portable - confirm
# before changing the on-disk format.
for _model, _path in ((encoder, "encoder.pt"), (decoder, "decoder.pt")):
    torch.save(_model, _path)

In [13]:
### Inference ###
# Each entry of `test` is a tuple produced by generateTrainingData:
# (encoded input, encoded target, raw input notes, raw target notes).
sampleIndex = 0
pair = test[sampleIndex]
encodedInput = pair[0]  # not named `input`, to avoid shadowing the builtin
target = pair[1]
# Take the raw input notes from the SAME test sample. Indexing encodedNotes
# here would pick a training piece, because encodedNotes was rebound to the
# training slice earlier in the notebook.
inputNotes = pair[2]

decoded_words, decoder_attentions = evaluate(encodedInput, encoder, decoder, MAX_LENGTH)

p, _ = decodeSequence(decoded_words, inputNotes, delta=delta)

print("i:", encodedInput)
print("d:", decoded_words)
print("t:", target)


i: [67, 195, 67, 195, 69, 197, 69, 197, 71, 199, 71, 199, 72, 200, 200, 200, 74, 202, 74, 202, 72, 200, 72, 200, 71, 199, 69, 71, 72, 69, 197, 197, 197, 67, 195, 195, 195, 67, 195, 67, 195, 69, 197, 69, 197, 67, 195, 65, 193, 64, 192, 192, 192]
d: [67, 195, 69, 71, 72, 200, 71, 199, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 69, 197, 67, 195, 67, 195, 69, 197, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 67, 195, 69, 197, 67, 195, 67, 1

In [15]:
# Show `p`, the first value returned by decodeSequence in the inference cell,
# using the "mxl" format.
p.show("mxl")