In [1]:
import glob
import time

import music21
from music21 import *
import torch

from pytorchmodelsBATCH.AttnDecoderRNN import AttnDecoderRNN
from pytorchmodelsBATCH.DecoderRNN import DecoderRNN
from pytorchmodelsBATCH.EncoderRNN import EncoderRNN
from pytorchmodelsBATCH.encoding import getStartIndex, getStopIndex, getTotalTokens, encodeNoteList, decodeSequence, \
    transposePart, getNoteList, getPadIndex
from pytorchmodelsBATCH.inference import evaluate
from pytorchmodelsBATCH.training import train, trainIters

In [2]:
def getPartByName(piece, filter="soprano"):
    """Return the first part of ``piece`` whose name matches ``filter``.

    The comparison is case-insensitive and requires the whole name to match.

    :param piece: object exposing an iterable ``parts`` attribute whose items
        carry a ``partName`` attribute (e.g. a parsed music21 score).
    :param filter: part name to look for; defaults to ``"soprano"``.
    :return: the first matching part, or ``None`` when no part matches.
    """
    for p in piece.parts:  # type: music21.stream.Part
        name = p.partName
        # A part without a name has partName set to None; calling .lower() on
        # it would raise AttributeError and abort the whole search, so such
        # parts are simply treated as non-matching.
        if name is not None and name.lower() == filter.lower():
            return p
    return None

def parseMXLfiles(pathPattern, filter="soprano"):
    """Parse every file matching ``pathPattern`` and collect one note list per file.

    For each matched file the part named ``filter`` is looked up with
    ``getPartByName``. Files without such a part are reported on stdout and
    skipped. The selected part is transposed in place with ``transposePart``
    before its notes are extracted, which is why ``getNoteList`` is called
    with ``transpose=False``.

    :param pathPattern: glob pattern selecting the score files to load.
    :param filter: name of the part to extract from each score.
    :return: list with one ``getNoteList`` result per successfully handled file.
    """
    matchedPaths = glob.glob(pathPattern)
    print(matchedPaths)

    noteLists = []
    for scorePath in matchedPaths:
        score = music21.converter.parse(scorePath)
        part = getPartByName(score, filter)

        if part is not None:
            # Transposition happens here, so getNoteList must not repeat it.
            transposePart(part, inPlace=True)
            noteLists.append(getNoteList(part, transpose=False))
        else:
            print("no part found in", scorePath, "with filter", filter)

    return noteLists

In [3]:
def generateTrainingData(notes, delta, split):
    """Turn raw note lists into encoded (input, target) training samples.

    Each note list is cut at ``int(len(n) * split)``; the leading portion
    becomes the input and the remainder the target. Both portions are encoded
    with ``encodeNoteList`` and the encoded target is terminated with the stop
    index.

    :param notes: iterable of note lists (one per piece).
    :param delta: passed through unchanged to ``encodeNoteList``.
    :param split: fraction of each note list that goes into the input part.
    :return: tuple ``(samples, longest)`` where ``samples`` is a list of
        ``(encodedInput, encodedTarget, rawInput, rawTarget)`` tuples and
        ``longest`` is the length of the longest encoded input (0 when
        ``notes`` is empty).
    """
    samples = []
    longestInput = 0
    for noteList in notes:
        cut = int(len(noteList) * split)
        head, tail = noteList[:cut], noteList[cut:]

        encodedHead = encodeNoteList(head, delta)
        encodedTail = encodeNoteList(tail, delta) + [getStopIndex()]
        samples.append((encodedHead, encodedTail, head, tail))

        longestInput = max(longestInput, len(encodedHead))
    return samples, longestInput


def padBatch(pairs):
    """Pad a list of samples to common lengths and stack them into tensors.

    Only the first two entries of every sample are used: index 0 is the
    encoded input, index 1 the encoded target. Inputs are right-padded with
    the pad index up to the longest input; targets are right-padded with the
    stop index up to the longest target.

    :param pairs: sequence of samples whose first two items are lists of ints.
    :return: tuple ``(inputTensor, targetTensor)`` built with ``torch.tensor``
        from the padded rows.
    """
    widestIn = max((len(sample[0]) for sample in pairs), default=0)
    widestTarget = max((len(sample[1]) for sample in pairs), default=0)

    paddedInputs = []
    paddedTargets = []
    for sample in pairs:
        source, goal = sample[0], sample[1]
        paddedInputs.append(source + [getPadIndex()] * (widestIn - len(source)))
        # Targets are filled with the stop token rather than the pad token.
        paddedTargets.append(goal + [getStopIndex()] * (widestTarget - len(goal)))

    return torch.tensor(paddedInputs), torch.tensor(paddedTargets)

In [4]:
### Data preparation ###
# delta is forwarded unchanged to encodeNoteList / decodeSequence.
# NOTE(review): presumably the rhythmic resolution of the encoding — confirm in pytorchmodelsBATCH.encoding.
delta = 0.5
# Fraction of every melody used as encoder input; the remainder becomes the target.
splitFactor = 0.5
#notes = parseMXLfiles('C:/Users/sorgm/datasets/music21corpus/bach/bwv3.6.mxl')
# NOTE(review): absolute, machine-specific path; on any other machine the glob
# matches nothing and `notes` silently ends up empty.
notes = parseMXLfiles('C:/Users/Shadow/music21/music21/corpus/bach/bwv1*.mxl')
#print(len(notes))
#quit()

# encodedNotes: list of (encodedInput, encodedTarget, rawInput, rawTarget) tuples.
# MAX_LENGTH: length of the longest encoded input across all pieces.
encodedNotes, MAX_LENGTH = generateTrainingData(notes, delta, splitFactor)
#print(encodedNotes)

['C:/Users/Shadow/music21/music21/corpus/bach\\bwv1.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv10.7.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv101.7.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv102.7.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv103.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv104.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv108.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv11.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv110.7.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv111.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv112.5-sc.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv112.5.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv113.8.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv114.7.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv115.6.mxl', 'C:/Users/Shadow/music21/music21/corpus/bach\\bwv116.6.mxl', 'C:/Users/Shadow/music21

In [5]:
print("loaded", len(encodedNotes), "training points")

# The training subset is deliberately NOT called `train`: that name is the
# training function imported from pytorchmodelsBATCH.training, and rebinding it
# to a list would make the imported function unreachable for the rest of the
# session.
trainSet = encodedNotes[0:50]
# Slicing past the end is harmless: `test` simply holds whatever remains
# after the first 50 samples (at most 50 of them).
test = encodedNotes[50:100]
# NOTE: from here on `encodedNotes` refers to the training subset only, so this
# cell is not idempotent — re-running it without re-running the data
# preparation cell leaves `test` empty.
encodedNotes = trainSet

loaded 90 training points


In [13]:
# Pad every sample currently in `encodedNotes` to a common length and stack
# them into one (input, target) tensor pair, i.e. a single batch.
# NOTE: `input` shadows the Python builtin of the same name from here on.
input, target = padBatch(encodedNotes)
print("batch shapes:", input.shape, target.shape)

batch shapes: torch.Size([50, 103]) torch.Size([50, 123])


In [14]:
# Fix PyTorch's RNG seed before the models are created so that a
# Restart & Run All produces the same initial weights and a comparable loss curve.
SEED = 0
torch.manual_seed(SEED)

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
hidden_size = 128
encoder = EncoderRNN(getTotalTokens(), hidden_size, device).to(device)
# Alternative attention decoder, kept for reference:
#decoder = AttnDecoderRNN(hidden_size, getTotalTokens(), max_length=MAX_LENGTH, dropout_p=0.1).to(device)
decoder = DecoderRNN(hidden_size, getTotalTokens(), device).to(device)


# The whole training subset is used as one single batch.
batches = [(input.to(device), target.to(device))]

# Wall-clock duration of the complete training run, printed in seconds.
s = time.time()
trainIters(batches, encoder, decoder, epochs=200, print_every=200, plot_every=200, device=device,max_length=MAX_LENGTH)
print(time.time()-s)

0m 0s (- -1m 59s) (0 0%) 0.0279
Epoch 1  finished. Loss: 5.582451052782012
0m 1s (- -1m 58s) (0 0%) 0.0193
Epoch 2  finished. Loss: 3.8564066072789633
0m 2s (- -1m 57s) (0 0%) 0.0132
Epoch 3  finished. Loss: 2.6395509301162345
0m 2s (- -1m 57s) (0 0%) 0.0114
Epoch 4  finished. Loss: 2.273967463795732
0m 3s (- -1m 56s) (0 0%) 0.0106
Epoch 5  finished. Loss: 2.118817337160188
0m 4s (- -1m 55s) (0 0%) 0.0104
Epoch 6  finished. Loss: 2.0833033119759907
0m 5s (- -1m 54s) (0 0%) 0.0098
Epoch 7  finished. Loss: 1.9612205939564278
0m 5s (- -1m 54s) (0 0%) 0.0096
Epoch 8  finished. Loss: 1.9245024890434452
0m 6s (- -1m 53s) (0 0%) 0.0094
Epoch 9  finished. Loss: 1.884659681862932
0m 7s (- -1m 52s) (0 0%) 0.0093
Epoch 10  finished. Loss: 1.854654823861471
0m 8s (- -1m 51s) (0 0%) 0.0091
Epoch 11  finished. Loss: 1.8205020563389227
0m 9s (- -1m 50s) (0 0%) 0.0090
Epoch 12  finished. Loss: 1.8005905771643165
0m 9s (- -1m 50s) (0 0%) 0.0089
Epoch 13  finished. Loss: 1.7872407494521723
0m 10s (- -1m

1m 21s (- -2m 38s) (0 0%) 0.0062
Epoch 107  finished. Loss: 1.2351281390926703
1m 22s (- -2m 37s) (0 0%) 0.0061
Epoch 108  finished. Loss: 1.228746739829459
1m 23s (- -2m 36s) (0 0%) 0.0061
Epoch 109  finished. Loss: 1.224819648556593
1m 24s (- -2m 35s) (0 0%) 0.0061
Epoch 110  finished. Loss: 1.222438285021278
1m 24s (- -2m 35s) (0 0%) 0.0061
Epoch 111  finished. Loss: 1.2209594230341718
1m 25s (- -2m 34s) (0 0%) 0.0061
Epoch 112  finished. Loss: 1.222342018189469
1m 26s (- -2m 33s) (0 0%) 0.0062
Epoch 113  finished. Loss: 1.2351096548685214
1m 27s (- -2m 32s) (0 0%) 0.0065
Epoch 114  finished. Loss: 1.306862559744982
1m 27s (- -2m 32s) (0 0%) 0.0073
Epoch 115  finished. Loss: 1.4669921378779218
1m 28s (- -2m 31s) (0 0%) 0.0070
Epoch 116  finished. Loss: 1.408484110018102
1m 29s (- -2m 30s) (0 0%) 0.0062
Epoch 117  finished. Loss: 1.2449462394404218
1m 30s (- -2m 29s) (0 0%) 0.0061
Epoch 118  finished. Loss: 1.226967292103341
1m 30s (- -2m 29s) (0 0%) 0.0066
Epoch 119  finished. Loss:

In [15]:
# Persist the trained encoder and decoder objects to the current working directory.
# NOTE(review): this saves the module objects themselves rather than their
# state_dict; loading them back presumably requires the pytorchmodelsBATCH
# classes to be importable under the same module path — confirm before sharing the files.
torch.save(encoder, "encoder.pt")
torch.save(decoder, "decoder.pt")

In [16]:
### Inference ###
# Pick one held-out sample: (encodedInput, encodedTarget, rawInput, rawTarget).
sampleIndex = 0
pair = test[sampleIndex]
input = pair[0]
target = pair[1]

decoded_words, decoder_attentions = evaluate(input, encoder, decoder, MAX_LENGTH, device)

# Decode against the raw input notes of the SAME held-out sample that was fed
# to the encoder. Looking the notes up in `encodedNotes` would be wrong here:
# after the train/test split that name refers to the training subset, i.e. a
# different piece.
inputNotes = pair[2]

p, _ = decodeSequence(decoded_words, inputNotes, delta=delta)
#p.show()

# i = encoded input, d = decoded model output, t = encoded ground-truth target
print("i:", input)
print("d:", decoded_words)
print("t:", target)


Not implemented Tie. Index: 192
Not implemented Tie. Index: 192
Not implemented Tie. Index: 195
Not implemented Tie. Index: 195
Not implemented Tie. Index: 195
Not implemented Tie. Index: 195
Not implemented Tie. Index: 195
Not implemented Tie. Index: 195
Not implemented Tie. Index: 197
Not implemented Tie. Index: 197
Not implemented Tie. Index: 197
Not implemented Tie. Index: 190
i: [67, 195, 67, 195, 69, 197, 69, 197, 71, 199, 71, 199, 72, 200, 200, 200, 74, 202, 74, 202, 72, 200, 72, 200, 71, 199, 69, 71, 72, 69, 197, 197, 197, 67, 195, 195, 195, 67, 195, 67, 195, 69, 197, 69, 197, 67, 195, 65, 193, 64, 192, 192, 192]
d: [71, 192, 69, 192, 69, 195, 69, 195, 69, 195, 69, 195, 69, 195, 69, 195, 67, 197, 69, 197, 69, 197, 67, 197, 69, 197, 67, 197, 69, 197, 64, 190, 257]
t: [67, 195, 67, 195, 65, 193, 65, 193, 64, 192, 62, 64, 65, 62, 190, 190, 190, 60, 188, 188, 188, 72, 200, 72, 200, 71, 199, 71, 199, 69, 197, 69, 197, 67, 195, 195, 195, 67, 195, 67, 195, 65, 193, 64, 192, 62, 190, 6

In [17]:
# Display the object returned by decodeSequence in the inference cell.
# NOTE(review): "mxl" presumably selects music21's MusicXML output — confirm.
p.show("mxl")