## LSTM на оригинальном датасете

Попытка сделать монофонический выход из сетки

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

Сделаем также пользовательский импорт

In [2]:
from decode_patterns import data_conversion

Загружаем датасет

In [3]:
# import dataset
drum, bass = data_conversion.make_lstm_dataset(height=32, limit=1000, patterns_file="decode_patterns/patterns.pairs.tsv", mono=True)


# define shuffling of dataset
def shuffle(A, B, p=0.8):
    # take 80% to training, other to testing
    L = len(A)
    idx = np.arange(L) < p*L
    np.random.shuffle(idx)
    yield A[idx]
    yield B[idx]
    yield A[np.logical_not(idx)]
    yield B[np.logical_not(idx)]
    
    
# we can select here a validation set
drum, bass, drum_validation, bass_validation = shuffle(drum, bass)
    
# and we can shuffle train and test set like this:
# drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)

In [4]:
bass_validation[9]

array([ 0,  0,  1,  0,  0,  1,  0,  4,  0,  0,  1,  0,  6,  4,  0,  1,  0,
        0,  1,  0,  0,  1,  0,  4,  0,  0,  1,  0,  4, 13,  0,  1],
      dtype=int64)

Модель определим в самом простом варианте, который только можно себе представить -- как в примере с конечным автоматом

In [26]:
# попробуем определить модель LSTM как конечный автомат
class DrumNBassLSTM(nn.Module):
    def __init__(self):
        super(DrumNBassLSTM, self).__init__()
        # one input neuron, one output neuron, one layer in LSTM block
        self.input_size = 14
        self.hidden_size = 34
        self.layer_count = 1
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.layer_count)
        self.embed_layer = nn.Linear(self.hidden_size, 1)
    
    def forward(self, input):
        # пусть в input у нас приходит вектор размерности (64, 32, 14)
        # то есть 64 отсчёта, тридцать два примера (минибатч), 14 значение в каждом (барабанная партия)
        output, _ = self.lstm(input)
        output = self.embed_layer(output)
        return output

In [27]:
# часть обучения
dnb_lstm = DrumNBassLSTM()

criterion = nn.MSELoss()

# оценим также и разнообразие мелодии по её.. дисперсии?)
# def melody_variety(melody):
#     return 1/(1 + (melody.sum(axis=2) > 1).int())
    
# criterion = nn.NLLLoss() # -- этот товарищ требует, чтобы LSTM выдавал классы,
# criterion = nn.CrossEntropyLoss() # и этот тоже
# (числа от 0 до C-1), но как всё-таки его заставить это делать?...
# optimizer = optim.SGD(dnb_lstm.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(dnb_lstm.parameters(), lr=0.001)

Найденные баги и их решения:

https://stackoverflow.com/questions/56741087/how-to-fix-runtimeerror-expected-object-of-scalar-type-float-but-got-scalar-typ

https://stackoverflow.com/questions/49206550/pytorch-error-multi-target-not-supported-in-crossentropyloss/49209628

https://stackoverflow.com/questions/56243672/expected-target-size-50-88-got-torch-size50-288-88

In [28]:
epoch_count = 500
batch_size = 128
shuffle_every_epoch = True
    
if shuffle_every_epoch:
    print(f"shuffle_every_epoch is on")
else:
    print(f"shuffle_every_epoch is off")
    # shuffle train and test set:
    drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
    drum_train = torch.tensor(drum_train, dtype=torch.float)
    bass_train = torch.tensor(bass_train, dtype=torch.float)
    drum_test = torch.tensor(drum_test, dtype=torch.float)
    drum_test = torch.tensor(drum_test, dtype=torch.float)
        
for epoch in range(epoch_count):  # loop over the dataset multiple times
    print(f"Epoch #{epoch}")
    if shuffle_every_epoch:
        # shuffle train and test set:
        drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
        drum_train = torch.tensor(drum_train, dtype=torch.float)
        bass_train = torch.tensor(bass_train, dtype=torch.float)
        drum_test = torch.tensor(drum_test, dtype=torch.float)
        bass_test = torch.tensor(bass_test, dtype=torch.float)
        
    examples_count = drum_train.size()[0]
    examples_id = 0
    
    running_loss = 0.0
    runnint_count = 0
    batch_id = 0
    while examples_id < examples_count:
        batch_drum_train = drum_train[examples_id:examples_id + batch_size,:,:].transpose(0,1)
        batch_bass_train = bass_train[examples_id:examples_id + batch_size,].transpose(0,1)
        # transpose нужен для обмена размерности батча и размерности шагов
#         print(f"batch_drum_train:{batch_drum_train.size()}, batch_bass_train:{batch_bass_train.size()}")

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        bass_outputs = dnb_lstm(batch_drum_train).squeeze()
#         bass_outputs = bass_outputs.reshape(bass_outputs.size()[0], -1)
#         batch_bass_train = batch_bass_train.reshape(batch_bass_train.size()[0], -1)
#         print(f"bass_outputs:{bass_outputs.size()} batch_bass_train: {batch_bass_train.size()}")
#         print(f"bass_outputs:{bass_outputs} batch_bass_train: {batch_bass_train}")
        
        # loss = criterion(bass_outputs, batch_bass_train.long())
        loss = criterion(bass_outputs, batch_bass_train)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        runnint_count += 1
        period = 5
        if batch_id % period == 0 or examples_id + batch_size >= examples_count:
            print('[%d, %5d] train loss: %.7f' %
                  (epoch + 1, batch_id + 1, running_loss / runnint_count))
            running_loss = 0.0
            runnint_count = 1
            
        # update batch info
        examples_id += batch_size
        batch_id += 1
        
    # here we can insert measure error on test set

#should check accuracy on validation set
print('Finished Training')

shuffle_every_epoch is on
Epoch #0
[1,     1] train loss: 55.0815125
[1,     5] train loss: 54.0247192
Epoch #1
[2,     1] train loss: 58.2123909
[2,     5] train loss: 58.4379684
Epoch #2
[3,     1] train loss: 53.3883820
[3,     5] train loss: 56.6465195
Epoch #3
[4,     1] train loss: 57.5456352
[4,     5] train loss: 54.9832253
Epoch #4
[5,     1] train loss: 56.4686127
[5,     5] train loss: 54.6455246
Epoch #5
[6,     1] train loss: 61.8999710
[6,     5] train loss: 55.8028076
Epoch #6
[7,     1] train loss: 55.3449707
[7,     5] train loss: 55.5373611
Epoch #7
[8,     1] train loss: 53.3062019
[8,     5] train loss: 49.9073456
Epoch #8
[9,     1] train loss: 53.2670708
[9,     5] train loss: 51.7305557
Epoch #9
[10,     1] train loss: 50.4468842
[10,     5] train loss: 50.6780014
Epoch #10
[11,     1] train loss: 49.8167686
[11,     5] train loss: 46.7362465
Epoch #11
[12,     1] train loss: 42.4862099
[12,     5] train loss: 47.9254242
Epoch #12
[13,     1] train loss: 43.76885

[103,     1] train loss: 37.3017845
[103,     5] train loss: 40.6891533
Epoch #103
[104,     1] train loss: 38.1578369
[104,     5] train loss: 41.4986488
Epoch #104
[105,     1] train loss: 41.4369926
[105,     5] train loss: 40.3532227
Epoch #105
[106,     1] train loss: 38.2214775
[106,     5] train loss: 41.1353477
Epoch #106
[107,     1] train loss: 36.0690155
[107,     5] train loss: 41.3651764
Epoch #107
[108,     1] train loss: 37.2239189
[108,     5] train loss: 39.8700073
Epoch #108
[109,     1] train loss: 39.8099709
[109,     5] train loss: 41.7670700
Epoch #109
[110,     1] train loss: 39.7547951
[110,     5] train loss: 40.4069702
Epoch #110
[111,     1] train loss: 36.1549530
[111,     5] train loss: 37.9439552
Epoch #111
[112,     1] train loss: 39.8429451
[112,     5] train loss: 39.6974319
Epoch #112
[113,     1] train loss: 42.8182297
[113,     5] train loss: 38.9426468
Epoch #113
[114,     1] train loss: 41.3066177
[114,     5] train loss: 39.8327782
Epoch #114
[115

[201,     5] train loss: 40.0135132
Epoch #201
[202,     1] train loss: 40.7952499
[202,     5] train loss: 36.3156029
Epoch #202
[203,     1] train loss: 38.4012680
[203,     5] train loss: 38.4030685
Epoch #203
[204,     1] train loss: 39.1867142
[204,     5] train loss: 37.7161819
Epoch #204
[205,     1] train loss: 35.5377617
[205,     5] train loss: 37.7814430
Epoch #205
[206,     1] train loss: 41.8268089
[206,     5] train loss: 37.9608269
Epoch #206
[207,     1] train loss: 37.3451729
[207,     5] train loss: 39.3686737
Epoch #207
[208,     1] train loss: 37.3448830
[208,     5] train loss: 36.7835831
Epoch #208
[209,     1] train loss: 41.0972366
[209,     5] train loss: 38.6580109
Epoch #209
[210,     1] train loss: 37.8476562
[210,     5] train loss: 37.2570038
Epoch #210
[211,     1] train loss: 36.1445122
[211,     5] train loss: 38.1028534
Epoch #211
[212,     1] train loss: 37.2768288
[212,     5] train loss: 38.9222847
Epoch #212
[213,     1] train loss: 43.7220345
[213

[300,     1] train loss: 39.3946075
[300,     5] train loss: 36.2664185
Epoch #300
[301,     1] train loss: 36.3524704
[301,     5] train loss: 36.0679825
Epoch #301
[302,     1] train loss: 41.3061256
[302,     5] train loss: 37.0436882
Epoch #302
[303,     1] train loss: 38.0026741
[303,     5] train loss: 36.6489006
Epoch #303
[304,     1] train loss: 40.7772713
[304,     5] train loss: 37.2549118
Epoch #304
[305,     1] train loss: 39.6023483
[305,     5] train loss: 36.8152008
Epoch #305
[306,     1] train loss: 36.7457123
[306,     5] train loss: 37.3704300
Epoch #306
[307,     1] train loss: 37.5564232
[307,     5] train loss: 35.2430977
Epoch #307
[308,     1] train loss: 37.7203484
[308,     5] train loss: 36.2996910
Epoch #308
[309,     1] train loss: 37.0042496
[309,     5] train loss: 37.5654617
Epoch #309
[310,     1] train loss: 36.6796265
[310,     5] train loss: 34.8731987
Epoch #310
[311,     1] train loss: 38.5767937
[311,     5] train loss: 36.0142326
Epoch #311
[312

[398,     5] train loss: 36.3243355
Epoch #398
[399,     1] train loss: 41.5402145
[399,     5] train loss: 36.7010750
Epoch #399
[400,     1] train loss: 41.0632362
[400,     5] train loss: 35.4529060
Epoch #400
[401,     1] train loss: 34.3475723
[401,     5] train loss: 37.0097656
Epoch #401
[402,     1] train loss: 39.5837212
[402,     5] train loss: 36.5975990
Epoch #402
[403,     1] train loss: 34.8061752
[403,     5] train loss: 36.7039013
Epoch #403
[404,     1] train loss: 35.8986740
[404,     5] train loss: 35.6921181
Epoch #404
[405,     1] train loss: 38.7603340
[405,     5] train loss: 36.4510094
Epoch #405
[406,     1] train loss: 36.1041222
[406,     5] train loss: 34.8751030
Epoch #406
[407,     1] train loss: 40.5157051
[407,     5] train loss: 37.1914391
Epoch #407
[408,     1] train loss: 40.4533005
[408,     5] train loss: 36.1681877
Epoch #408
[409,     1] train loss: 38.1235161
[409,     5] train loss: 34.8661278
Epoch #409
[410,     1] train loss: 40.2241898
[410

[497,     1] train loss: 36.8729668
[497,     5] train loss: 37.8872742
Epoch #497
[498,     1] train loss: 35.5148964
[498,     5] train loss: 36.8395004
Epoch #498
[499,     1] train loss: 38.3252754
[499,     5] train loss: 36.9019302
Epoch #499
[500,     1] train loss: 37.0729294
[500,     5] train loss: 35.7297462
Finished Training


In [29]:
batch_drum_train = drum_train[:,:,:].transpose(0,1)
batch_bass_train = bass_train[:,:].transpose(0,1)
with torch.no_grad():
    bass_outputs = dnb_lstm(batch_drum_train)

In [30]:
bass_outputs.squeeze().int()

tensor([[5, 1, 1,  ..., 3, 3, 5],
        [3, 6, 6,  ..., 5, 5, 2],
        [5, 1, 1,  ..., 5, 5, 3],
        ...,
        [2, 5, 5,  ..., 4, 4, 0],
        [5, 0, 0,  ..., 3, 3, 2],
        [2, 7, 7,  ..., 6, 6, 1]], dtype=torch.int32)

Попробуем сохранить результаты работы сети. На anaconda нет mido, поэтому сохраняем результаты работы просто в массивчик npy... Однако, как альтернатива, его можно поставить чере pip в conda:
https://github.com/mido/mido/issues/198

In [31]:
import mido
from decode_patterns.data_conversion import build_track, DrumMelodyPair, Converter


converter = Converter((32,50))

batch_drum = torch.cat((drum_train, drum_test, torch.tensor(drum_validation))).transpose(0,1)
batch_bass = torch.cat((bass_train.int(), bass_test.int(), torch.tensor(bass_validation).int())).transpose(0,1)
with torch.no_grad():
    bass_outputs = dnb_lstm(batch_drum)
    bass_outputs = bass_outputs.squeeze().int()
    
    for i in range(bass_outputs.size()[1]):
        bass_seq = bass_outputs[:,i]
#         bass_seq = batch_bass[:,i]
#         print(f"bass_seq:{bass_seq.size()}")
        bass_output = []
        for bass_note in bass_seq:
            bass_row = np.eye(1, 36, bass_note - 1)[0]
            bass_output.append(bass_row)
        bass_output = torch.tensor(bass_output).int().squeeze()
#         print(f"bass_output:{bass_output.size()}")
        
#         print(f"batch_drum:{batch_drum[:,i,:].size()}, bass_output:{bass_output.size()}")
            
        img_dnb = torch.cat((batch_drum[:,i,:].int(),bass_output), axis=1)
#         print(f"img_dnb:{list(bass_output)}")
        pair = converter.convert_numpy_image_to_pair(np.array(img_dnb))
#         print(f"pair.melody:{pair.melody}")
        mid = build_track(pair, tempo=pair.tempo)
        mid.save(f"midi/npy/sample{i+1}.mid")
#         np.save(f"midi/npy/drum{i+1}.npy", batch_drum[:,i,:].int())
#         np.save(f"midi/npy/bass{i+1}.npy", bass_outputs[:,i,:])