## LSTM на оригинальном датасете

Попытка сделать монофонический выход из сетки

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

Сделаем также пользовательский импорт

In [2]:
from decode_patterns import data_conversion

Загружаем датасет

In [4]:
# import dataset
drum, bass = data_conversion.make_lstm_dataset(height=16, limit=1000, patterns_file="decode_patterns/patterns.pairs.tsv", mono=True)


# define shuffling of dataset
def shuffle(A, B, p=0.8):
    # take 80% to training, other to testing
    L = len(A)
    idx = np.arange(L) < p*L
    np.random.shuffle(idx)
    yield A[idx]
    yield B[idx]
    yield A[np.logical_not(idx)]
    yield B[np.logical_not(idx)]
    
    
# we can select here a validation set
drum, bass, drum_validation, bass_validation = shuffle(drum, bass)
    
# and we can shuffle train and test set like this:
drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)

In [5]:
bass_validation[8]

array([1, 1, 8, 1, 8, 1, 1, 1, 8, 1, 8, 1, 8, 1, 1, 6], dtype=int64)

Модель определим в самом простом варианте, который только можно себе представить -- как в примере с конечным автоматом

In [6]:
# попробуем определить модель LSTM как конечный автомат
class DrumNBassLSTM(nn.Module):
    def __init__(self):
        super(DrumNBassLSTM, self).__init__()
        # one input neuron, one output neuron, one layer in LSTM block
        self.input_size = 14
        self.hidden_size = 34
        self.layer_count = 1
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.layer_count)
        self.embed_layer = nn.Linear(self.hidden_size, 1)
        self.sigm = nn.Sigmoid()
    
    def forward(self, input):
        # пусть в input у нас приходит вектор размерности (64, 32, 14)
        # то есть 64 отсчёта, тридцать два примера (минибатч), 14 значение в каждом (барабанная партия)
        output, _ = self.lstm(input)
        output = self.sigm(self.embed_layer(output))*37
        return output

In [7]:
# часть обучения
dnb_lstm = DrumNBassLSTM()

criterion = nn.MSELoss()

# оценим также и разнообразие мелодии по её.. дисперсии?)
# def melody_variety(melody):
#     return 1/(1 + (melody.sum(axis=2) > 1).int())
    
# criterion = nn.NLLLoss() # -- этот товарищ требует, чтобы LSTM выдавал классы,
# criterion = nn.CrossEntropyLoss() # и этот тоже
# (числа от 0 до C-1), но как всё-таки его заставить это делать?...
# optimizer = optim.SGD(dnb_lstm.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(dnb_lstm.parameters(), lr=0.001)

Найденные баги и их решения:

https://stackoverflow.com/questions/56741087/how-to-fix-runtimeerror-expected-object-of-scalar-type-float-but-got-scalar-typ

https://stackoverflow.com/questions/49206550/pytorch-error-multi-target-not-supported-in-crossentropyloss/49209628

https://stackoverflow.com/questions/56243672/expected-target-size-50-88-got-torch-size50-288-88

In [8]:
epoch_count = 500
batch_size = 128
shuffle_every_epoch = True
    
if shuffle_every_epoch:
    print(f"shuffle_every_epoch is on")
else:
    print(f"shuffle_every_epoch is off")
    # shuffle train and test set:
    drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
    drum_train = torch.tensor(drum_train, dtype=torch.float)
    bass_train = torch.tensor(bass_train, dtype=torch.float)
    drum_test = torch.tensor(drum_test, dtype=torch.float)
    drum_test = torch.tensor(drum_test, dtype=torch.float)
        
for epoch in range(epoch_count):  # loop over the dataset multiple times
    print(f"Epoch #{epoch}")
    if shuffle_every_epoch:
        # shuffle train and test set:
        drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
        drum_train = torch.tensor(drum_train, dtype=torch.float)
        bass_train = torch.tensor(bass_train, dtype=torch.float)
        drum_test = torch.tensor(drum_test, dtype=torch.float)
        bass_test = torch.tensor(bass_test, dtype=torch.float)
        
    examples_count = drum_train.size()[0]
    examples_id = 0
    
    running_loss = 0.0
    runnint_count = 0
    batch_id = 0
    while examples_id < examples_count:
        batch_drum_train = drum_train[examples_id:examples_id + batch_size,:,:].transpose(0,1)
        batch_bass_train = bass_train[examples_id:examples_id + batch_size,].transpose(0,1)
        # transpose нужен для обмена размерности батча и размерности шагов
#         print(f"batch_drum_train:{batch_drum_train.size()}, batch_bass_train:{batch_bass_train.size()}")

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        bass_outputs = dnb_lstm(batch_drum_train).squeeze()
#         bass_outputs = bass_outputs.reshape(bass_outputs.size()[0], -1)
#         batch_bass_train = batch_bass_train.reshape(batch_bass_train.size()[0], -1)
#         print(f"bass_outputs:{bass_outputs.size()} batch_bass_train: {batch_bass_train.size()}")
#         print(f"bass_outputs:{bass_outputs} batch_bass_train: {batch_bass_train}")
        
        # loss = criterion(bass_outputs, batch_bass_train.long())
        loss = criterion(bass_outputs, batch_bass_train)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        runnint_count += 1
        period = 5
        if batch_id % period == 0 or examples_id + batch_size >= examples_count:
            print('[%d, %5d] train loss: %.7f' %
                  (epoch + 1, batch_id + 1, running_loss / runnint_count))
            running_loss = 0.0
            runnint_count = 1
            
        # update batch info
        examples_id += batch_size
        batch_id += 1
        
    # here we can insert measure error on test set

#should check accuracy on validation set
print('Finished Training')

shuffle_every_epoch is on
Epoch #0
[1,     1] train loss: 258.1539917
[1,     5] train loss: 195.4260925
Epoch #1
[2,     1] train loss: 240.0964661
[2,     5] train loss: 184.3658356
Epoch #2
[3,     1] train loss: 229.0117950
[3,     5] train loss: 171.8666382
Epoch #3
[4,     1] train loss: 216.2558899
[4,     5] train loss: 163.2716736
Epoch #4
[5,     1] train loss: 196.6236420
[5,     5] train loss: 149.1508331
Epoch #5
[6,     1] train loss: 182.9964447
[6,     5] train loss: 133.1781982
Epoch #6
[7,     1] train loss: 155.8202820
[7,     5] train loss: 117.2579697
Epoch #7
[8,     1] train loss: 128.0091095
[8,     5] train loss: 94.0577087
Epoch #8
[9,     1] train loss: 101.1207123
[9,     5] train loss: 74.2962463
Epoch #9
[10,     1] train loss: 76.9555817
[10,     5] train loss: 64.9887955
Epoch #10
[11,     1] train loss: 67.4443741
[11,     5] train loss: 64.2468353
Epoch #11
[12,     1] train loss: 64.2493668
[12,     5] train loss: 63.2470749
Epoch #12
[13,     1] trai

[103,     1] train loss: 42.3271980
[103,     5] train loss: 47.2631187
Epoch #103
[104,     1] train loss: 42.3905907
[104,     5] train loss: 45.6935204
Epoch #104
[105,     1] train loss: 42.5125046
[105,     5] train loss: 47.1971970
Epoch #105
[106,     1] train loss: 41.4987221
[106,     5] train loss: 46.5401855
Epoch #106
[107,     1] train loss: 44.0937004
[107,     5] train loss: 46.0301506
Epoch #107
[108,     1] train loss: 40.8332481
[108,     5] train loss: 46.7332108
Epoch #108
[109,     1] train loss: 43.5257530
[109,     5] train loss: 45.4018097
Epoch #109
[110,     1] train loss: 43.6779366
[110,     5] train loss: 44.5048866
Epoch #110
[111,     1] train loss: 39.7247810
[111,     5] train loss: 45.0266876
Epoch #111
[112,     1] train loss: 43.8909874
[112,     5] train loss: 44.9701759
Epoch #112
[113,     1] train loss: 39.6761894
[113,     5] train loss: 44.6905357
Epoch #113
[114,     1] train loss: 42.0943832
[114,     5] train loss: 44.4607529
Epoch #114
[115

KeyboardInterrupt: 

In [9]:
batch_drum_train = drum_train[:,:,:].transpose(0,1)
batch_bass_train = bass_train[:,:].transpose(0,1)
with torch.no_grad():
    bass_outputs = dnb_lstm(batch_drum_train)

In [10]:
result = bass_outputs.squeeze().int()
result

tensor([[ 9,  8,  8,  ..., 10,  7,  7],
        [ 6,  4,  4,  ...,  4,  4,  4],
        [ 6,  5,  5,  ...,  5,  5,  5],
        ...,
        [ 5,  7,  7,  ...,  7,  7,  7],
        [ 5,  7,  7,  ...,  7,  6,  6],
        [ 5,  8,  8,  ...,  6,  7,  7]], dtype=torch.int32)

Попробуем сохранить результаты работы сети. На anaconda нет mido, поэтому сохраняем результаты работы просто в массивчик npy... Однако, как альтернатива, его можно поставить чере pip в conda:
https://github.com/mido/mido/issues/198

In [None]:
import mido
from decode_patterns.data_conversion import build_track, DrumMelodyPair, NumpyImage, Converter


converter = Converter((16,50))

batch_drum = torch.cat((drum_train, drum_test, torch.tensor(drum_validation))).transpose(0,1)
batch_bass = torch.cat((bass_train.int(), bass_test.int(), torch.tensor(bass_validation).int())).transpose(0,1)
with torch.no_grad():
    bass_outputs = dnb_lstm(batch_drum)
    bass_outputs = bass_outputs.squeeze().int()
    
    for i in range(bass_outputs.size()[1]):
        bass_seq = bass_outputs[:,i]
#         bass_seq = batch_bass[:,i]
#         print(f"bass_seq:{bass_seq.size()}")
        bass_output = []
        for bass_note in bass_seq:
            bass_row = np.eye(1, 36, bass_note - 1)[0]
            bass_output.append(bass_row)
        bass_output = torch.tensor(bass_output).int().squeeze()
#         print(f"bass_output:{bass_output.size()}")
        
#         print(f"batch_drum:{batch_drum[:,i,:].size()}, bass_output:{bass_output.size()}")
            
        img_dnb = torch.cat((batch_drum[:,i,:].int(),bass_output), axis=1)
#         print(f"img_dnb:{list(bass_output)}")
        numpy_pair = NumpyImage(np.array(img_dnb), 120, 1, 1, 36)
        pair = converter.convert_numpy_image_to_pair(numpy_pair)
#         print(f"pair.melody:{pair.melody}")
        mid = build_track(pair, tempo=pair.tempo)
        mid.save(f"midi/npy/sample{i+1}.mid")
#         np.save(f"midi/npy/drum{i+1}.npy", batch_drum[:,i,:].int())
#         np.save(f"midi/npy/bass{i+1}.npy", bass_outputs[:,i,:])